Diffstat (limited to 'drivers/net')
-rw-r--r--drivers/net/Makefile49
-rw-r--r--drivers/net/af_packet/Makefile3
-rw-r--r--drivers/net/af_packet/rte_eth_af_packet.c51
-rw-r--r--drivers/net/ark/Makefile3
-rw-r--r--drivers/net/ark/ark_ddm.c2
-rw-r--r--drivers/net/ark/ark_ddm.h4
-rw-r--r--drivers/net/ark/ark_ethdev.c22
-rw-r--r--drivers/net/ark/ark_ethdev.h37
-rw-r--r--drivers/net/ark/ark_ethdev_rx.c25
-rw-r--r--drivers/net/ark/ark_ethdev_tx.c9
-rw-r--r--drivers/net/ark/ark_ext.h2
-rw-r--r--drivers/net/ark/ark_global.h2
-rw-r--r--drivers/net/ark/ark_mpu.c2
-rw-r--r--drivers/net/ark/ark_mpu.h4
-rw-r--r--drivers/net/ark/ark_pktchkr.c4
-rw-r--r--drivers/net/ark/ark_pktdir.c5
-rw-r--r--drivers/net/ark/ark_pktgen.c4
-rw-r--r--drivers/net/ark/ark_udm.c2
-rw-r--r--drivers/net/ark/ark_udm.h4
-rw-r--r--drivers/net/avp/Makefile3
-rw-r--r--drivers/net/avp/avp_ethdev.c31
-rw-r--r--drivers/net/avp/rte_avp_common.h20
-rw-r--r--drivers/net/bnx2x/Makefile3
-rw-r--r--drivers/net/bnx2x/bnx2x.c50
-rw-r--r--drivers/net/bnx2x/bnx2x.h27
-rw-r--r--drivers/net/bnx2x/bnx2x_ethdev.c4
-rw-r--r--drivers/net/bnx2x/bnx2x_ethdev.h1
-rw-r--r--drivers/net/bnx2x/bnx2x_rxtx.c16
-rw-r--r--drivers/net/bnx2x/bnx2x_rxtx.h4
-rw-r--r--drivers/net/bnx2x/bnx2x_stats.c2
-rw-r--r--drivers/net/bnx2x/bnx2x_vfpf.c2
-rw-r--r--drivers/net/bnx2x/ecore_sp.h4
-rw-r--r--drivers/net/bnxt/Makefile5
-rw-r--r--drivers/net/bnxt/bnxt.h24
-rw-r--r--drivers/net/bnxt/bnxt_cpr.c2
-rw-r--r--drivers/net/bnxt/bnxt_cpr.h10
-rw-r--r--drivers/net/bnxt/bnxt_ethdev.c1242
-rw-r--r--drivers/net/bnxt/bnxt_filter.c1037
-rw-r--r--drivers/net/bnxt/bnxt_filter.h88
-rw-r--r--drivers/net/bnxt/bnxt_hwrm.c1006
-rw-r--r--drivers/net/bnxt/bnxt_hwrm.h25
-rw-r--r--drivers/net/bnxt/bnxt_irq.c11
-rw-r--r--drivers/net/bnxt/bnxt_irq.h3
-rw-r--r--drivers/net/bnxt/bnxt_nvm_defs.h75
-rw-r--r--drivers/net/bnxt/bnxt_ring.c14
-rw-r--r--drivers/net/bnxt/bnxt_ring.h4
-rw-r--r--drivers/net/bnxt/bnxt_rxq.c237
-rw-r--r--drivers/net/bnxt/bnxt_rxq.h6
-rw-r--r--drivers/net/bnxt/bnxt_rxr.c110
-rw-r--r--drivers/net/bnxt/bnxt_rxr.h20
-rw-r--r--drivers/net/bnxt/bnxt_stats.c69
-rw-r--r--drivers/net/bnxt/bnxt_stats.h7
-rw-r--r--drivers/net/bnxt/bnxt_txq.h2
-rw-r--r--drivers/net/bnxt/bnxt_txr.c35
-rw-r--r--drivers/net/bnxt/bnxt_txr.h23
-rw-r--r--drivers/net/bnxt/bnxt_vnic.c9
-rw-r--r--drivers/net/bnxt/bnxt_vnic.h7
-rw-r--r--drivers/net/bnxt/hsi_struct_def_dpdk.h2194
-rw-r--r--drivers/net/bnxt/rte_pmd_bnxt.c54
-rw-r--r--drivers/net/bnxt/rte_pmd_bnxt.h32
-rw-r--r--drivers/net/bonding/Makefile8
-rw-r--r--drivers/net/bonding/rte_eth_bond.h41
-rw-r--r--drivers/net/bonding/rte_eth_bond_8023ad.c238
-rw-r--r--drivers/net/bonding/rte_eth_bond_8023ad.h48
-rw-r--r--drivers/net/bonding/rte_eth_bond_8023ad_private.h12
-rw-r--r--drivers/net/bonding/rte_eth_bond_alb.c6
-rw-r--r--drivers/net/bonding/rte_eth_bond_alb.h6
-rw-r--r--drivers/net/bonding/rte_eth_bond_api.c75
-rw-r--r--drivers/net/bonding/rte_eth_bond_args.c38
-rw-r--r--drivers/net/bonding/rte_eth_bond_pmd.c132
-rw-r--r--drivers/net/bonding/rte_eth_bond_private.h45
-rw-r--r--drivers/net/bonding/rte_pmd_bond_version.map (renamed from drivers/net/bonding/rte_eth_bond_version.map)4
-rw-r--r--drivers/net/cxgbe/Makefile5
-rw-r--r--drivers/net/cxgbe/base/adapter.h1
-rw-r--r--drivers/net/cxgbe/base/t4_hw.c4
-rw-r--r--drivers/net/cxgbe/cxgbe_ethdev.c5
-rw-r--r--drivers/net/cxgbe/cxgbe_main.c1
-rw-r--r--drivers/net/cxgbe/sge.c10
-rw-r--r--drivers/net/dpaa/Makefile63
-rw-r--r--drivers/net/dpaa/dpaa_ethdev.c1109
-rw-r--r--drivers/net/dpaa/dpaa_ethdev.h182
-rw-r--r--drivers/net/dpaa/dpaa_rxtx.c756
-rw-r--r--drivers/net/dpaa/dpaa_rxtx.h297
-rw-r--r--drivers/net/dpaa/rte_pmd_dpaa_version.map4
-rw-r--r--drivers/net/dpaa2/Makefile3
-rw-r--r--drivers/net/dpaa2/base/dpaa2_hw_dpni.c56
-rw-r--r--drivers/net/dpaa2/dpaa2_ethdev.c571
-rw-r--r--drivers/net/dpaa2/dpaa2_ethdev.h20
-rw-r--r--drivers/net/dpaa2/dpaa2_rxtx.c75
-rw-r--r--drivers/net/dpaa2/mc/dpkg.c107
-rw-r--r--drivers/net/dpaa2/mc/dpni.c1326
-rw-r--r--drivers/net/dpaa2/mc/fsl_dpkg.h69
-rw-r--r--drivers/net/dpaa2/mc/fsl_dpni.h1122
-rw-r--r--drivers/net/dpaa2/mc/fsl_dpni_cmd.h1020
-rw-r--r--drivers/net/dpaa2/mc/fsl_net.h2
-rw-r--r--drivers/net/dpaa2/rte_pmd_dpaa2_version.map8
-rw-r--r--drivers/net/e1000/Makefile5
-rw-r--r--drivers/net/e1000/em_ethdev.c28
-rw-r--r--drivers/net/e1000/em_rxtx.c22
-rw-r--r--drivers/net/e1000/igb_ethdev.c56
-rw-r--r--drivers/net/e1000/igb_flow.c9
-rw-r--r--drivers/net/e1000/igb_pf.c1
-rw-r--r--drivers/net/e1000/igb_rxtx.c73
-rw-r--r--drivers/net/ena/Makefile3
-rw-r--r--drivers/net/ena/base/ena_plat_dpdk.h4
-rw-r--r--drivers/net/ena/ena_ethdev.c15
-rw-r--r--drivers/net/ena/ena_ethdev.h1
-rw-r--r--drivers/net/enic/Makefile3
-rw-r--r--drivers/net/enic/base/vnic_cq.c14
-rw-r--r--drivers/net/enic/base/vnic_dev.c120
-rw-r--r--drivers/net/enic/base/vnic_dev.h4
-rw-r--r--drivers/net/enic/base/vnic_rq.c9
-rw-r--r--drivers/net/enic/base/vnic_rss.c32
-rw-r--r--drivers/net/enic/base/vnic_wq.c7
-rw-r--r--drivers/net/enic/enic.h7
-rw-r--r--drivers/net/enic/enic_compat.h20
-rw-r--r--drivers/net/enic/enic_ethdev.c57
-rw-r--r--drivers/net/enic/enic_main.c30
-rw-r--r--drivers/net/enic/enic_rxtx.c33
-rw-r--r--drivers/net/failsafe/Makefile3
-rw-r--r--drivers/net/failsafe/failsafe.c2
-rw-r--r--drivers/net/failsafe/failsafe_args.c32
-rw-r--r--drivers/net/failsafe/failsafe_eal.c28
-rw-r--r--drivers/net/failsafe/failsafe_ether.c55
-rw-r--r--drivers/net/failsafe/failsafe_ops.c149
-rw-r--r--drivers/net/failsafe/failsafe_private.h55
-rw-r--r--drivers/net/failsafe/failsafe_rxtx.c3
-rw-r--r--drivers/net/fm10k/Makefile5
-rw-r--r--drivers/net/fm10k/fm10k.h11
-rw-r--r--drivers/net/fm10k/fm10k_ethdev.c137
-rw-r--r--drivers/net/fm10k/fm10k_rxtx.c12
-rw-r--r--drivers/net/fm10k/fm10k_rxtx_vec.c8
-rw-r--r--drivers/net/i40e/Makefile7
-rw-r--r--drivers/net/i40e/base/i40e_osdep.h3
-rw-r--r--drivers/net/i40e/i40e_ethdev.c1128
-rw-r--r--drivers/net/i40e/i40e_ethdev.h231
-rw-r--r--drivers/net/i40e/i40e_ethdev_vf.c454
-rw-r--r--drivers/net/i40e/i40e_fdir.c683
-rw-r--r--drivers/net/i40e/i40e_flow.c519
-rw-r--r--drivers/net/i40e/i40e_pf.c130
-rw-r--r--drivers/net/i40e/i40e_pf.h60
-rw-r--r--drivers/net/i40e/i40e_rxtx.c200
-rw-r--r--drivers/net/i40e/i40e_rxtx.h5
-rw-r--r--drivers/net/i40e/i40e_rxtx_vec_altivec.c6
-rw-r--r--drivers/net/i40e/i40e_rxtx_vec_neon.c27
-rw-r--r--drivers/net/i40e/i40e_rxtx_vec_sse.c8
-rw-r--r--drivers/net/i40e/i40e_tm.c108
-rw-r--r--drivers/net/i40e/rte_pmd_i40e.c940
-rw-r--r--drivers/net/i40e/rte_pmd_i40e.h346
-rw-r--r--drivers/net/i40e/rte_pmd_i40e_version.map13
-rw-r--r--drivers/net/ixgbe/Makefile11
-rw-r--r--drivers/net/ixgbe/base/ixgbe_osdep.h8
-rw-r--r--drivers/net/ixgbe/ixgbe_ethdev.c208
-rw-r--r--drivers/net/ixgbe/ixgbe_ethdev.h55
-rw-r--r--drivers/net/ixgbe/ixgbe_fdir.c5
-rw-r--r--drivers/net/ixgbe/ixgbe_flow.c179
-rw-r--r--drivers/net/ixgbe/ixgbe_ipsec.c737
-rw-r--r--drivers/net/ixgbe/ixgbe_ipsec.h151
-rw-r--r--drivers/net/ixgbe/ixgbe_pf.c61
-rw-r--r--drivers/net/ixgbe/ixgbe_rxtx.c141
-rw-r--r--drivers/net/ixgbe/ixgbe_rxtx.h19
-rw-r--r--drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c10
-rw-r--r--drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c53
-rw-r--r--drivers/net/ixgbe/ixgbe_tm.c97
-rw-r--r--drivers/net/ixgbe/rte_pmd_ixgbe.c60
-rw-r--r--drivers/net/ixgbe/rte_pmd_ixgbe.h64
-rw-r--r--drivers/net/kni/Makefile3
-rw-r--r--drivers/net/kni/rte_eth_kni.c10
-rw-r--r--drivers/net/liquidio/Makefile3
-rw-r--r--drivers/net/liquidio/base/lio_23xx_vf.c19
-rw-r--r--drivers/net/liquidio/base/lio_23xx_vf.h2
-rw-r--r--drivers/net/liquidio/base/lio_hw_defs.h16
-rw-r--r--drivers/net/liquidio/base/lio_mbox.h1
-rw-r--r--drivers/net/liquidio/lio_ethdev.c156
-rw-r--r--drivers/net/liquidio/lio_rxtx.c20
-rw-r--r--drivers/net/liquidio/lio_rxtx.h6
-rw-r--r--drivers/net/liquidio/lio_struct.h3
-rw-r--r--drivers/net/mlx4/Makefile46
-rw-r--r--drivers/net/mlx4/mlx4.c6139
-rw-r--r--drivers/net/mlx4/mlx4.h401
-rw-r--r--drivers/net/mlx4/mlx4_ethdev.c1047
-rw-r--r--drivers/net/mlx4/mlx4_flow.c2062
-rw-r--r--drivers/net/mlx4/mlx4_flow.h73
-rw-r--r--drivers/net/mlx4/mlx4_intr.c397
-rw-r--r--drivers/net/mlx4/mlx4_mr.c293
-rw-r--r--drivers/net/mlx4/mlx4_prm.h177
-rw-r--r--drivers/net/mlx4/mlx4_rxq.c873
-rw-r--r--drivers/net/mlx4/mlx4_rxtx.c1071
-rw-r--r--drivers/net/mlx4/mlx4_rxtx.h214
-rw-r--r--drivers/net/mlx4/mlx4_txq.c414
-rw-r--r--drivers/net/mlx4/mlx4_utils.c217
-rw-r--r--drivers/net/mlx4/mlx4_utils.h133
-rw-r--r--drivers/net/mlx5/Makefile42
-rw-r--r--drivers/net/mlx5/mlx5.c412
-rw-r--r--drivers/net/mlx5/mlx5.h138
-rw-r--r--drivers/net/mlx5/mlx5_defs.h6
-rw-r--r--drivers/net/mlx5/mlx5_ethdev.c441
-rw-r--r--drivers/net/mlx5/mlx5_fdir.c1101
-rw-r--r--drivers/net/mlx5/mlx5_flow.c2811
-rw-r--r--drivers/net/mlx5/mlx5_mac.c410
-rw-r--r--drivers/net/mlx5/mlx5_mr.c283
-rw-r--r--drivers/net/mlx5/mlx5_prm.h48
-rw-r--r--drivers/net/mlx5/mlx5_rss.c139
-rw-r--r--drivers/net/mlx5/mlx5_rxmode.c383
-rw-r--r--drivers/net/mlx5/mlx5_rxq.c2133
-rw-r--r--drivers/net/mlx5/mlx5_rxtx.c267
-rw-r--r--drivers/net/mlx5/mlx5_rxtx.h366
-rw-r--r--drivers/net/mlx5/mlx5_rxtx_vec.c388
-rw-r--r--drivers/net/mlx5/mlx5_rxtx_vec.h130
-rw-r--r--drivers/net/mlx5/mlx5_rxtx_vec_neon.h1039
-rw-r--r--drivers/net/mlx5/mlx5_rxtx_vec_sse.h (renamed from drivers/net/mlx5/mlx5_rxtx_vec_sse.c)512
-rw-r--r--drivers/net/mlx5/mlx5_socket.c294
-rw-r--r--drivers/net/mlx5/mlx5_stats.c25
-rw-r--r--drivers/net/mlx5/mlx5_trigger.c350
-rw-r--r--drivers/net/mlx5/mlx5_txq.c885
-rw-r--r--drivers/net/mlx5/mlx5_utils.h2
-rw-r--r--drivers/net/mlx5/mlx5_vlan.c84
-rw-r--r--drivers/net/mrvl/Makefile68
-rw-r--r--drivers/net/mrvl/mrvl_ethdev.c2294
-rw-r--r--drivers/net/mrvl/mrvl_ethdev.h116
-rw-r--r--drivers/net/mrvl/mrvl_qos.c636
-rw-r--r--drivers/net/mrvl/mrvl_qos.h113
-rw-r--r--drivers/net/mrvl/rte_pmd_mrvl_version.map3
-rw-r--r--drivers/net/nfp/Makefile5
-rw-r--r--drivers/net/nfp/nfp_net.c602
-rw-r--r--drivers/net/nfp/nfp_net_ctrl.h10
-rw-r--r--drivers/net/nfp/nfp_net_eth.h82
-rw-r--r--drivers/net/nfp/nfp_net_pmd.h15
-rw-r--r--drivers/net/nfp/nfp_nfpu.c103
-rw-r--r--drivers/net/nfp/nfp_nfpu.h55
-rw-r--r--drivers/net/nfp/nfp_nspu.c623
-rw-r--r--drivers/net/nfp/nfp_nspu.h83
-rw-r--r--drivers/net/null/Makefile3
-rw-r--r--drivers/net/null/rte_eth_null.c12
-rw-r--r--drivers/net/octeontx/Makefile79
-rw-r--r--drivers/net/octeontx/base/octeontx_bgx.c273
-rw-r--r--drivers/net/octeontx/base/octeontx_bgx.h150
-rw-r--r--drivers/net/octeontx/base/octeontx_io.h156
-rw-r--r--drivers/net/octeontx/base/octeontx_pki_var.h237
-rw-r--r--drivers/net/octeontx/base/octeontx_pkivf.c169
-rw-r--r--drivers/net/octeontx/base/octeontx_pkivf.h553
-rw-r--r--drivers/net/octeontx/base/octeontx_pkovf.c617
-rw-r--r--drivers/net/octeontx/base/octeontx_pkovf.h97
-rw-r--r--drivers/net/octeontx/octeontx_ethdev.c1333
-rw-r--r--drivers/net/octeontx/octeontx_ethdev.h109
-rw-r--r--drivers/net/octeontx/octeontx_logs.h (renamed from drivers/net/xenvirt/virtio_logs.h)54
-rw-r--r--drivers/net/octeontx/octeontx_rxtx.c127
-rw-r--r--drivers/net/octeontx/octeontx_rxtx.h137
-rw-r--r--drivers/net/octeontx/rte_pmd_octeontx_version.map4
-rw-r--r--drivers/net/pcap/Makefile3
-rw-r--r--drivers/net/pcap/rte_eth_pcap.c11
-rw-r--r--drivers/net/qede/Makefile10
-rw-r--r--drivers/net/qede/base/bcm_osal.c22
-rw-r--r--drivers/net/qede/base/bcm_osal.h36
-rw-r--r--drivers/net/qede/base/common_hsi.h760
-rw-r--r--drivers/net/qede/base/ecore.h232
-rw-r--r--drivers/net/qede/base/ecore_cxt.c150
-rw-r--r--drivers/net/qede/base/ecore_cxt.h6
-rw-r--r--drivers/net/qede/base/ecore_dcbx.c591
-rw-r--r--drivers/net/qede/base/ecore_dcbx.h13
-rw-r--r--drivers/net/qede/base/ecore_dcbx_api.h46
-rw-r--r--drivers/net/qede/base/ecore_dev.c1115
-rw-r--r--drivers/net/qede/base/ecore_dev_api.h113
-rw-r--r--drivers/net/qede/base/ecore_hsi_common.h245
-rw-r--r--drivers/net/qede/base/ecore_hsi_debug_tools.h6
-rw-r--r--drivers/net/qede/base/ecore_hsi_eth.h65
-rw-r--r--drivers/net/qede/base/ecore_hw.c25
-rw-r--r--drivers/net/qede/base/ecore_hw.h44
-rw-r--r--drivers/net/qede/base/ecore_init_fw_funcs.c512
-rw-r--r--drivers/net/qede/base/ecore_init_fw_funcs.h98
-rw-r--r--drivers/net/qede/base/ecore_init_ops.c73
-rw-r--r--drivers/net/qede/base/ecore_init_ops.h3
-rw-r--r--drivers/net/qede/base/ecore_int.c1011
-rw-r--r--drivers/net/qede/base/ecore_int.h73
-rw-r--r--drivers/net/qede/base/ecore_int_api.h47
-rw-r--r--drivers/net/qede/base/ecore_iov_api.h55
-rw-r--r--drivers/net/qede/base/ecore_iro.h8
-rw-r--r--drivers/net/qede/base/ecore_iro_values.h44
-rw-r--r--drivers/net/qede/base/ecore_l2.c293
-rw-r--r--drivers/net/qede/base/ecore_l2.h82
-rw-r--r--drivers/net/qede/base/ecore_l2_api.h30
-rw-r--r--drivers/net/qede/base/ecore_mcp.c1651
-rw-r--r--drivers/net/qede/base/ecore_mcp.h195
-rw-r--r--drivers/net/qede/base/ecore_mcp_api.h190
-rw-r--r--drivers/net/qede/base/ecore_mng_tlv.c9
-rw-r--r--drivers/net/qede/base/ecore_proto_if.h5
-rw-r--r--drivers/net/qede/base/ecore_rt_defs.h858
-rw-r--r--drivers/net/qede/base/ecore_sp_api.h2
-rw-r--r--drivers/net/qede/base/ecore_sp_commands.c152
-rw-r--r--drivers/net/qede/base/ecore_sp_commands.h33
-rw-r--r--drivers/net/qede/base/ecore_spq.c111
-rw-r--r--drivers/net/qede/base/ecore_spq.h20
-rw-r--r--drivers/net/qede/base/ecore_sriov.c992
-rw-r--r--drivers/net/qede/base/ecore_sriov.h60
-rw-r--r--drivers/net/qede/base/ecore_vf.c447
-rw-r--r--drivers/net/qede/base/ecore_vf.h79
-rw-r--r--drivers/net/qede/base/ecore_vf_api.h13
-rw-r--r--drivers/net/qede/base/ecore_vfpf_if.h80
-rw-r--r--drivers/net/qede/base/mcp_public.h516
-rw-r--r--drivers/net/qede/base/nvm_cfg.h100
-rw-r--r--drivers/net/qede/base/reg_addr.h17
-rw-r--r--drivers/net/qede/qede_ethdev.c304
-rw-r--r--drivers/net/qede/qede_ethdev.h23
-rw-r--r--drivers/net/qede/qede_fdir.c10
-rw-r--r--drivers/net/qede/qede_if.h17
-rw-r--r--drivers/net/qede/qede_main.c75
-rw-r--r--drivers/net/qede/qede_rxtx.c255
-rw-r--r--drivers/net/qede/qede_rxtx.h23
-rw-r--r--drivers/net/ring/Makefile5
-rw-r--r--drivers/net/ring/rte_eth_ring.c10
-rw-r--r--drivers/net/ring/rte_pmd_ring_version.map (renamed from drivers/net/ring/rte_eth_ring_version.map)0
-rw-r--r--drivers/net/sfc/Makefile8
-rw-r--r--drivers/net/sfc/base/ef10_filter.c288
-rw-r--r--drivers/net/sfc/base/ef10_impl.h25
-rw-r--r--drivers/net/sfc/base/ef10_nic.c10
-rw-r--r--drivers/net/sfc/base/ef10_rx.c96
-rw-r--r--drivers/net/sfc/base/efx.h105
-rw-r--r--drivers/net/sfc/base/efx_filter.c103
-rw-r--r--drivers/net/sfc/base/efx_impl.h20
-rw-r--r--drivers/net/sfc/base/efx_rx.c142
-rw-r--r--drivers/net/sfc/base/hunt_nic.c7
-rw-r--r--drivers/net/sfc/base/medford_nic.c7
-rw-r--r--drivers/net/sfc/base/siena_nic.c3
-rw-r--r--drivers/net/sfc/efsys.h2
-rw-r--r--drivers/net/sfc/sfc.c18
-rw-r--r--drivers/net/sfc/sfc.h7
-rw-r--r--drivers/net/sfc/sfc_dp_rx.h5
-rw-r--r--drivers/net/sfc/sfc_dp_tx.h9
-rw-r--r--drivers/net/sfc/sfc_ef10_rx.c13
-rw-r--r--drivers/net/sfc/sfc_ef10_tx.c81
-rw-r--r--drivers/net/sfc/sfc_ethdev.c69
-rw-r--r--drivers/net/sfc/sfc_flow.c188
-rw-r--r--drivers/net/sfc/sfc_flow.h15
-rw-r--r--drivers/net/sfc/sfc_rx.c74
-rw-r--r--drivers/net/sfc/sfc_tso.c4
-rw-r--r--drivers/net/sfc/sfc_tweak.h3
-rw-r--r--drivers/net/sfc/sfc_tx.c58
-rw-r--r--drivers/net/softnic/Makefile (renamed from drivers/net/xenvirt/Makefile)15
-rw-r--r--drivers/net/softnic/rte_eth_softnic.c851
-rw-r--r--drivers/net/softnic/rte_eth_softnic.h (renamed from drivers/net/xenvirt/rte_eth_xenvirt.h)52
-rw-r--r--drivers/net/softnic/rte_eth_softnic_internals.h291
-rw-r--r--drivers/net/softnic/rte_eth_softnic_tm.c3452
-rw-r--r--drivers/net/softnic/rte_pmd_eth_softnic_version.map7
-rw-r--r--drivers/net/szedata2/Makefile3
-rw-r--r--drivers/net/szedata2/rte_eth_szedata2.c8
-rw-r--r--drivers/net/tap/Makefile3
-rw-r--r--drivers/net/tap/rte_eth_tap.c153
-rw-r--r--drivers/net/tap/rte_eth_tap.h2
-rw-r--r--drivers/net/tap/tap_flow.c20
-rw-r--r--drivers/net/thunderx/Makefile5
-rw-r--r--drivers/net/thunderx/base/nicvf_hw.c2
-rw-r--r--drivers/net/thunderx/base/nicvf_hw.h2
-rw-r--r--drivers/net/thunderx/base/nicvf_hw_defs.h6
-rw-r--r--drivers/net/thunderx/nicvf_ethdev.c22
-rw-r--r--drivers/net/thunderx/nicvf_ethdev.h6
-rw-r--r--drivers/net/thunderx/nicvf_rxtx.h4
-rw-r--r--drivers/net/thunderx/nicvf_struct.h8
-rw-r--r--drivers/net/vhost/Makefile5
-rw-r--r--drivers/net/vhost/rte_eth_vhost.c28
-rw-r--r--drivers/net/vhost/rte_eth_vhost.h4
-rw-r--r--drivers/net/virtio/Makefile6
-rw-r--r--drivers/net/virtio/virtio_ethdev.c169
-rw-r--r--drivers/net/virtio/virtio_ethdev.h6
-rw-r--r--drivers/net/virtio/virtio_pci.c2
-rw-r--r--drivers/net/virtio/virtio_pci.h6
-rw-r--r--drivers/net/virtio/virtio_rxtx.c83
-rw-r--r--drivers/net/virtio/virtio_rxtx.h12
-rw-r--r--drivers/net/virtio/virtio_rxtx_simple.c3
-rw-r--r--drivers/net/virtio/virtio_rxtx_simple_neon.c1
-rw-r--r--drivers/net/virtio/virtio_rxtx_simple_sse.c1
-rw-r--r--drivers/net/virtio/virtio_user/vhost_kernel_tap.c4
-rw-r--r--drivers/net/virtio/virtio_user/vhost_user.c4
-rw-r--r--drivers/net/virtio/virtio_user/virtio_user_dev.c2
-rw-r--r--drivers/net/virtio/virtio_user_ethdev.c12
-rw-r--r--drivers/net/virtio/virtqueue.c25
-rw-r--r--drivers/net/virtio/virtqueue.h16
-rw-r--r--drivers/net/vmxnet3/Makefile3
-rw-r--r--drivers/net/vmxnet3/vmxnet3_ethdev.c36
-rw-r--r--drivers/net/vmxnet3/vmxnet3_ring.h4
-rw-r--r--drivers/net/vmxnet3/vmxnet3_rxtx.c46
-rw-r--r--drivers/net/xenvirt/rte_eth_xenvirt.c766
-rw-r--r--drivers/net/xenvirt/rte_eth_xenvirt_version.map7
-rw-r--r--drivers/net/xenvirt/rte_mempool_gntalloc.c295
-rw-r--r--drivers/net/xenvirt/rte_xen_lib.c454
-rw-r--r--drivers/net/xenvirt/rte_xen_lib.h116
-rw-r--r--drivers/net/xenvirt/virtqueue.h273
386 files changed, 53971 insertions, 24168 deletions
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index d33c9590..ef09b4e1 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -36,78 +36,55 @@ ifeq ($(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD),d)
$(warning thunderx pmd is not supported by old compilers)
endif
-core-libs := librte_eal librte_mbuf librte_mempool librte_ring librte_ether
-core-libs += librte_net librte_kvargs
-
DIRS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += af_packet
-DEPDIRS-af_packet = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_ARK_PMD) += ark
-DEPDIRS-ark = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_AVP_PMD) += avp
-DEPDIRS-avp = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_BNX2X_PMD) += bnx2x
-DEPDIRS-bnx2x = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += bonding
-DEPDIRS-bonding = $(core-libs) librte_cmdline
DIRS-$(CONFIG_RTE_LIBRTE_CXGBE_PMD) += cxgbe
-DEPDIRS-cxgbe = $(core-libs)
+DIRS-$(CONFIG_RTE_LIBRTE_DPAA_PMD) += dpaa
DIRS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += dpaa2
-DEPDIRS-dpaa2 = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += e1000
-DEPDIRS-e1000 = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += ena
-DEPDIRS-ena = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
-DEPDIRS-enic = $(core-libs) librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe
-DEPDIRS-failsafe = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
-DEPDIRS-fm10k = $(core-libs) librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
-DEPDIRS-i40e = $(core-libs) librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
-DEPDIRS-ixgbe = $(core-libs) librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += liquidio
-DEPDIRS-liquidio = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
-DEPDIRS-mlx4 = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
-DEPDIRS-mlx5 = $(core-libs)
+DIRS-$(CONFIG_RTE_LIBRTE_MRVL_PMD) += mrvl
DIRS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp
-DEPDIRS-nfp = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_BNXT_PMD) += bnxt
-DEPDIRS-bnxt = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_PMD_NULL) += null
-DEPDIRS-null = $(core-libs)
+DIRS-$(CONFIG_RTE_LIBRTE_OCTEONTX_PMD) += octeontx
DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
-DEPDIRS-pcap = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
-DEPDIRS-qede = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
-DEPDIRS-ring = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD) += sfc
-DEPDIRS-sfc = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
-DEPDIRS-szedata2 = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
-DEPDIRS-tap = $(core-libs) librte_hash
DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
-DEPDIRS-thunderx = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
-DEPDIRS-virtio = $(core-libs)
DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
-DEPDIRS-vmxnet3 = $(core-libs)
-DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
-DEPDIRS-xenvirt = $(core-libs) librte_cmdline
ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
DIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += kni
endif
-DEPDIRS-kni = $(core-libs) librte_kni
+
+ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y)
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += softnic
+endif # $(CONFIG_RTE_LIBRTE_SCHED)
ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost
endif # $(CONFIG_RTE_LIBRTE_VHOST)
-DEPDIRS-vhost = $(core-libs) librte_vhost
+
+ifeq ($(CONFIG_RTE_LIBRTE_MRVL_PMD),y)
+ifeq ($(CONFIG_RTE_LIBRTE_CFGFILE),n)
+$(error "RTE_LIBRTE_CFGFILE must be enabled in configuration!")
+endif
+endif
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/drivers/net/af_packet/Makefile b/drivers/net/af_packet/Makefile
index 70d517c1..bb37d67a 100644
--- a/drivers/net/af_packet/Makefile
+++ b/drivers/net/af_packet/Makefile
@@ -44,6 +44,9 @@ LIBABIVER := 1
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_vdev
#
# all source are stored in SRCS-y
diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 9a47852c..fa84eb9c 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -41,7 +41,7 @@
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_kvargs.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
@@ -59,6 +59,7 @@
#define ETH_AF_PACKET_BLOCKSIZE_ARG "blocksz"
#define ETH_AF_PACKET_FRAMESIZE_ARG "framesz"
#define ETH_AF_PACKET_FRAMECOUNT_ARG "framecnt"
+#define ETH_AF_PACKET_QDISC_BYPASS_ARG "qdisc_bypass"
#define DFLT_BLOCK_SIZE (1 << 12)
#define DFLT_FRAME_SIZE (1 << 11)
@@ -75,7 +76,7 @@ struct pkt_rx_queue {
unsigned int framenum;
struct rte_mempool *mb_pool;
- uint8_t in_port;
+ uint16_t in_port;
volatile unsigned long rx_pkts;
volatile unsigned long err_pkts;
@@ -115,6 +116,7 @@ static const char *valid_arguments[] = {
ETH_AF_PACKET_BLOCKSIZE_ARG,
ETH_AF_PACKET_FRAMESIZE_ARG,
ETH_AF_PACKET_FRAMECOUNT_ARG,
+ ETH_AF_PACKET_QDISC_BYPASS_ARG,
NULL
};
@@ -165,7 +167,7 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
/* check for vlan info */
if (ppd->tp_status & TP_STATUS_VLAN_VALID) {
mbuf->vlan_tci = ppd->tp_vlan_tci;
- mbuf->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
+ mbuf->ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
}
/* release incoming frame and advance ring buffer */
@@ -216,7 +218,7 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
mbuf = *bufs++;
/* drop oversized packets */
- if (rte_pktmbuf_data_len(mbuf) > pkt_q->frame_data_size) {
+ if (mbuf->pkt_len > pkt_q->frame_data_size) {
rte_pktmbuf_free(mbuf);
continue;
}
@@ -237,8 +239,17 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
/* copy the tx frame data */
pbuf = (uint8_t *) ppd + TPACKET2_HDRLEN -
sizeof(struct sockaddr_ll);
- memcpy(pbuf, rte_pktmbuf_mtod(mbuf, void*), rte_pktmbuf_data_len(mbuf));
- ppd->tp_len = ppd->tp_snaplen = rte_pktmbuf_data_len(mbuf);
+
+ struct rte_mbuf *tmp_mbuf = mbuf;
+ while (tmp_mbuf) {
+ uint16_t data_len = rte_pktmbuf_data_len(tmp_mbuf);
+ memcpy(pbuf, rte_pktmbuf_mtod(tmp_mbuf, void*), data_len);
+ pbuf += data_len;
+ tmp_mbuf = tmp_mbuf->next;
+ }
+
+ ppd->tp_len = mbuf->pkt_len;
+ ppd->tp_snaplen = mbuf->pkt_len;
/* release incoming frame and advance ring buffer */
ppd->tp_status = TP_STATUS_SEND_REQUEST;
@@ -320,7 +331,7 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
dev_info->min_rx_bufsize = 0;
}
-static void
+static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
{
unsigned i, imax;
@@ -353,6 +364,7 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
igb_stats->opackets = tx_total;
igb_stats->oerrors = tx_err_total;
igb_stats->obytes = tx_bytes_total;
+ return 0;
}
static void
@@ -550,6 +562,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
unsigned int blockcnt,
unsigned int framesize,
unsigned int framecnt,
+ unsigned int qdisc_bypass,
struct pmd_internals **internals,
struct rte_eth_dev **eth_dev,
struct rte_kvargs *kvlist)
@@ -571,9 +584,6 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
#if defined(PACKET_FANOUT)
int fanout_arg;
#endif
-#if defined(PACKET_QDISC_BYPASS)
- int bypass;
-#endif
for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
pair = &kvlist->pairs[k_idx];
@@ -689,9 +699,8 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
}
#if defined(PACKET_QDISC_BYPASS)
- bypass = 1;
rc = setsockopt(qsockfd, SOL_PACKET, PACKET_QDISC_BYPASS,
- &bypass, sizeof(bypass));
+ &qdisc_bypass, sizeof(qdisc_bypass));
if (rc == -1) {
RTE_LOG(ERR, PMD,
"%s: could not set PACKET_QDISC_BYPASS "
@@ -699,6 +708,8 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
pair->value);
goto error;
}
+#else
+ RTE_SET_USED(qdisc_bypass);
#endif
rc = setsockopt(qsockfd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req));
@@ -803,7 +814,6 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
(*eth_dev)->data = data;
(*eth_dev)->dev_ops = &ops;
- (*eth_dev)->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
return 0;
@@ -842,6 +852,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
unsigned int framesize = DFLT_FRAME_SIZE;
unsigned int framecount = DFLT_FRAME_COUNT;
unsigned int qpairs = 1;
+ unsigned int qdisc_bypass = 1;
/* do some parameter checking */
if (*sockfd < 0)
@@ -893,6 +904,16 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
}
continue;
}
+ if (strstr(pair->key, ETH_AF_PACKET_QDISC_BYPASS_ARG) != NULL) {
+ qdisc_bypass = atoi(pair->value);
+ if (qdisc_bypass > 1) {
+ RTE_LOG(ERR, PMD,
+ "%s: invalid bypass value\n",
+ name);
+ return -1;
+ }
+ continue;
+ }
}
if (framesize > blocksize) {
@@ -918,6 +939,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
if (rte_pmd_init_internals(dev, *sockfd, qpairs,
blocksize, blockcount,
framesize, framecount,
+ qdisc_bypass,
&internals, &eth_dev,
kvlist) < 0)
return -1;
@@ -1012,4 +1034,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_packet,
"qpairs=<int> "
"blocksz=<int> "
"framesz=<int> "
- "framecnt=<int>");
+ "framecnt=<int> "
+ "qdisc_bypass=<0|1>");
diff --git a/drivers/net/ark/Makefile b/drivers/net/ark/Makefile
index ca64b195..f1433bd2 100644
--- a/drivers/net/ark/Makefile
+++ b/drivers/net/ark/Makefile
@@ -62,5 +62,8 @@ LDLIBS += -lpthread
ifdef CONFIG_RTE_EXEC_ENV_LINUXAPP
LDLIBS += -ldl
endif
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/ark/ark_ddm.c b/drivers/net/ark/ark_ddm.c
index 221460c7..929dc7d1 100644
--- a/drivers/net/ark/ark_ddm.c
+++ b/drivers/net/ark/ark_ddm.c
@@ -93,7 +93,7 @@ ark_ddm_reset(struct ark_ddm_t *ddm)
}
void
-ark_ddm_setup(struct ark_ddm_t *ddm, phys_addr_t cons_addr, uint32_t interval)
+ark_ddm_setup(struct ark_ddm_t *ddm, rte_iova_t cons_addr, uint32_t interval)
{
ddm->setup.cons_write_index_addr = cons_addr;
ddm->setup.write_index_interval = interval / 4; /* 4 ns period */
diff --git a/drivers/net/ark/ark_ddm.h b/drivers/net/ark/ark_ddm.h
index de61926c..f67ad012 100644
--- a/drivers/net/ark/ark_ddm.h
+++ b/drivers/net/ark/ark_ddm.h
@@ -127,7 +127,7 @@ struct ark_ddm_cpld_ps_t {
#define ARK_DDM_SETUP 0x00e0
struct ark_ddm_setup_t {
- phys_addr_t cons_write_index_addr;
+ rte_iova_t cons_write_index_addr;
uint32_t write_index_interval; /* 4ns each */
volatile uint32_t cons_index;
};
@@ -165,7 +165,7 @@ void ark_ddm_start(struct ark_ddm_t *ddm);
int ark_ddm_stop(struct ark_ddm_t *ddm, const int wait);
void ark_ddm_reset(struct ark_ddm_t *ddm);
void ark_ddm_stats_reset(struct ark_ddm_t *ddm);
-void ark_ddm_setup(struct ark_ddm_t *ddm, phys_addr_t cons_addr,
+void ark_ddm_setup(struct ark_ddm_t *ddm, rte_iova_t cons_addr,
uint32_t interval);
void ark_ddm_dump_stats(struct ark_ddm_t *ddm, const char *msg);
void ark_ddm_dump(struct ark_ddm_t *ddm, const char *msg);
diff --git a/drivers/net/ark/ark_ethdev.c b/drivers/net/ark/ark_ethdev.c
index 6db362b0..ff87c20e 100644
--- a/drivers/net/ark/ark_ethdev.c
+++ b/drivers/net/ark/ark_ethdev.c
@@ -35,12 +35,12 @@
#include <sys/stat.h>
#include <dlfcn.h>
+#include <rte_bus_pci.h>
#include <rte_ethdev_pci.h>
#include <rte_kvargs.h>
#include "ark_global.h"
#include "ark_logs.h"
-#include "ark_ethdev.h"
#include "ark_ethdev_tx.h"
#include "ark_ethdev_rx.h"
#include "ark_mpu.h"
@@ -66,7 +66,7 @@ static int eth_ark_dev_link_update(struct rte_eth_dev *dev,
int wait_to_complete);
static int eth_ark_dev_set_link_up(struct rte_eth_dev *dev);
static int eth_ark_dev_set_link_down(struct rte_eth_dev *dev);
-static void eth_ark_dev_stats_get(struct rte_eth_dev *dev,
+static int eth_ark_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static void eth_ark_dev_stats_reset(struct rte_eth_dev *dev);
static void eth_ark_set_default_mac_addr(struct rte_eth_dev *dev,
@@ -242,7 +242,7 @@ check_for_ext(struct ark_adapter *ark)
(int (*)(struct rte_eth_dev *, void *))
dlsym(ark->d_handle, "dev_set_link_down");
ark->user_ext.stats_get =
- (void (*)(struct rte_eth_dev *, struct rte_eth_stats *,
+ (int (*)(struct rte_eth_dev *, struct rte_eth_stats *,
void *))
dlsym(ark->d_handle, "stats_get");
ark->user_ext.stats_reset =
@@ -343,7 +343,6 @@ eth_ark_dev_init(struct rte_eth_dev *dev)
/* We are a single function multi-port device. */
ret = ark_config_device(dev);
dev->dev_ops = &ark_eth_dev_ops;
- dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
dev->data->mac_addrs = rte_zmalloc("ark", ETHER_ADDR_LEN, 0);
if (!dev->data->mac_addrs) {
@@ -452,10 +451,16 @@ ark_config_device(struct rte_eth_dev *dev)
*/
ark->start_pg = 0;
ark->pg = ark_pktgen_init(ark->pktgen.v, 0, 1);
+ if (ark->pg == NULL)
+ return -1;
ark_pktgen_reset(ark->pg);
ark->pc = ark_pktchkr_init(ark->pktchkr.v, 0, 1);
+ if (ark->pc == NULL)
+ return -1;
ark_pktchkr_stop(ark->pc);
ark->pd = ark_pktdir_init(ark->pktdir.v);
+ if (ark->pd == NULL)
+ return -1;
/* Verify HW */
if (ark_udm_verify(ark->udm.v))
@@ -641,7 +646,7 @@ eth_ark_dev_stop(struct rte_eth_dev *dev)
for (i = 0; i < dev->data->nb_tx_queues; i++) {
status = eth_ark_tx_queue_stop(dev, i);
if (status != 0) {
- uint8_t port = dev->data->port_id;
+ uint16_t port = dev->data->port_id;
PMD_DRV_LOG(ERR,
"tx_queue stop anomaly"
" port %u, queue %u\n",
@@ -693,7 +698,7 @@ eth_ark_dev_stop(struct rte_eth_dev *dev)
ark_udm_dump_stats(ark->udm.v, "Post stop");
ark_udm_dump_perf(ark->udm.v, "Post stop");
- for (i = 0; i < dev->data->nb_tx_queues; i++)
+ for (i = 0; i < dev->data->nb_rx_queues; i++)
eth_ark_rx_dump_queue(dev, i, __func__);
/* Stop the packet checker if it is running */
@@ -811,7 +816,7 @@ eth_ark_dev_set_link_down(struct rte_eth_dev *dev)
return 0;
}
-static void
+static int
eth_ark_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
uint16_t i;
@@ -830,8 +835,9 @@ eth_ark_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
for (i = 0; i < dev->data->nb_rx_queues; i++)
eth_rx_queue_stats_get(dev->data->rx_queues[i], stats);
if (ark->user_ext.stats_get)
- ark->user_ext.stats_get(dev, stats,
+ return ark->user_ext.stats_get(dev, stats,
ark->user_data[dev->data->port_id]);
+ return 0;
}
static void
diff --git a/drivers/net/ark/ark_ethdev.h b/drivers/net/ark/ark_ethdev.h
deleted file mode 100644
index df5547bf..00000000
--- a/drivers/net/ark/ark_ethdev.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright (c) 2015-2017 Atomic Rules LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of copyright holder nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _ARK_ETHDEV_H_
-#define _ARK_ETHDEV_H_
-
-#endif
diff --git a/drivers/net/ark/ark_ethdev_rx.c b/drivers/net/ark/ark_ethdev_rx.c
index f39e6f68..a3c0377c 100644
--- a/drivers/net/ark/ark_ethdev_rx.c
+++ b/drivers/net/ark/ark_ethdev_rx.c
@@ -36,7 +36,6 @@
#include "ark_ethdev_rx.h"
#include "ark_global.h"
#include "ark_logs.h"
-#include "ark_ethdev.h"
#include "ark_mpu.h"
#include "ark_udm.h"
@@ -62,7 +61,7 @@ struct ark_rx_queue {
struct rte_mbuf **reserve_q;
/* array of physical addresses of the mbuf data pointer */
/* This point is a virtual address */
- phys_addr_t *paddress_q;
+ rte_iova_t *paddress_q;
struct rte_mempool *mb_pool;
struct ark_udm_t *udm;
@@ -96,18 +95,18 @@ eth_ark_rx_hw_setup(struct rte_eth_dev *dev,
struct ark_rx_queue *queue,
uint16_t rx_queue_id __rte_unused, uint16_t rx_queue_idx)
{
- phys_addr_t queue_base;
- phys_addr_t phys_addr_q_base;
- phys_addr_t phys_addr_prod_index;
+ rte_iova_t queue_base;
+ rte_iova_t phys_addr_q_base;
+ rte_iova_t phys_addr_prod_index;
- queue_base = rte_malloc_virt2phy(queue);
+ queue_base = rte_malloc_virt2iova(queue);
phys_addr_prod_index = queue_base +
offsetof(struct ark_rx_queue, prod_index);
- phys_addr_q_base = rte_malloc_virt2phy(queue->paddress_q);
+ phys_addr_q_base = rte_malloc_virt2iova(queue->paddress_q);
/* Verify HW */
- if (ark_mpu_verify(queue->mpu, sizeof(phys_addr_t))) {
+ if (ark_mpu_verify(queue->mpu, sizeof(rte_iova_t))) {
PMD_DRV_LOG(ERR, "Illegal configuration rx queue\n");
return -1;
}
@@ -205,7 +204,7 @@ eth_ark_dev_rx_queue_setup(struct rte_eth_dev *dev,
socket_id);
queue->paddress_q =
rte_zmalloc_socket("Ark_rx_queue paddr",
- nb_desc * sizeof(phys_addr_t),
+ nb_desc * sizeof(rte_iova_t),
64,
socket_id);
@@ -500,22 +499,22 @@ eth_ark_rx_seed_mbufs(struct ark_rx_queue *queue)
case 0:
while (count != nb) {
queue->paddress_q[seed_m++] =
- (*mbufs++)->buf_physaddr;
+ (*mbufs++)->buf_iova;
count++;
/* FALLTHROUGH */
case 3:
queue->paddress_q[seed_m++] =
- (*mbufs++)->buf_physaddr;
+ (*mbufs++)->buf_iova;
count++;
/* FALLTHROUGH */
case 2:
queue->paddress_q[seed_m++] =
- (*mbufs++)->buf_physaddr;
+ (*mbufs++)->buf_iova;
count++;
/* FALLTHROUGH */
case 1:
queue->paddress_q[seed_m++] =
- (*mbufs++)->buf_physaddr;
+ (*mbufs++)->buf_iova;
count++;
/* FALLTHROUGH */
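
The ark RX changes above are part of the tree-wide IOVA rename in this release: phys_addr_t becomes rte_iova_t, buf_physaddr becomes buf_iova, and rte_malloc_virt2phy() becomes rte_malloc_virt2iova(). A minimal sketch of the resulting pattern is shown below; seed_iova_table() and its parameters are illustrative names, not code from the patch.

#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Illustrative only: allocate mbufs into a software ring and record their
 * buffer bus addresses in an IOVA table, using the renamed APIs. */
static int
seed_iova_table(struct rte_mempool *mb_pool, struct rte_mbuf **ring,
		rte_iova_t *table, unsigned int count, rte_iova_t *table_iova)
{
	unsigned int i;

	if (rte_pktmbuf_alloc_bulk(mb_pool, ring, count) != 0)
		return -1;

	for (i = 0; i < count; i++)
		table[i] = ring[i]->buf_iova;		/* was buf_physaddr */

	/* the table itself is usually handed to the NIC by bus address too */
	*table_iova = rte_malloc_virt2iova(table);	/* was rte_malloc_virt2phy */
	return 0;
}
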
diff --git a/drivers/net/ark/ark_ethdev_tx.c b/drivers/net/ark/ark_ethdev_tx.c
index 9ae7ae0e..4ef55d10 100644
--- a/drivers/net/ark/ark_ethdev_tx.c
+++ b/drivers/net/ark/ark_ethdev_tx.c
@@ -37,7 +37,6 @@
#include "ark_global.h"
#include "ark_mpu.h"
#include "ark_ddm.h"
-#include "ark_ethdev.h"
#include "ark_logs.h"
#define ARK_TX_META_SIZE 32
@@ -94,7 +93,7 @@ eth_ark_tx_meta_from_mbuf(struct ark_tx_meta *meta,
const struct rte_mbuf *mbuf,
uint8_t flags)
{
- meta->physaddr = rte_mbuf_data_dma_addr(mbuf);
+ meta->physaddr = rte_mbuf_data_iova(mbuf);
meta->delta_ns = 0;
meta->data_len = rte_pktmbuf_data_len(mbuf);
meta->flags = flags;
@@ -311,15 +310,15 @@ eth_ark_tx_queue_setup(struct rte_eth_dev *dev,
static int
eth_ark_tx_hw_queue_config(struct ark_tx_queue *queue)
{
- phys_addr_t queue_base, ring_base, cons_index_addr;
+ rte_iova_t queue_base, ring_base, cons_index_addr;
uint32_t write_interval_ns;
/* Verify HW -- MPU */
if (ark_mpu_verify(queue->mpu, sizeof(struct ark_tx_meta)))
return -1;
- queue_base = rte_malloc_virt2phy(queue);
- ring_base = rte_malloc_virt2phy(queue->meta_q);
+ queue_base = rte_malloc_virt2iova(queue);
+ ring_base = rte_malloc_virt2iova(queue->meta_q);
cons_index_addr =
queue_base + offsetof(struct ark_tx_queue, cons_index);
diff --git a/drivers/net/ark/ark_ext.h b/drivers/net/ark/ark_ext.h
index 63b7a261..d26c8198 100644
--- a/drivers/net/ark/ark_ext.h
+++ b/drivers/net/ark/ark_ext.h
@@ -91,7 +91,7 @@ int dev_set_link_up(struct rte_eth_dev *dev,
int dev_set_link_down(struct rte_eth_dev *dev,
void *user_data);
-void stats_get(struct rte_eth_dev *dev,
+int stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats,
void *user_data);
diff --git a/drivers/net/ark/ark_global.h b/drivers/net/ark/ark_global.h
index 2a6375fe..aef2cf73 100644
--- a/drivers/net/ark/ark_global.h
+++ b/drivers/net/ark/ark_global.h
@@ -97,7 +97,7 @@ struct ark_user_ext {
int (*link_update)(struct rte_eth_dev *, int wait_to_complete, void *);
int (*dev_set_link_up)(struct rte_eth_dev *, void *);
int (*dev_set_link_down)(struct rte_eth_dev *, void *);
- void (*stats_get)(struct rte_eth_dev *, struct rte_eth_stats *, void *);
+ int (*stats_get)(struct rte_eth_dev *, struct rte_eth_stats *, void *);
void (*stats_reset)(struct rte_eth_dev *, void *);
void (*mac_addr_add)(struct rte_eth_dev *,
struct ether_addr *,
diff --git a/drivers/net/ark/ark_mpu.c b/drivers/net/ark/ark_mpu.c
index cd2c0788..d4ba6dc7 100644
--- a/drivers/net/ark/ark_mpu.c
+++ b/drivers/net/ark/ark_mpu.c
@@ -118,7 +118,7 @@ ark_mpu_reset_stats(struct ark_mpu_t *mpu)
}
int
-ark_mpu_configure(struct ark_mpu_t *mpu, phys_addr_t ring, uint32_t ring_size,
+ark_mpu_configure(struct ark_mpu_t *mpu, rte_iova_t ring, uint32_t ring_size,
int is_tx)
{
ark_mpu_reset(mpu);
diff --git a/drivers/net/ark/ark_mpu.h b/drivers/net/ark/ark_mpu.h
index a0171dbd..f6f6c808 100644
--- a/drivers/net/ark/ark_mpu.h
+++ b/drivers/net/ark/ark_mpu.h
@@ -75,7 +75,7 @@ struct ark_mpu_hw_t {
#define ARK_MPU_CFG 0x040
struct ark_mpu_cfg_t {
- phys_addr_t ring_base; /* phys_addr_t is a uint64_t */
+ rte_iova_t ring_base; /* rte_iova_t is a uint64_t */
uint32_t ring_size;
uint32_t ring_mask;
uint32_t min_host_move;
@@ -137,7 +137,7 @@ int ark_mpu_verify(struct ark_mpu_t *mpu, uint32_t obj_size);
void ark_mpu_stop(struct ark_mpu_t *mpu);
void ark_mpu_start(struct ark_mpu_t *mpu);
int ark_mpu_reset(struct ark_mpu_t *mpu);
-int ark_mpu_configure(struct ark_mpu_t *mpu, phys_addr_t ring,
+int ark_mpu_configure(struct ark_mpu_t *mpu, rte_iova_t ring,
uint32_t ring_size, int is_tx);
void ark_mpu_dump(struct ark_mpu_t *mpu, const char *msg, uint16_t idx);
diff --git a/drivers/net/ark/ark_pktchkr.c b/drivers/net/ark/ark_pktchkr.c
index c3040af3..202a1d9b 100644
--- a/drivers/net/ark/ark_pktchkr.c
+++ b/drivers/net/ark/ark_pktchkr.c
@@ -112,6 +112,10 @@ ark_pktchkr_init(void *addr, int ord, int l2_mode)
struct ark_pkt_chkr_inst *inst =
rte_malloc("ark_pkt_chkr_inst",
sizeof(struct ark_pkt_chkr_inst), 0);
+ if (inst == NULL) {
+ PMD_DRV_LOG(ERR, "Failed to malloc ark_pkt_chkr_inst.\n");
+ return inst;
+ }
inst->sregs = (struct ark_pkt_chkr_stat_regs *)addr;
inst->cregs =
(struct ark_pkt_chkr_ctl_regs *)(((uint8_t *)addr) + 0x100);
diff --git a/drivers/net/ark/ark_pktdir.c b/drivers/net/ark/ark_pktdir.c
index 66e5ce24..eb47dedb 100644
--- a/drivers/net/ark/ark_pktdir.c
+++ b/drivers/net/ark/ark_pktdir.c
@@ -36,6 +36,7 @@
#include "ark_pktdir.h"
#include "ark_global.h"
+#include "ark_logs.h"
ark_pkt_dir_t
@@ -45,6 +46,10 @@ ark_pktdir_init(void *base)
rte_malloc("ark_pkt_dir_inst",
sizeof(struct ark_pkt_dir_inst),
0);
+ if (inst == NULL) {
+ PMD_DRV_LOG(ERR, "Failed to malloc ark_pkt_dir_inst.\n");
+ return inst;
+ }
inst->regs = (struct ark_pkt_dir_regs *)base;
inst->regs->ctrl = 0x00110110; /* POR state */
return inst;
diff --git a/drivers/net/ark/ark_pktgen.c b/drivers/net/ark/ark_pktgen.c
index 8c7a8a2d..018f37b6 100644
--- a/drivers/net/ark/ark_pktgen.c
+++ b/drivers/net/ark/ark_pktgen.c
@@ -110,6 +110,10 @@ ark_pktgen_init(void *adr, int ord, int l2_mode)
struct ark_pkt_gen_inst *inst =
rte_malloc("ark_pkt_gen_inst_pmd",
sizeof(struct ark_pkt_gen_inst), 0);
+ if (inst == NULL) {
+ PMD_DRV_LOG(ERR, "Failed to malloc ark_pkt_gen_inst.\n");
+ return inst;
+ }
inst->regs = (struct ark_pkt_gen_regs *)adr;
inst->ordinal = ord;
inst->l2_mode = l2_mode;
diff --git a/drivers/net/ark/ark_udm.c b/drivers/net/ark/ark_udm.c
index 1ba7d26d..7a429ac7 100644
--- a/drivers/net/ark/ark_udm.c
+++ b/drivers/net/ark/ark_udm.c
@@ -122,7 +122,7 @@ ark_udm_configure(struct ark_udm_t *udm,
}
void
-ark_udm_write_addr(struct ark_udm_t *udm, phys_addr_t addr)
+ark_udm_write_addr(struct ark_udm_t *udm, rte_iova_t addr)
{
udm->rt_cfg.hw_prod_addr = addr;
}
diff --git a/drivers/net/ark/ark_udm.h b/drivers/net/ark/ark_udm.h
index 29bf1e8f..915343fe 100644
--- a/drivers/net/ark/ark_udm.h
+++ b/drivers/net/ark/ark_udm.h
@@ -137,7 +137,7 @@ struct ark_udm_tlp_ps_t {
#define ARK_UDM_RT_CFG 0x00e0
struct ark_udm_rt_cfg_t {
- phys_addr_t hw_prod_addr;
+ rte_iova_t hw_prod_addr;
uint32_t write_interval; /* 4ns cycles */
volatile uint32_t prod_idx; /* RO */
};
@@ -171,7 +171,7 @@ void ark_udm_configure(struct ark_udm_t *udm,
uint32_t headroom,
uint32_t dataroom,
uint32_t write_interval_ns);
-void ark_udm_write_addr(struct ark_udm_t *udm, phys_addr_t addr);
+void ark_udm_write_addr(struct ark_udm_t *udm, rte_iova_t addr);
void ark_udm_stats_reset(struct ark_udm_t *udm);
void ark_udm_dump_stats(struct ark_udm_t *udm, const char *msg);
void ark_udm_dump_queue_stats(struct ark_udm_t *udm, const char *msg,
diff --git a/drivers/net/avp/Makefile b/drivers/net/avp/Makefile
index cd465aac..c29ecf45 100644
--- a/drivers/net/avp/Makefile
+++ b/drivers/net/avp/Makefile
@@ -39,6 +39,9 @@ LIB = librte_pmd_avp.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
EXPORT_MAP := rte_pmd_avp_version.map
diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_ethdev.c
index c746a0e2..9b342bfa 100644
--- a/drivers/net/avp/avp_ethdev.c
+++ b/drivers/net/avp/avp_ethdev.c
@@ -40,11 +40,11 @@
#include <rte_ethdev_pci.h>
#include <rte_memcpy.h>
#include <rte_string_fns.h>
-#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_cycles.h>
@@ -70,7 +70,7 @@ static void avp_dev_stop(struct rte_eth_dev *dev);
static void avp_dev_close(struct rte_eth_dev *dev);
static void avp_dev_info_get(struct rte_eth_dev *dev,
struct rte_eth_dev_info *dev_info);
-static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
+static int avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
@@ -107,7 +107,7 @@ static uint16_t avp_xmit_pkts(void *tx_queue,
static void avp_dev_rx_queue_release(void *rxq);
static void avp_dev_tx_queue_release(void *txq);
-static void avp_dev_stats_get(struct rte_eth_dev *dev,
+static int avp_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static void avp_dev_stats_reset(struct rte_eth_dev *dev);
@@ -190,7 +190,7 @@ struct avp_dev {
struct rte_eth_dev_data *dev_data;
/**< Back pointer to ethernet device data */
volatile uint32_t flags; /**< Device operational flags */
- uint8_t port_id; /**< Ethernet port identifier */
+ uint16_t port_id; /**< Ethernet port identifier */
struct rte_mempool *pool; /**< pkt mbuf mempool */
unsigned int guest_mbuf_size; /**< local pool mbuf size */
unsigned int host_mbuf_size; /**< host mbuf size */
@@ -387,7 +387,7 @@ avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
/* translate from host physical address to guest virtual address */
static void *
avp_dev_translate_address(struct rte_eth_dev *eth_dev,
- phys_addr_t host_phys_addr)
+ rte_iova_t host_phys_addr)
{
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
struct rte_mem_resource *resource;
@@ -1004,8 +1004,6 @@ eth_avp_dev_init(struct rte_eth_dev *eth_dev)
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
-
/* Check current migration status */
if (avp_dev_migration_pending(eth_dev)) {
PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
@@ -1361,7 +1359,7 @@ avp_dev_copy_from_buffers(struct avp_dev *avp,
src_offset = 0;
if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
- ol_flags = PKT_RX_VLAN_PKT;
+ ol_flags = PKT_RX_VLAN;
vlan_tci = pkt_buf->vlan_tci;
} else {
ol_flags = 0;
@@ -1619,7 +1617,7 @@ avp_recv_pkts(void *rx_queue,
m->port = avp->port_id;
if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
- m->ol_flags = PKT_RX_VLAN_PKT;
+ m->ol_flags = PKT_RX_VLAN;
m->vlan_tci = pkt_buf->vlan_tci;
}
@@ -2031,7 +2029,12 @@ avp_dev_configure(struct rte_eth_dev *eth_dev)
mask = (ETH_VLAN_STRIP_MASK |
ETH_VLAN_FILTER_MASK |
ETH_VLAN_EXTEND_MASK);
- avp_vlan_offload_set(eth_dev, mask);
+ ret = avp_vlan_offload_set(eth_dev, mask);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "VLAN offload set failed by host, ret=%d\n",
+ ret);
+ goto unlock;
+ }
/* update device config */
memset(&config, 0, sizeof(config));
@@ -2214,7 +2217,7 @@ avp_dev_info_get(struct rte_eth_dev *eth_dev,
}
}
-static void
+static int
avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
{
struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
@@ -2239,9 +2242,11 @@ avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
}
+
+ return 0;
}
-static void
+static int
avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
{
struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
@@ -2274,6 +2279,8 @@ avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
stats->q_errors[i] += txq->errors;
}
}
+
+ return 0;
}
static void
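
The avp hunks above follow the same ethdev API update already seen in af_packet and ark: the stats_get callback now returns int so a driver can report failure instead of returning silently. A minimal conforming callback is sketched below; example_stats_get and struct example_priv are made-up names for illustration.

#include <stdint.h>

#include <rte_ethdev.h>

/* Hypothetical private data; real drivers keep per-queue counters. */
struct example_priv {
	uint64_t rx_pkts;
	uint64_t tx_pkts;
};

/* stats_get callback with the updated signature: int return instead of void. */
static int
example_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct example_priv *priv = dev->data->dev_private;

	stats->ipackets = priv->rx_pkts;
	stats->opackets = priv->tx_pkts;
	return 0;	/* a negative value signals failure to the ethdev layer */
}

A driver would then typically reference it from its ops table, e.g. .stats_get = example_stats_get.
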
diff --git a/drivers/net/avp/rte_avp_common.h b/drivers/net/avp/rte_avp_common.h
index 488d7216..54437e9a 100644
--- a/drivers/net/avp/rte_avp_common.h
+++ b/drivers/net/avp/rte_avp_common.h
@@ -243,7 +243,7 @@ struct rte_avp_desc {
*/
struct rte_avp_memmap {
void *addr;
- phys_addr_t phys_addr;
+ rte_iova_t phys_addr;
uint64_t length;
};
@@ -345,7 +345,7 @@ RTE_AVP_MAKE_VERSION(RTE_AVP_RELEASE_VERSION_1, \
*/
struct rte_avp_mempool_info {
void *addr;
- phys_addr_t phys_addr;
+ rte_iova_t phys_addr;
uint64_t length;
};
@@ -359,10 +359,10 @@ struct rte_avp_device_info {
char ifname[RTE_AVP_NAMESIZE]; /**< Network device name for AVP */
- phys_addr_t tx_phys;
- phys_addr_t rx_phys;
- phys_addr_t alloc_phys;
- phys_addr_t free_phys;
+ rte_iova_t tx_phys;
+ rte_iova_t rx_phys;
+ rte_iova_t alloc_phys;
+ rte_iova_t free_phys;
uint32_t features; /**< Supported feature bitmap */
uint8_t min_rx_queues; /**< Minimum supported receive/free queues */
@@ -379,14 +379,14 @@ struct rte_avp_device_info {
uint32_t free_size; /**< Size of each free queue */
/* Used by Ethtool */
- phys_addr_t req_phys;
- phys_addr_t resp_phys;
- phys_addr_t sync_phys;
+ rte_iova_t req_phys;
+ rte_iova_t resp_phys;
+ rte_iova_t sync_phys;
void *sync_va;
/* mbuf mempool (used when a single memory area is supported) */
void *mbuf_va;
- phys_addr_t mbuf_phys;
+ rte_iova_t mbuf_phys;
/* mbuf mempools */
struct rte_avp_mempool_info pool[RTE_AVP_MAX_MEMPOOLS];
diff --git a/drivers/net/bnx2x/Makefile b/drivers/net/bnx2x/Makefile
index e1231069..90ff8b1e 100644
--- a/drivers/net/bnx2x/Makefile
+++ b/drivers/net/bnx2x/Makefile
@@ -9,6 +9,9 @@ CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
CFLAGS += -DZLIB_CONST
LDLIBS += -lz
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
EXPORT_MAP := rte_pmd_bnx2x_version.map
diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index 06733d15..9394f6c5 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -184,7 +184,7 @@ bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size, struct bnx2x_dma *dma,
PMD_DRV_LOG(ERR, "DMA alloc failed for %s", msg);
return -ENOMEM;
}
- dma->paddr = (uint64_t) z->phys_addr;
+ dma->paddr = (uint64_t) z->iova;
dma->vaddr = z->addr;
PMD_DRV_LOG(DEBUG, "%s: virt=%p phys=%" PRIx64, msg, dma->vaddr, dma->paddr);
@@ -419,7 +419,7 @@ void bnx2x_read_dmae(struct bnx2x_softc *sc, uint32_t src_addr, uint32_t len32)
}
void
-bnx2x_write_dmae(struct bnx2x_softc *sc, phys_addr_t dma_addr, uint32_t dst_addr,
+bnx2x_write_dmae(struct bnx2x_softc *sc, rte_iova_t dma_addr, uint32_t dst_addr,
uint32_t len32)
{
struct dmae_command dmae;
@@ -447,7 +447,7 @@ bnx2x_write_dmae(struct bnx2x_softc *sc, phys_addr_t dma_addr, uint32_t dst_addr
}
static void
-bnx2x_write_dmae_phys_len(struct bnx2x_softc *sc, phys_addr_t phys_addr,
+bnx2x_write_dmae_phys_len(struct bnx2x_softc *sc, rte_iova_t phys_addr,
uint32_t addr, uint32_t len)
{
uint32_t dmae_wr_max = DMAE_LEN32_WR_MAX(sc);
@@ -823,14 +823,14 @@ bnx2x_fw_command(struct bnx2x_softc *sc, uint32_t command, uint32_t param)
static void
__storm_memset_dma_mapping(struct bnx2x_softc *sc, uint32_t addr,
- phys_addr_t mapping)
+ rte_iova_t mapping)
{
REG_WR(sc, addr, U64_LO(mapping));
REG_WR(sc, (addr + 4), U64_HI(mapping));
}
static void
-storm_memset_spq_addr(struct bnx2x_softc *sc, phys_addr_t mapping,
+storm_memset_spq_addr(struct bnx2x_softc *sc, rte_iova_t mapping,
uint16_t abs_fid)
{
uint32_t addr = (XSEM_REG_FAST_MEMORY +
@@ -1498,7 +1498,7 @@ bnx2x_set_q_rx_mode(struct bnx2x_softc *sc, uint8_t cl_id,
ramrod_param.rdata = BNX2X_SP(sc, rx_mode_rdata);
ramrod_param.rdata_mapping =
- (phys_addr_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
+ (rte_iova_t)BNX2X_SP_MAPPING(sc, rx_mode_rdata),
bnx2x_set_bit(ECORE_FILTER_RX_MODE_PENDING, &sc->sp_state);
ramrod_param.ramrod_flags = ramrod_flags;
@@ -2135,7 +2135,7 @@ int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0)
tx_start_bd = &txq->tx_ring[TX_BD(bd_prod, txq)].start_bd;
tx_start_bd->addr =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr(m0));
+ rte_cpu_to_le_64(rte_mbuf_data_iova(m0));
tx_start_bd->nbytes = rte_cpu_to_le_16(m0->data_len);
tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD;
tx_start_bd->general_data =
@@ -4599,9 +4599,9 @@ static void bnx2x_init_func_obj(struct bnx2x_softc *sc)
ecore_init_func_obj(sc,
&sc->func_obj,
BNX2X_SP(sc, func_rdata),
- (phys_addr_t)BNX2X_SP_MAPPING(sc, func_rdata),
+ (rte_iova_t)BNX2X_SP_MAPPING(sc, func_rdata),
BNX2X_SP(sc, func_afex_rdata),
- (phys_addr_t)BNX2X_SP_MAPPING(sc, func_afex_rdata),
+ (rte_iova_t)BNX2X_SP_MAPPING(sc, func_afex_rdata),
&bnx2x_func_sp_drv);
}
@@ -4772,7 +4772,7 @@ static void bnx2x_map_sb_state_machines(struct hc_index_data *index_data)
}
static void
-bnx2x_init_sb(struct bnx2x_softc *sc, phys_addr_t busaddr, int vfid,
+bnx2x_init_sb(struct bnx2x_softc *sc, rte_iova_t busaddr, int vfid,
uint8_t vf_valid, int fw_sb_id, int igu_sb_id)
{
struct hc_status_block_data_e2 sb_data_e2;
@@ -4918,7 +4918,7 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
sc->max_cos,
SC_FUNC(sc),
BNX2X_SP(sc, q_rdata),
- (phys_addr_t)BNX2X_SP_MAPPING(sc, q_rdata),
+ (rte_iova_t)BNX2X_SP_MAPPING(sc, q_rdata),
q_type);
/* configure classification DBs */
@@ -4928,7 +4928,7 @@ static void bnx2x_init_eth_fp(struct bnx2x_softc *sc, int idx)
idx,
SC_FUNC(sc),
BNX2X_SP(sc, mac_rdata),
- (phys_addr_t)BNX2X_SP_MAPPING(sc, mac_rdata),
+ (rte_iova_t)BNX2X_SP_MAPPING(sc, mac_rdata),
ECORE_FILTER_MAC_PENDING, &sc->sp_state,
ECORE_OBJ_TYPE_RX_TX, &sc->macs_pool);
}
@@ -5028,7 +5028,7 @@ static void bnx2x_init_tx_rings(struct bnx2x_softc *sc)
static void bnx2x_init_def_sb(struct bnx2x_softc *sc)
{
struct host_sp_status_block *def_sb = sc->def_sb;
- phys_addr_t mapping = sc->def_sb_dma.paddr;
+ rte_iova_t mapping = sc->def_sb_dma.paddr;
int igu_sp_sb_index;
int igu_seg_id;
int port = SC_PORT(sc);
@@ -5700,7 +5700,7 @@ static void bnx2x_init_objs(struct bnx2x_softc *sc)
SC_FUNC(sc),
SC_FUNC(sc),
BNX2X_SP(sc, mcast_rdata),
- (phys_addr_t)BNX2X_SP_MAPPING(sc, mcast_rdata),
+ (rte_iova_t)BNX2X_SP_MAPPING(sc, mcast_rdata),
ECORE_FILTER_MCAST_PENDING,
&sc->sp_state, o_type);
@@ -5724,7 +5724,7 @@ static void bnx2x_init_objs(struct bnx2x_softc *sc)
SC_FUNC(sc),
SC_FUNC(sc),
BNX2X_SP(sc, rss_rdata),
- (phys_addr_t)BNX2X_SP_MAPPING(sc, rss_rdata),
+ (rte_iova_t)BNX2X_SP_MAPPING(sc, rss_rdata),
ECORE_FILTER_RSS_CONF_PENDING,
&sc->sp_state, ECORE_OBJ_TYPE_RX);
}
@@ -6445,9 +6445,9 @@ bnx2x_pf_rx_q_prep(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
pause->pri_map = 1;
/* rxq setup */
- rxq_init->dscr_map = (phys_addr_t)rxq->rx_ring_phys_addr;
- rxq_init->rcq_map = (phys_addr_t)rxq->cq_ring_phys_addr;
- rxq_init->rcq_np_map = (phys_addr_t)(rxq->cq_ring_phys_addr +
+ rxq_init->dscr_map = (rte_iova_t)rxq->rx_ring_phys_addr;
+ rxq_init->rcq_map = (rte_iova_t)rxq->cq_ring_phys_addr;
+ rxq_init->rcq_np_map = (rte_iova_t)(rxq->cq_ring_phys_addr +
BNX2X_PAGE_SIZE);
/*
@@ -6486,7 +6486,7 @@ bnx2x_pf_tx_q_prep(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
PMD_TX_LOG(ERR, "ERROR: TX queue is NULL");
return;
}
- txq_init->dscr_map = (phys_addr_t)txq->tx_ring_phys_addr;
+ txq_init->dscr_map = (rte_iova_t)txq->tx_ring_phys_addr;
txq_init->sb_cq_index = HC_INDEX_ETH_FIRST_TX_CQ_CONS + cos;
txq_init->traffic_type = LLFC_TRAFFIC_TYPE_NW;
txq_init->fw_sb_id = fp->fw_sb_id;
@@ -6604,7 +6604,7 @@ bnx2x_config_rss_pf(struct bnx2x_softc *sc, struct ecore_rss_config_obj *rss_obj
/* Hash bits */
params.rss_result_mask = MULTI_MASK;
- (void)rte_memcpy(params.ind_table, rss_obj->ind_table,
+ rte_memcpy(params.ind_table, rss_obj->ind_table,
sizeof(params.ind_table));
if (config_hash) {
@@ -6671,7 +6671,7 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
/* fill a user request section if needed */
if (!bnx2x_test_bit(RAMROD_CONT, ramrod_flags)) {
- (void)rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
+ rte_memcpy(ramrod_param.user_req.u.mac.mac, mac,
ETH_ALEN);
bnx2x_set_bit(mac_type, &ramrod_param.user_req.vlan_mac_flags);
@@ -6879,7 +6879,7 @@ static void bnx2x_link_report(struct bnx2x_softc *sc)
sc->link_cnt++;
/* report new link params and remember the state for the next time */
- (void)rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
+ rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
&cur_data.link_report_flags)) {
@@ -11059,7 +11059,7 @@ static int bnx2x_init_hw_func(struct bnx2x_softc *sc)
for (i = 0; i < L2_ILT_LINES(sc); i++) {
ilt->lines[cdu_ilt_start + i].page = sc->context[i].vcxt;
ilt->lines[cdu_ilt_start + i].page_mapping =
- (phys_addr_t)sc->context[i].vcxt_dma.paddr;
+ (rte_iova_t)sc->context[i].vcxt_dma.paddr;
ilt->lines[cdu_ilt_start + i].size = sc->context[i].size;
}
ecore_ilt_init_op(sc, INITOP_SET);
@@ -11357,7 +11357,7 @@ static void bnx2x_reset_port(struct bnx2x_softc *sc)
}
}
-static void bnx2x_ilt_wr(struct bnx2x_softc *sc, uint32_t index, phys_addr_t addr)
+static void bnx2x_ilt_wr(struct bnx2x_softc *sc, uint32_t index, rte_iova_t addr)
{
int reg;
uint32_t wb_write[2];
@@ -11587,7 +11587,7 @@ static int ecore_gunzip(struct bnx2x_softc *sc, const uint8_t * zbuf, int len)
}
static void
-ecore_write_dmae_phys_len(struct bnx2x_softc *sc, phys_addr_t phys_addr,
+ecore_write_dmae_phys_len(struct bnx2x_softc *sc, rte_iova_t phys_addr,
uint32_t addr, uint32_t len)
{
bnx2x_write_dmae_phys_len(sc, phys_addr, addr, len);
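
bnx2x_dma_alloc() near the top of this file now records the memzone's bus address from z->iova instead of z->phys_addr, matching the IOVA rename. A cut-down sketch of that allocation pattern is given below, assuming a memzone-backed DMA buffer; dma_buf_alloc() and struct dma_buf are illustrative, not the driver's types.

#include <errno.h>
#include <stddef.h>

#include <rte_memory.h>
#include <rte_memzone.h>

struct dma_buf {
	void		*vaddr;	/* CPU virtual address */
	rte_iova_t	paddr;	/* bus address programmed into the device */
};

/* Illustrative DMA allocation helper using the renamed IOVA field. */
static int
dma_buf_alloc(const char *name, size_t size, unsigned int align,
	      struct dma_buf *buf)
{
	const struct rte_memzone *mz;

	/* align must be a power of two, as rte_memzone requires */
	mz = rte_memzone_reserve_aligned(name, size, SOCKET_ID_ANY, 0, align);
	if (mz == NULL)
		return -ENOMEM;

	buf->vaddr = mz->addr;
	buf->paddr = mz->iova;	/* formerly mz->phys_addr */
	return 0;
}
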
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 91c5aec2..17075d38 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -18,6 +18,7 @@
#include <rte_byteorder.h>
#include <rte_spinlock.h>
+#include <rte_bus_pci.h>
#include <rte_io.h>
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
@@ -317,7 +318,7 @@ struct bnx2x_bar {
/* Used to manage DMA allocations. */
struct bnx2x_dma {
struct bnx2x_softc *sc;
- phys_addr_t paddr;
+ rte_iova_t paddr;
void *vaddr;
int nseg;
char msg[RTE_MEMZONE_NAMESIZE - 6];
@@ -370,10 +371,10 @@ struct bnx2x_fastpath {
struct bnx2x_dma sb_dma;
union bnx2x_host_hc_status_block status_block;
- phys_addr_t tx_desc_mapping;
+ rte_iova_t tx_desc_mapping;
- phys_addr_t rx_desc_mapping;
- phys_addr_t rx_comp_mapping;
+ rte_iova_t rx_desc_mapping;
+ rte_iova_t rx_comp_mapping;
uint16_t *sb_index_values;
uint16_t *sb_running_index;
@@ -468,7 +469,7 @@ union cdu_context {
struct hw_context {
struct bnx2x_dma vcxt_dma;
union cdu_context *vcxt;
- //phys_addr_t cxt_mapping;
+ //rte_iova_t cxt_mapping;
size_t size;
};
@@ -1242,7 +1243,7 @@ struct bnx2x_softc {
uint32_t gz_outlen;
#define GUNZIP_BUF(sc) (sc->gz_buf)
#define GUNZIP_OUTLEN(sc) (sc->gz_outlen)
-#define GUNZIP_PHYS(sc) (phys_addr_t)(sc->gz_buf_dma.paddr)
+#define GUNZIP_PHYS(sc) (rte_iova_t)(sc->gz_buf_dma.paddr)
#define FW_BUF_SIZE 0x40000
struct raw_op *init_ops;
@@ -1310,14 +1311,14 @@ struct bnx2x_softc {
*/
int fw_stats_req_size;
struct bnx2x_fw_stats_req *fw_stats_req;
- phys_addr_t fw_stats_req_mapping;
+ rte_iova_t fw_stats_req_mapping;
/*
* FW statistics data shortcut (points at the beginning of fw_stats
* buffer + fw_stats_req_size).
*/
int fw_stats_data_size;
struct bnx2x_fw_stats_data *fw_stats_data;
- phys_addr_t fw_stats_data_mapping;
+ rte_iova_t fw_stats_data_mapping;
/* tracking a pending STAT_QUERY ramrod */
uint16_t stats_pending;
@@ -1402,8 +1403,8 @@ union bnx2x_stats_show_data {
#define FUNC_FLG_LEADING 0x0020 /* PF only */
struct bnx2x_func_init_params {
- phys_addr_t fw_stat_map; /* (dma) valid if FUNC_FLG_STATS */
- phys_addr_t spq_map; /* (dma) valid if FUNC_FLG_SPQ */
+ rte_iova_t fw_stat_map; /* (dma) valid if FUNC_FLG_STATS */
+ rte_iova_t spq_map; /* (dma) valid if FUNC_FLG_SPQ */
uint16_t func_flgs;
uint16_t func_id; /* abs function id */
uint16_t pf_id;
@@ -1525,12 +1526,12 @@ bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset)
#define REG_RD_DMAE(sc, offset, valp, len32) \
do { \
(void)bnx2x_read_dmae(sc, offset, len32); \
- (void)rte_memcpy(valp, BNX2X_SP(sc, wb_data[0]), (len32) * 4); \
+ rte_memcpy(valp, BNX2X_SP(sc, wb_data[0]), (len32) * 4); \
} while (0)
#define REG_WR_DMAE(sc, offset, valp, len32) \
do { \
- (void)rte_memcpy(BNX2X_SP(sc, wb_data[0]), valp, (len32) * 4); \
+ rte_memcpy(BNX2X_SP(sc, wb_data[0]), valp, (len32) * 4); \
(void)bnx2x_write_dmae(sc, BNX2X_SP_MAPPING(sc, wb_data), offset, len32); \
} while (0)
@@ -1748,7 +1749,7 @@ uint32_t bnx2x_dmae_opcode(struct bnx2x_softc *sc, uint8_t src_type,
uint8_t comp_type);
void bnx2x_post_dmae(struct bnx2x_softc *sc, struct dmae_command *dmae, int idx);
void bnx2x_read_dmae(struct bnx2x_softc *sc, uint32_t src_addr, uint32_t len32);
-void bnx2x_write_dmae(struct bnx2x_softc *sc, phys_addr_t dma_addr,
+void bnx2x_write_dmae(struct bnx2x_softc *sc, rte_iova_t dma_addr,
uint32_t dst_addr, uint32_t len32);
void bnx2x_set_ctx_validation(struct bnx2x_softc *sc, struct eth_context *cxt,
uint32_t cid);
diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c
index 6f62a37f..95861a06 100644
--- a/drivers/net/bnx2x/bnx2x_ethdev.c
+++ b/drivers/net/bnx2x/bnx2x_ethdev.c
@@ -329,7 +329,7 @@ bnx2xvf_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_comple
return old_link_status == dev->data->dev_link.link_status ? -1 : 0;
}
-static void
+static int
bnx2x_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct bnx2x_softc *sc = dev->data->dev_private;
@@ -389,6 +389,8 @@ bnx2x_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->imissed = brb_drops + brb_truncates +
brb_truncate_discard + stats->rx_nombuf;
+
+ return 0;
}
static int
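Editor's note: the stats callback in the hunk above changes from void to int so failures can propagate to the application. A minimal caller-side sketch, assuming only the standard rte_ethdev API.

    /* Editor's sketch: with stats_get returning a status, callers can
     * distinguish an error from a zeroed counter set. */
    #include <stdio.h>
    #include <inttypes.h>
    #include <rte_ethdev.h>

    static int
    print_port_stats(uint16_t port_id)
    {
            struct rte_eth_stats stats;
            int ret = rte_eth_stats_get(port_id, &stats);

            if (ret != 0)
                    return ret;     /* driver-reported failure */
            printf("port %u: ipackets=%" PRIu64 " imissed=%" PRIu64 "\n",
                   port_id, stats.ipackets, stats.imissed);
            return 0;
    }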
diff --git a/drivers/net/bnx2x/bnx2x_ethdev.h b/drivers/net/bnx2x/bnx2x_ethdev.h
index a9da9de8..967d6dc5 100644
--- a/drivers/net/bnx2x/bnx2x_ethdev.h
+++ b/drivers/net/bnx2x/bnx2x_ethdev.h
@@ -35,7 +35,6 @@
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_spinlock.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
diff --git a/drivers/net/bnx2x/bnx2x_rxtx.c b/drivers/net/bnx2x/bnx2x_rxtx.c
index 5dd4aee7..a0d4ac92 100644
--- a/drivers/net/bnx2x/bnx2x_rxtx.c
+++ b/drivers/net/bnx2x/bnx2x_rxtx.c
@@ -71,8 +71,8 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev,
struct bnx2x_softc *sc = dev->data->dev_private;
struct bnx2x_fastpath *fp = &sc->fp[queue_idx];
struct eth_rx_cqe_next_page *nextpg;
- phys_addr_t *rx_bd;
- phys_addr_t busaddr;
+ rte_iova_t *rx_bd;
+ rte_iova_t busaddr;
/* First allocate the rx queue data structure */
rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct bnx2x_rx_queue),
@@ -108,7 +108,7 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev,
bnx2x_rx_queue_release(rxq);
return -ENOMEM;
}
- fp->rx_desc_mapping = rxq->rx_ring_phys_addr = (uint64_t)dma->phys_addr;
+ fp->rx_desc_mapping = rxq->rx_ring_phys_addr = (uint64_t)dma->iova;
rxq->rx_ring = (uint64_t*)dma->addr;
memset((void *)rxq->rx_ring, 0, dma_size);
@@ -140,7 +140,7 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev,
return -ENOMEM;
}
rxq->sw_ring[idx] = mbuf;
- rxq->rx_ring[idx] = mbuf->buf_physaddr;
+ rxq->rx_ring[idx] = mbuf->buf_iova;
}
rxq->pkt_first_seg = NULL;
rxq->pkt_last_seg = NULL;
@@ -154,7 +154,7 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev,
PMD_RX_LOG(ERR, "RCQ alloc failed");
return -ENOMEM;
}
- fp->rx_comp_mapping = rxq->cq_ring_phys_addr = (uint64_t)dma->phys_addr;
+ fp->rx_comp_mapping = rxq->cq_ring_phys_addr = (uint64_t)dma->iova;
rxq->cq_ring = (union eth_rx_cqe*)dma->addr;
/* Link the CQ chain pages. */
@@ -289,7 +289,7 @@ bnx2x_dev_tx_queue_setup(struct rte_eth_dev *dev,
bnx2x_tx_queue_release(txq);
return -ENOMEM;
}
- fp->tx_desc_mapping = txq->tx_ring_phys_addr = (uint64_t)tz->phys_addr;
+ fp->tx_desc_mapping = txq->tx_ring_phys_addr = (uint64_t)tz->iova;
txq->tx_ring = (union eth_tx_bd_types *) tz->addr;
memset(txq->tx_ring, 0, tsize);
@@ -400,7 +400,7 @@ bnx2x_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
rx_mb = rxq->sw_ring[bd_cons];
rxq->sw_ring[bd_cons] = new_mb;
- rxq->rx_ring[bd_prod] = new_mb->buf_physaddr;
+ rxq->rx_ring[bd_prod] = new_mb->buf_iova;
rx_pref = NEXT_RX_BD(bd_cons) & MAX_RX_BD(rxq);
rte_prefetch0(rxq->sw_ring[rx_pref]);
@@ -422,7 +422,7 @@ bnx2x_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
*/
if (cqe_fp->pars_flags.flags & PARSING_FLAGS_VLAN) {
rx_mb->vlan_tci = cqe_fp->vlan_tag;
- rx_mb->ol_flags |= PKT_RX_VLAN_PKT;
+ rx_mb->ol_flags |= PKT_RX_VLAN;
}
rx_pkts[nb_rx] = rx_mb;
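Editor's note: in the rxtx hunks above the descriptor rings are now programmed with the mbuf's buf_iova and the memzone's iova, and VLAN-tagged packets are flagged with PKT_RX_VLAN instead of PKT_RX_VLAN_PKT. A hedged sketch of the refill step with the ring reduced to a bare address array.

    /* Editor's sketch: refill one RX buffer descriptor with the mbuf's
     * IO address (formerly buf_physaddr).  Error handling trimmed. */
    #include <errno.h>
    #include <rte_mbuf.h>

    static int
    refill_rx_bd(struct rte_mempool *mp, uint64_t *rx_ring,
                 struct rte_mbuf **sw_ring, uint16_t idx)
    {
            struct rte_mbuf *m = rte_mbuf_raw_alloc(mp);

            if (m == NULL)
                    return -ENOMEM;
            sw_ring[idx] = m;
            rx_ring[idx] = m->buf_iova;
            return 0;
    }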
diff --git a/drivers/net/bnx2x/bnx2x_rxtx.h b/drivers/net/bnx2x/bnx2x_rxtx.h
index 2e38ec26..9600e0f1 100644
--- a/drivers/net/bnx2x/bnx2x_rxtx.h
+++ b/drivers/net/bnx2x/bnx2x_rxtx.h
@@ -41,7 +41,7 @@ struct bnx2x_rx_queue {
uint16_t rx_cq_head; /**< Index of current rcq bd. */
uint16_t rx_cq_tail; /**< Index of last rcq bd. */
uint16_t queue_id; /**< RX queue index. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
struct bnx2x_softc *sc; /**< Ptr to dev_private data. */
};
@@ -62,7 +62,7 @@ struct bnx2x_tx_queue {
uint16_t nb_tx_avail; /**< Number of TX descriptors available. */
uint16_t nb_tx_pages; /**< number of TX pages */
uint16_t queue_id; /**< TX queue index. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
struct bnx2x_softc *sc; /**< Ptr to dev_private data */
};
diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c
index 6223cfef..b9b85963 100644
--- a/drivers/net/bnx2x/bnx2x_stats.c
+++ b/drivers/net/bnx2x/bnx2x_stats.c
@@ -1338,7 +1338,7 @@ bnx2x_prep_fw_stats_req(struct bnx2x_softc *sc)
int i;
int first_queue_query_index;
struct stats_query_header *stats_hdr = &sc->fw_stats_req->hdr;
- phys_addr_t cur_data_offset;
+ rte_iova_t cur_data_offset;
struct stats_query_entry *cur_query_entry;
stats_hdr->cmd_num = sc->fw_stats_num;
diff --git a/drivers/net/bnx2x/bnx2x_vfpf.c b/drivers/net/bnx2x/bnx2x_vfpf.c
index 0ca0df87..3c08f2a2 100644
--- a/drivers/net/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/bnx2x/bnx2x_vfpf.c
@@ -113,7 +113,7 @@ bnx2x_vf_finalize(struct bnx2x_softc *sc,
#define BNX2X_VF_CHANNEL_TRIES 100
static int
-bnx2x_do_req4pf(struct bnx2x_softc *sc, phys_addr_t phys_addr)
+bnx2x_do_req4pf(struct bnx2x_softc *sc, rte_iova_t phys_addr)
{
uint8_t *status = &sc->vf2pf_mbox->resp.common_reply.status;
uint8_t i;
diff --git a/drivers/net/bnx2x/ecore_sp.h b/drivers/net/bnx2x/ecore_sp.h
index e7ec96e9..ff40413c 100644
--- a/drivers/net/bnx2x/ecore_sp.h
+++ b/drivers/net/bnx2x/ecore_sp.h
@@ -36,7 +36,7 @@
#include "ecore_reg.h"
struct bnx2x_softc;
-typedef phys_addr_t ecore_dma_addr_t; /* expected to be 64 bit wide */
+typedef rte_iova_t ecore_dma_addr_t; /* expected to be 64 bit wide */
typedef volatile int ecore_atomic_t;
@@ -113,7 +113,7 @@ typedef rte_spinlock_t ECORE_MUTEX_SPIN;
#define ECORE_FCOE_CID(sc) ((sc)->fp[FCOE_IDX(sc)].cl_id)
#define ECORE_MEMCMP(_a, _b, _s) memcmp(_a, _b, _s)
-#define ECORE_MEMCPY(_a, _b, _s) (void)rte_memcpy(_a, _b, _s)
+#define ECORE_MEMCPY(_a, _b, _s) rte_memcpy(_a, _b, _s)
#define ECORE_MEMSET(_a, _c, _s) memset(_a, _c, _s)
#define ECORE_CPU_TO_LE16(x) htole16(x)
diff --git a/drivers/net/bnxt/Makefile b/drivers/net/bnxt/Makefile
index b03f65dc..2aa04411 100644
--- a/drivers/net/bnxt/Makefile
+++ b/drivers/net/bnxt/Makefile
@@ -40,10 +40,13 @@ LIB = librte_pmd_bnxt.a
EXPORT_MAP := rte_pmd_bnxt_version.map
-LIBABIVER := 1
+LIBABIVER := 2
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
EXPORT_MAP := rte_pmd_bnxt_version.map
diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 405d94de..646fe79e 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -39,6 +39,7 @@
#include <sys/queue.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_ethdev.h>
#include <rte_memory.h>
#include <rte_lcore.h>
@@ -126,13 +127,13 @@ struct bnxt_pf_info {
#define BNXT_FIRST_VF_FID 128
#define BNXT_PF_RINGS_USED(bp) bnxt_get_num_queues(bp)
#define BNXT_PF_RINGS_AVAIL(bp) (bp->pf.max_cp_rings - BNXT_PF_RINGS_USED(bp))
- uint8_t port_id;
+ uint16_t port_id;
uint16_t first_vf_id;
uint16_t active_vfs;
uint16_t max_vfs;
uint32_t func_cfg_flags;
void *vf_req_buf;
- phys_addr_t vf_req_buf_dma_addr;
+ rte_iova_t vf_req_buf_dma_addr;
uint32_t vf_req_fwd[8];
uint16_t total_vnics;
struct bnxt_child_vf_info *vf_info;
@@ -171,11 +172,18 @@ struct bnxt_cos_queue_info {
uint8_t profile;
};
+struct rte_flow {
+ STAILQ_ENTRY(rte_flow) next;
+ struct bnxt_filter_info *filter;
+ struct bnxt_vnic_info *vnic;
+};
+
#define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input)
struct bnxt {
void *bar0;
struct rte_eth_dev *eth_dev;
+ struct rte_eth_rss_conf rss_conf;
struct rte_pci_device *pdev;
uint32_t flags;
@@ -184,6 +192,7 @@ struct bnxt {
#define BNXT_FLAG_PORT_STATS (1 << 2)
#define BNXT_FLAG_JUMBO (1 << 3)
#define BNXT_FLAG_SHORT_CMD (1 << 4)
+#define BNXT_FLAG_UPDATE_HASH (1 << 5)
#define BNXT_PF(bp) (!((bp)->flags & BNXT_FLAG_VF))
#define BNXT_VF(bp) ((bp)->flags & BNXT_FLAG_VF)
#define BNXT_NPAR_ENABLED(bp) ((bp)->port_partition_type)
@@ -194,14 +203,14 @@ struct bnxt {
struct bnxt_rx_queue **rx_queues;
const void *rx_mem_zone;
struct rx_port_stats *hw_rx_port_stats;
- phys_addr_t hw_rx_port_stats_map;
+ rte_iova_t hw_rx_port_stats_map;
unsigned int tx_nr_rings;
unsigned int tx_cp_nr_rings;
struct bnxt_tx_queue **tx_queues;
const void *tx_mem_zone;
struct tx_port_stats *hw_tx_port_stats;
- phys_addr_t hw_tx_port_stats_map;
+ rte_iova_t hw_tx_port_stats_map;
/* Default completion ring */
struct bnxt_cp_ring_info *def_cp_ring;
@@ -217,7 +226,7 @@ struct bnxt {
STAILQ_HEAD(, bnxt_filter_info) free_filter_list;
/* VNIC pointer for flow filter (VMDq) pools */
-#define MAX_FF_POOLS ETH_64_POOLS
+#define MAX_FF_POOLS 256
STAILQ_HEAD(, bnxt_vnic_info) ff_pool[MAX_FF_POOLS];
struct bnxt_irq *irq_tbl;
@@ -227,9 +236,9 @@ struct bnxt {
uint16_t hwrm_cmd_seq;
void *hwrm_cmd_resp_addr;
- phys_addr_t hwrm_cmd_resp_dma_addr;
+ rte_iova_t hwrm_cmd_resp_dma_addr;
void *hwrm_short_cmd_req_addr;
- phys_addr_t hwrm_short_cmd_req_dma_addr;
+ rte_iova_t hwrm_short_cmd_req_dma_addr;
rte_spinlock_t hwrm_lock;
uint16_t max_req_len;
uint16_t max_resp_len;
@@ -269,4 +278,5 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
#define RX_PROD_AGG_BD_TYPE_RX_PROD_AGG 0x6
bool is_bnxt_supported(struct rte_eth_dev *dev);
+extern const struct rte_flow_ops bnxt_flow_ops;
#endif
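Editor's note: the new struct rte_flow above is the driver-private handle bnxt's rte_flow support returns; it simply ties a hardware filter to the VNIC it steers into. A hedged sketch of how a created flow could be tracked; the list head and helper are hypothetical, only struct rte_flow mirrors the definition above.

    /* Editor's sketch: record a created flow on a per-VNIC list.
     * bnxt_flow_list and track_new_flow() are hypothetical names. */
    #include <sys/queue.h>
    #include <rte_malloc.h>
    #include "bnxt.h"

    STAILQ_HEAD(bnxt_flow_list, rte_flow);

    static struct rte_flow *
    track_new_flow(struct bnxt_flow_list *flows, struct bnxt_vnic_info *vnic,
                   struct bnxt_filter_info *filter)
    {
            struct rte_flow *flow = rte_zmalloc("bnxt_flow", sizeof(*flow), 0);

            if (flow == NULL)
                    return NULL;
            flow->filter = filter;  /* HW filter backing this rte_flow */
            flow->vnic = vnic;      /* destination VNIC / queue group */
            STAILQ_INSERT_TAIL(flows, flow, next);
            return flow;
    }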
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 68979bc4..26b2755e 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -183,8 +183,10 @@ void bnxt_free_def_cp_ring(struct bnxt *bp)
return;
bnxt_free_ring(cpr->cp_ring_struct);
+ cpr->cp_ring_struct = NULL;
rte_free(cpr->cp_ring_struct);
rte_free(cpr);
+ bp->def_cp_ring = NULL;
}
/* For the default completion ring only */
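Editor's note: bnxt_free_def_cp_ring() now clears bp->def_cp_ring (and the ring-struct pointer) so later code can detect that the default completion ring is gone. A minimal sketch of a guard that relies on that; the polling wrapper is hypothetical.

    /* Editor's sketch: the NULLed pointers let callers bail out safely
     * after teardown.  bnxt_poll_def_cp_ring() is a hypothetical wrapper. */
    static void
    bnxt_poll_def_cp_ring(struct bnxt *bp)
    {
            struct bnxt_cp_ring_info *cpr = bp->def_cp_ring;

            if (cpr == NULL || cpr->cp_ring_struct == NULL)
                    return;         /* already freed by bnxt_free_def_cp_ring() */
            /* ... process async completions on cpr ... */
    }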
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index a6e87858..ce2b0cb8 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -41,6 +41,9 @@
(!!(((struct cmpl_base *)(cmp))->info3_v & CMPL_BASE_V) == \
!((raw_cons) & ((ring)->ring_size)))
+#define CMPL_VALID(cmp, v) \
+ (!!(((struct cmpl_base *)(cmp))->info3_v & CMPL_BASE_V) == !(v))
+
#define CMP_TYPE(cmp) \
(((struct cmpl_base *)cmp)->type & CMPL_BASE_TYPE_MASK)
@@ -48,6 +51,7 @@
#define NEXT_RAW_CMP(idx) ADV_RAW_CMP(idx, 1)
#define RING_CMP(ring, idx) ((idx) & (ring)->ring_mask)
#define NEXT_CMP(idx) RING_CMP(ADV_RAW_CMP(idx, 1))
+#define FLIP_VALID(cons, mask, val) ((cons) >= (mask) ? !(val) : (val))
#define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID)
#define DB_CP_FLAGS (DB_KEY_CP | DB_IDX_VALID | DB_IRQ_DIS)
@@ -82,15 +86,15 @@ struct bnxt_cp_ring_info {
struct cmpl_base *cp_desc_ring;
- phys_addr_t cp_desc_mapping;
+ rte_iova_t cp_desc_mapping;
struct ctx_hw_stats *hw_stats;
- phys_addr_t hw_stats_map;
+ rte_iova_t hw_stats_map;
uint32_t hw_stats_ctx_id;
struct bnxt_ring *cp_ring_struct;
uint16_t cp_cons;
- bool v;
+ bool valid;
};
#define RX_CMP_L2_ERRORS \
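Editor's note: CMPL_VALID and FLIP_VALID replace the raw-consumer trick with an explicit valid phase (the renamed cpr->valid): hardware toggles the completion valid bit on every ring wrap, so software flips its expectation at the wrap point. A hedged sketch of the consumption pattern; handle_cmpl() is a placeholder and bounds/IRQ handling is omitted.

    /* Editor's sketch of the valid-phase pattern the new macros support.
     * Assumes bnxt_cpr.h for the macros and struct definitions. */
    #include <stdbool.h>
    #include <stdint.h>
    #include "bnxt_cpr.h"

    static void
    drain_cp_ring(struct bnxt_cp_ring_info *cpr, uint32_t ring_mask)
    {
            bool valid = cpr->valid;        /* phase we expect next */
            uint32_t raw_cons = 0;
            struct cmpl_base *cmp;
            uint32_t cons;

            for (;;) {
                    cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
                    cmp = &cpr->cp_desc_ring[cons];
                    if (!CMPL_VALID(cmp, valid))
                            break;          /* HW has not written this slot yet */
                    valid = FLIP_VALID(cons, ring_mask, valid);
                    /* handle_cmpl(cmp); */
                    raw_cons++;
            }
            cpr->valid = valid;
    }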
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index c9d11228..e8c7d0e7 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -53,6 +53,7 @@
#include "bnxt_txr.h"
#include "bnxt_vnic.h"
#include "hsi_struct_def_dpdk.h"
+#include "bnxt_nvm_defs.h"
#define DRV_MODULE_NAME "bnxt"
static const char bnxt_version[] =
@@ -144,7 +145,7 @@ static const struct rte_pci_id bnxt_pci_id_map[] = {
ETH_RSS_NONFRAG_IPV6_TCP | \
ETH_RSS_NONFRAG_IPV6_UDP)
-static void bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
+static int bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
/***********************/
@@ -201,8 +202,16 @@ static int bnxt_init_chip(struct bnxt *bp)
{
unsigned int i, rss_idx, fw_idx;
struct rte_eth_link new;
+ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(bp->eth_dev);
+ struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+ uint32_t intr_vector = 0;
+ uint32_t queue_id, base = BNXT_MISC_VEC_ID;
+ uint32_t vec = BNXT_MISC_VEC_ID;
int rc;
+ /* disable uio/vfio intr/eventfd mapping */
+ rte_intr_disable(intr_handle);
+
if (bp->eth_dev->data->mtu > ETHER_MTU) {
bp->eth_dev->data->dev_conf.rxmode.jumbo_frame = 1;
bp->flags |= BNXT_FLAG_JUMBO;
@@ -305,6 +314,48 @@ static int bnxt_init_chip(struct bnxt *bp)
goto err_out;
}
+ /* check and configure queue intr-vector mapping */
+ if ((rte_intr_cap_multiple(intr_handle) ||
+ !RTE_ETH_DEV_SRIOV(bp->eth_dev).active) &&
+ bp->eth_dev->data->dev_conf.intr_conf.rxq != 0) {
+ intr_vector = bp->eth_dev->data->nb_rx_queues;
+ RTE_LOG(INFO, PMD, "%s(): intr_vector = %d\n", __func__,
+ intr_vector);
+ if (intr_vector > bp->rx_cp_nr_rings) {
+ RTE_LOG(ERR, PMD, "At most %d intr queues supported",
+ bp->rx_cp_nr_rings);
+ return -ENOTSUP;
+ }
+ if (rte_intr_efd_enable(intr_handle, intr_vector))
+ return -1;
+ }
+
+ if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
+ intr_handle->intr_vec =
+ rte_zmalloc("intr_vec",
+ bp->eth_dev->data->nb_rx_queues *
+ sizeof(int), 0);
+ if (intr_handle->intr_vec == NULL) {
+ RTE_LOG(ERR, PMD, "Failed to allocate %d rx_queues"
+ " intr_vec", bp->eth_dev->data->nb_rx_queues);
+ return -ENOMEM;
+ }
+ RTE_LOG(DEBUG, PMD, "%s(): intr_handle->intr_vec = %p "
+ "intr_handle->nb_efd = %d intr_handle->max_intr = %d\n",
+ __func__, intr_handle->intr_vec, intr_handle->nb_efd,
+ intr_handle->max_intr);
+ }
+
+ for (queue_id = 0; queue_id < bp->eth_dev->data->nb_rx_queues;
+ queue_id++) {
+ intr_handle->intr_vec[queue_id] = vec;
+ if (vec < base + intr_handle->nb_efd - 1)
+ vec++;
+ }
+
+ /* enable uio/vfio intr/eventfd mapping */
+ rte_intr_enable(intr_handle);
+
rc = bnxt_get_hwrm_link_config(bp, &new);
if (rc) {
RTE_LOG(ERR, PMD, "HWRM Get link config failure rc: %x\n", rc);
@@ -360,27 +411,38 @@ static void bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
{
struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
uint16_t max_vnics, i, j, vpool, vrxq;
+ unsigned int max_rx_rings;
dev_info->pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
/* MAC Specifics */
- dev_info->max_mac_addrs = MAX_NUM_MAC_ADDR;
+ dev_info->max_mac_addrs = bp->max_l2_ctx;
dev_info->max_hash_mac_addrs = 0;
/* PF/VF specifics */
if (BNXT_PF(bp))
dev_info->max_vfs = bp->pdev->max_vfs;
- dev_info->max_rx_queues = bp->max_rx_rings;
- dev_info->max_tx_queues = bp->max_tx_rings;
+ max_rx_rings = RTE_MIN(bp->max_vnics, RTE_MIN(bp->max_l2_ctx,
+ RTE_MIN(bp->max_rsscos_ctx,
+ bp->max_stat_ctx)));
+ /* For the sake of symmetry, max_rx_queues = max_tx_queues */
+ dev_info->max_rx_queues = max_rx_rings;
+ dev_info->max_tx_queues = max_rx_rings;
dev_info->reta_size = bp->max_rsscos_ctx;
+ dev_info->hash_key_size = 40;
max_vnics = bp->max_vnics;
/* Fast path specifics */
dev_info->min_rx_bufsize = 1;
dev_info->max_rx_pktlen = BNXT_MAX_MTU + ETHER_HDR_LEN + ETHER_CRC_LEN
+ VLAN_TAG_SIZE;
- dev_info->rx_offload_capa = 0;
- dev_info->tx_offload_capa = DEV_TX_OFFLOAD_IPV4_CKSUM |
+ dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
+ DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM |
+ DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
+ dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
+ DEV_TX_OFFLOAD_IPV4_CKSUM |
DEV_TX_OFFLOAD_TCP_CKSUM |
DEV_TX_OFFLOAD_UDP_CKSUM |
DEV_TX_OFFLOAD_TCP_TSO |
@@ -414,6 +476,8 @@ static void bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
};
eth_dev->data->dev_conf.intr_conf.lsc = 1;
+ eth_dev->data->dev_conf.intr_conf.rxq = 1;
+
/* *INDENT-ON* */
/*
@@ -489,13 +553,13 @@ static void bnxt_print_link_info(struct rte_eth_dev *eth_dev)
if (link->link_status)
RTE_LOG(INFO, PMD, "Port %d Link Up - speed %u Mbps - %s\n",
- (uint8_t)(eth_dev->data->port_id),
+ eth_dev->data->port_id,
(uint32_t)link->link_speed,
(link->link_duplex == ETH_LINK_FULL_DUPLEX) ?
("full-duplex") : ("half-duplex\n"));
else
RTE_LOG(INFO, PMD, "Port %d Link Down\n",
- (uint8_t)(eth_dev->data->port_id));
+ eth_dev->data->port_id);
}
static int bnxt_dev_lsc_intr_setup(struct rte_eth_dev *eth_dev)
@@ -510,6 +574,11 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
int vlan_mask = 0;
int rc;
+ if (bp->rx_cp_nr_rings > RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+ RTE_LOG(ERR, PMD,
+ "RxQ cnt %d > CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS %d\n",
+ bp->rx_cp_nr_rings, RTE_ETHDEV_QUEUE_STAT_CNTRS);
+ }
bp->dev_stopped = 0;
rc = bnxt_init_nic(bp);
@@ -522,7 +591,9 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
vlan_mask |= ETH_VLAN_FILTER_MASK;
if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
vlan_mask |= ETH_VLAN_STRIP_MASK;
- bnxt_vlan_offload_set_op(eth_dev, vlan_mask);
+ rc = bnxt_vlan_offload_set_op(eth_dev, vlan_mask);
+ if (rc)
+ goto error;
return 0;
@@ -593,13 +664,14 @@ static void bnxt_mac_addr_remove_op(struct rte_eth_dev *eth_dev,
uint64_t pool_mask = eth_dev->data->mac_pool_sel[index];
struct bnxt_vnic_info *vnic;
struct bnxt_filter_info *filter, *temp_filter;
- int i;
+ uint32_t pool = RTE_MIN(MAX_FF_POOLS, ETH_64_POOLS);
+ uint32_t i;
/*
* Loop through all VNICs from the specified filter flow pools to
* remove the corresponding MAC addr filter
*/
- for (i = 0; i < MAX_FF_POOLS; i++) {
+ for (i = 0; i < pool; i++) {
if (!(pool_mask & (1ULL << i)))
continue;
@@ -610,7 +682,7 @@ static void bnxt_mac_addr_remove_op(struct rte_eth_dev *eth_dev,
if (filter->mac_index == index) {
STAILQ_REMOVE(&vnic->filter, filter,
bnxt_filter_info, next);
- bnxt_hwrm_clear_filter(bp, filter);
+ bnxt_hwrm_clear_l2_filter(bp, filter);
filter->mac_index = INVALID_MAC_INDEX;
memset(&filter->l2_addr, 0,
ETHER_ADDR_LEN);
@@ -657,7 +729,7 @@ static int bnxt_mac_addr_add_op(struct rte_eth_dev *eth_dev,
STAILQ_INSERT_TAIL(&vnic->filter, filter, next);
filter->mac_index = index;
memcpy(filter->l2_addr, mac_addr, ETHER_ADDR_LEN);
- return bnxt_hwrm_set_filter(bp, vnic->fw_vnic_id, filter);
+ return bnxt_hwrm_set_l2_filter(bp, vnic->fw_vnic_id, filter);
}
int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete)
@@ -827,11 +899,15 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev,
*/
if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
if (!rss_conf->rss_hf)
- return -EINVAL;
+ RTE_LOG(ERR, PMD, "Hash type NONE\n");
} else {
if (rss_conf->rss_hf & BNXT_ETH_RSS_SUPPORT)
return -EINVAL;
}
+
+ bp->flags |= BNXT_FLAG_UPDATE_HASH;
+ memcpy(&bp->rss_conf, rss_conf, sizeof(*rss_conf));
+
if (rss_conf->rss_hf & ETH_RSS_IPV4)
hash_type |= HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV4;
if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
@@ -1147,7 +1223,7 @@ static int bnxt_del_vlan_filter(struct bnxt *bp, uint16_t vlan_id)
/* Must delete the filter */
STAILQ_REMOVE(&vnic->filter, filter,
bnxt_filter_info, next);
- bnxt_hwrm_clear_filter(bp, filter);
+ bnxt_hwrm_clear_l2_filter(bp, filter);
STAILQ_INSERT_TAIL(
&bp->free_filter_list,
filter, next);
@@ -1173,7 +1249,7 @@ static int bnxt_del_vlan_filter(struct bnxt *bp, uint16_t vlan_id)
memcpy(new_filter->l2_addr,
filter->l2_addr, ETHER_ADDR_LEN);
/* MAC only filter */
- rc = bnxt_hwrm_set_filter(bp,
+ rc = bnxt_hwrm_set_l2_filter(bp,
vnic->fw_vnic_id,
new_filter);
if (rc)
@@ -1225,7 +1301,7 @@ static int bnxt_add_vlan_filter(struct bnxt *bp, uint16_t vlan_id)
/* Must delete the MAC filter */
STAILQ_REMOVE(&vnic->filter, filter,
bnxt_filter_info, next);
- bnxt_hwrm_clear_filter(bp, filter);
+ bnxt_hwrm_clear_l2_filter(bp, filter);
filter->l2_ovlan = 0;
STAILQ_INSERT_TAIL(
&bp->free_filter_list,
@@ -1248,8 +1324,9 @@ static int bnxt_add_vlan_filter(struct bnxt *bp, uint16_t vlan_id)
new_filter->l2_ovlan = vlan_id;
new_filter->l2_ovlan_mask = 0xF000;
new_filter->enables |= en;
- rc = bnxt_hwrm_set_filter(bp, vnic->fw_vnic_id,
- new_filter);
+ rc = bnxt_hwrm_set_l2_filter(bp,
+ vnic->fw_vnic_id,
+ new_filter);
if (rc)
goto exit;
RTE_LOG(INFO, PMD,
@@ -1275,7 +1352,7 @@ static int bnxt_vlan_filter_set_op(struct rte_eth_dev *eth_dev,
return bnxt_del_vlan_filter(bp, vlan_id);
}
-static void
+static int
bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask)
{
struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
@@ -1307,6 +1384,8 @@ bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask)
if (mask & ETH_VLAN_EXTEND_MASK)
RTE_LOG(ERR, PMD, "Extend VLAN Not supported\n");
+
+ return 0;
}
static void
@@ -1328,7 +1407,7 @@ bnxt_set_default_mac_addr_op(struct rte_eth_dev *dev, struct ether_addr *addr)
/* Default Filter is at Index 0 */
if (filter->mac_index != 0)
continue;
- rc = bnxt_hwrm_clear_filter(bp, filter);
+ rc = bnxt_hwrm_clear_l2_filter(bp, filter);
if (rc)
break;
memcpy(filter->l2_addr, bp->mac_addr, ETHER_ADDR_LEN);
@@ -1337,7 +1416,7 @@ bnxt_set_default_mac_addr_op(struct rte_eth_dev *dev, struct ether_addr *addr)
filter->enables |=
HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR |
HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR_MASK;
- rc = bnxt_hwrm_set_filter(bp, vnic->fw_vnic_id, filter);
+ rc = bnxt_hwrm_set_l2_filter(bp, vnic->fw_vnic_id, filter);
if (rc)
break;
filter->mac_index = 0;
@@ -1517,6 +1596,1083 @@ bnxt_dev_led_off_op(struct rte_eth_dev *dev)
return bnxt_hwrm_port_led_cfg(bp, false);
}
+static uint32_t
+bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+ uint32_t desc = 0, raw_cons = 0, cons;
+ struct bnxt_cp_ring_info *cpr;
+ struct bnxt_rx_queue *rxq;
+ struct rx_pkt_cmpl *rxcmp;
+ uint16_t cmp_type;
+ uint8_t cmp = 1;
+ bool valid;
+
+ rxq = dev->data->rx_queues[rx_queue_id];
+ cpr = rxq->cp_ring;
+ valid = cpr->valid;
+
+ while (raw_cons < rxq->nb_rx_desc) {
+ cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
+ rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
+
+ if (!CMPL_VALID(rxcmp, valid))
+ goto nothing_to_do;
+ valid = FLIP_VALID(cons, cpr->cp_ring_struct->ring_mask, valid);
+ cmp_type = CMP_TYPE(rxcmp);
+ if (cmp_type == RX_TPA_END_CMPL_TYPE_RX_TPA_END) {
+ cmp = (rte_le_to_cpu_32(
+ ((struct rx_tpa_end_cmpl *)
+ (rxcmp))->agg_bufs_v1) &
+ RX_TPA_END_CMPL_AGG_BUFS_MASK) >>
+ RX_TPA_END_CMPL_AGG_BUFS_SFT;
+ desc++;
+ } else if (cmp_type == 0x11) {
+ desc++;
+ cmp = (rxcmp->agg_bufs_v1 &
+ RX_PKT_CMPL_AGG_BUFS_MASK) >>
+ RX_PKT_CMPL_AGG_BUFS_SFT;
+ } else {
+ cmp = 1;
+ }
+nothing_to_do:
+ raw_cons += cmp ? cmp : 2;
+ }
+
+ return desc;
+}
+
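+/* Editor's note: bnxt_rx_queue_count_op above estimates how many completed
+ * packets sit on the queue by walking the completion ring; TPA-end and
+ * ordinary packet completions (type 0x11) each count once, and their
+ * aggregation-buffer count tells the walk how many entries to skip.
+ * Applications reach it through rte_eth_rx_queue_count(); a minimal usage
+ * sketch with example port/queue numbers follows (editor's illustration,
+ * not part of the commit):
+ *
+ *	#include <stdio.h>
+ *	#include <rte_ethdev.h>
+ *
+ *	static void
+ *	log_rx_backlog(uint16_t port_id, uint16_t queue_id)
+ *	{
+ *		int used = rte_eth_rx_queue_count(port_id, queue_id);
+ *
+ *		if (used >= 0)
+ *			printf("port %u rxq %u: ~%d completed descriptors\n",
+ *			       port_id, queue_id, used);
+ *	}
+ */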
+static int
+bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset)
+{
+ struct bnxt_rx_queue *rxq = (struct bnxt_rx_queue *)rx_queue;
+ struct bnxt_rx_ring_info *rxr;
+ struct bnxt_cp_ring_info *cpr;
+ struct bnxt_sw_rx_bd *rx_buf;
+ struct rx_pkt_cmpl *rxcmp;
+ uint32_t cons, cp_cons;
+
+ if (!rxq)
+ return -EINVAL;
+
+ cpr = rxq->cp_ring;
+ rxr = rxq->rx_ring;
+
+ if (offset >= rxq->nb_rx_desc)
+ return -EINVAL;
+
+ cons = RING_CMP(cpr->cp_ring_struct, offset);
+ cp_cons = cpr->cp_raw_cons;
+ rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
+
+ if (cons > cp_cons) {
+ if (CMPL_VALID(rxcmp, cpr->valid))
+ return RTE_ETH_RX_DESC_DONE;
+ } else {
+ if (CMPL_VALID(rxcmp, !cpr->valid))
+ return RTE_ETH_RX_DESC_DONE;
+ }
+ rx_buf = &rxr->rx_buf_ring[cons];
+ if (rx_buf->mbuf == NULL)
+ return RTE_ETH_RX_DESC_UNAVAIL;
+
+
+ return RTE_ETH_RX_DESC_AVAIL;
+}
+
+static int
+bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
+{
+ struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
+ struct bnxt_tx_ring_info *txr;
+ struct bnxt_cp_ring_info *cpr;
+ struct bnxt_sw_tx_bd *tx_buf;
+ struct tx_pkt_cmpl *txcmp;
+ uint32_t cons, cp_cons;
+
+ if (!txq)
+ return -EINVAL;
+
+ cpr = txq->cp_ring;
+ txr = txq->tx_ring;
+
+ if (offset >= txq->nb_tx_desc)
+ return -EINVAL;
+
+ cons = RING_CMP(cpr->cp_ring_struct, offset);
+ txcmp = (struct tx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
+ cp_cons = cpr->cp_raw_cons;
+
+ if (cons > cp_cons) {
+ if (CMPL_VALID(txcmp, cpr->valid))
+ return RTE_ETH_TX_DESC_UNAVAIL;
+ } else {
+ if (CMPL_VALID(txcmp, !cpr->valid))
+ return RTE_ETH_TX_DESC_UNAVAIL;
+ }
+ tx_buf = &txr->tx_buf_ring[cons];
+ if (tx_buf->mbuf == NULL)
+ return RTE_ETH_TX_DESC_DONE;
+
+ return RTE_ETH_TX_DESC_FULL;
+}
+
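+/* Editor's note: the two descriptor_status callbacks above compare the
+ * requested offset against the current completion-ring phase to report
+ * DONE/AVAIL (RX) or DONE/FULL (TX).  Applications reach them through
+ * rte_eth_rx_descriptor_status() and rte_eth_tx_descriptor_status(); a
+ * hedged usage sketch using the TX variant as a high-water check (the
+ * threshold is an arbitrary example, editor's illustration only):
+ *
+ *	#include <rte_ethdev.h>
+ *
+ *	static int
+ *	txq_nearly_full(uint16_t port_id, uint16_t queue_id,
+ *			uint16_t high_water)
+ *	{
+ *		int st = rte_eth_tx_descriptor_status(port_id, queue_id,
+ *						      high_water);
+ *
+ *		return st == RTE_ETH_TX_DESC_FULL; // still owned by the NIC
+ *	}
+ */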
+static struct bnxt_filter_info *
+bnxt_match_and_validate_ether_filter(struct bnxt *bp,
+ struct rte_eth_ethertype_filter *efilter,
+ struct bnxt_vnic_info *vnic0,
+ struct bnxt_vnic_info *vnic,
+ int *ret)
+{
+ struct bnxt_filter_info *mfilter = NULL;
+ int match = 0;
+ *ret = 0;
+
+ if (efilter->ether_type != ETHER_TYPE_IPv4 &&
+ efilter->ether_type != ETHER_TYPE_IPv6) {
+ RTE_LOG(ERR, PMD, "unsupported ether_type(0x%04x) in"
+ " ethertype filter.", efilter->ether_type);
+ *ret = -EINVAL;
+ goto exit;
+ }
+ if (efilter->queue >= bp->rx_nr_rings) {
+ RTE_LOG(ERR, PMD, "Invalid queue %d\n", efilter->queue);
+ *ret = -EINVAL;
+ goto exit;
+ }
+
+ vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+ vnic = STAILQ_FIRST(&bp->ff_pool[efilter->queue]);
+ if (vnic == NULL) {
+ RTE_LOG(ERR, PMD, "Invalid queue %d\n", efilter->queue);
+ *ret = -EINVAL;
+ goto exit;
+ }
+
+ if (efilter->flags & RTE_ETHTYPE_FLAGS_DROP) {
+ STAILQ_FOREACH(mfilter, &vnic0->filter, next) {
+ if ((!memcmp(efilter->mac_addr.addr_bytes,
+ mfilter->l2_addr, ETHER_ADDR_LEN) &&
+ mfilter->flags ==
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_DROP &&
+ mfilter->ethertype == efilter->ether_type)) {
+ match = 1;
+ break;
+ }
+ }
+ } else {
+ STAILQ_FOREACH(mfilter, &vnic->filter, next)
+ if ((!memcmp(efilter->mac_addr.addr_bytes,
+ mfilter->l2_addr, ETHER_ADDR_LEN) &&
+ mfilter->ethertype == efilter->ether_type &&
+ mfilter->flags ==
+ HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH_RX)) {
+ match = 1;
+ break;
+ }
+ }
+
+ if (match)
+ *ret = -EEXIST;
+
+exit:
+ return mfilter;
+}
+
+static int
+bnxt_ethertype_filter(struct rte_eth_dev *dev,
+ enum rte_filter_op filter_op,
+ void *arg)
+{
+ struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+ struct rte_eth_ethertype_filter *efilter =
+ (struct rte_eth_ethertype_filter *)arg;
+ struct bnxt_filter_info *bfilter, *filter1;
+ struct bnxt_vnic_info *vnic, *vnic0;
+ int ret;
+
+ if (filter_op == RTE_ETH_FILTER_NOP)
+ return 0;
+
+ if (arg == NULL) {
+ RTE_LOG(ERR, PMD, "arg shouldn't be NULL for operation %u.",
+ filter_op);
+ return -EINVAL;
+ }
+
+ vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+ vnic = STAILQ_FIRST(&bp->ff_pool[efilter->queue]);
+
+ switch (filter_op) {
+ case RTE_ETH_FILTER_ADD:
+ bnxt_match_and_validate_ether_filter(bp, efilter,
+ vnic0, vnic, &ret);
+ if (ret < 0)
+ return ret;
+
+ bfilter = bnxt_get_unused_filter(bp);
+ if (bfilter == NULL) {
+ RTE_LOG(ERR, PMD,
+ "Not enough resources for a new filter.\n");
+ return -ENOMEM;
+ }
+ bfilter->filter_type = HWRM_CFA_NTUPLE_FILTER;
+ memcpy(bfilter->l2_addr, efilter->mac_addr.addr_bytes,
+ ETHER_ADDR_LEN);
+ memcpy(bfilter->dst_macaddr, efilter->mac_addr.addr_bytes,
+ ETHER_ADDR_LEN);
+ bfilter->enables |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_MACADDR;
+ bfilter->ethertype = efilter->ether_type;
+ bfilter->enables |= NTUPLE_FLTR_ALLOC_INPUT_EN_ETHERTYPE;
+
+ filter1 = bnxt_get_l2_filter(bp, bfilter, vnic0);
+ if (filter1 == NULL) {
+ ret = -1;
+ goto cleanup;
+ }
+ bfilter->enables |=
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID;
+ bfilter->fw_l2_filter_id = filter1->fw_l2_filter_id;
+
+ bfilter->dst_id = vnic->fw_vnic_id;
+
+ if (efilter->flags & RTE_ETHTYPE_FLAGS_DROP) {
+ bfilter->flags =
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_DROP;
+ }
+
+ ret = bnxt_hwrm_set_ntuple_filter(bp, bfilter->dst_id, bfilter);
+ if (ret)
+ goto cleanup;
+ STAILQ_INSERT_TAIL(&vnic->filter, bfilter, next);
+ break;
+ case RTE_ETH_FILTER_DELETE:
+ filter1 = bnxt_match_and_validate_ether_filter(bp, efilter,
+ vnic0, vnic, &ret);
+ if (ret == -EEXIST) {
+ ret = bnxt_hwrm_clear_ntuple_filter(bp, filter1);
+
+ STAILQ_REMOVE(&vnic->filter, filter1, bnxt_filter_info,
+ next);
+ bnxt_free_filter(bp, filter1);
+ } else if (ret == 0) {
+ RTE_LOG(ERR, PMD, "No matching filter found\n");
+ }
+ break;
+ default:
+ RTE_LOG(ERR, PMD, "unsupported operation %u.", filter_op);
+ ret = -EINVAL;
+ goto error;
+ }
+ return ret;
+cleanup:
+ bnxt_free_filter(bp, bfilter);
+error:
+ return ret;
+}
+
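+/* Editor's note: bnxt_ethertype_filter above accepts only IPv4/IPv6
+ * ethertypes and programs the match as an ntuple HWRM filter, with
+ * RTE_ETHTYPE_FLAGS_DROP mapping to the HWRM drop flag.  A hedged sketch
+ * of the application call through the legacy filter API; the MAC address
+ * and queue are examples (editor's illustration, not part of the commit):
+ *
+ *	#include <rte_ether.h>
+ *	#include <rte_eth_ctrl.h>
+ *	#include <rte_ethdev.h>
+ *
+ *	static int
+ *	drop_ipv4_from_mac(uint16_t port_id)
+ *	{
+ *		struct rte_eth_ethertype_filter f = {
+ *			.mac_addr = { .addr_bytes =
+ *				{ 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 } },
+ *			.ether_type = ETHER_TYPE_IPv4,
+ *			.flags = RTE_ETHTYPE_FLAGS_DROP,
+ *			.queue = 0,
+ *		};
+ *
+ *		return rte_eth_dev_filter_ctrl(port_id,
+ *					       RTE_ETH_FILTER_ETHERTYPE,
+ *					       RTE_ETH_FILTER_ADD, &f);
+ *	}
+ */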
+static inline int
+parse_ntuple_filter(struct bnxt *bp,
+ struct rte_eth_ntuple_filter *nfilter,
+ struct bnxt_filter_info *bfilter)
+{
+ uint32_t en = 0;
+
+ if (nfilter->queue >= bp->rx_nr_rings) {
+ RTE_LOG(ERR, PMD, "Invalid queue %d\n", nfilter->queue);
+ return -EINVAL;
+ }
+
+ switch (nfilter->dst_port_mask) {
+ case UINT16_MAX:
+ bfilter->dst_port_mask = -1;
+ bfilter->dst_port = nfilter->dst_port;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT |
+ NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT_MASK;
+ break;
+ default:
+ RTE_LOG(ERR, PMD, "invalid dst_port mask.");
+ return -EINVAL;
+ }
+
+ bfilter->ip_addr_type = NTUPLE_FLTR_ALLOC_INPUT_IP_ADDR_TYPE_IPV4;
+ en |= NTUPLE_FLTR_ALLOC_IN_EN_IP_PROTO;
+
+ switch (nfilter->proto_mask) {
+ case UINT8_MAX:
+ if (nfilter->proto == 17) /* IPPROTO_UDP */
+ bfilter->ip_protocol = 17;
+ else if (nfilter->proto == 6) /* IPPROTO_TCP */
+ bfilter->ip_protocol = 6;
+ else
+ return -EINVAL;
+ en |= NTUPLE_FLTR_ALLOC_IN_EN_IP_PROTO;
+ break;
+ default:
+ RTE_LOG(ERR, PMD, "invalid protocol mask.");
+ return -EINVAL;
+ }
+
+ switch (nfilter->dst_ip_mask) {
+ case UINT32_MAX:
+ bfilter->dst_ipaddr_mask[0] = -1;
+ bfilter->dst_ipaddr[0] = nfilter->dst_ip;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR |
+ NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR_MASK;
+ break;
+ default:
+ RTE_LOG(ERR, PMD, "invalid dst_ip mask.");
+ return -EINVAL;
+ }
+
+ switch (nfilter->src_ip_mask) {
+ case UINT32_MAX:
+ bfilter->src_ipaddr_mask[0] = -1;
+ bfilter->src_ipaddr[0] = nfilter->src_ip;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR |
+ NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR_MASK;
+ break;
+ default:
+ RTE_LOG(ERR, PMD, "invalid src_ip mask.");
+ return -EINVAL;
+ }
+
+ switch (nfilter->src_port_mask) {
+ case UINT16_MAX:
+ bfilter->src_port_mask = -1;
+ bfilter->src_port = nfilter->src_port;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT |
+ NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT_MASK;
+ break;
+ default:
+ RTE_LOG(ERR, PMD, "invalid src_port mask.");
+ return -EINVAL;
+ }
+
+ //TODO Priority
+ //nfilter->priority = (uint8_t)filter->priority;
+
+ bfilter->enables = en;
+ return 0;
+}
+
+static struct bnxt_filter_info*
+bnxt_match_ntuple_filter(struct bnxt_vnic_info *vnic,
+ struct bnxt_filter_info *bfilter)
+{
+ struct bnxt_filter_info *mfilter = NULL;
+
+ STAILQ_FOREACH(mfilter, &vnic->filter, next) {
+ if (bfilter->src_ipaddr[0] == mfilter->src_ipaddr[0] &&
+ bfilter->src_ipaddr_mask[0] ==
+ mfilter->src_ipaddr_mask[0] &&
+ bfilter->src_port == mfilter->src_port &&
+ bfilter->src_port_mask == mfilter->src_port_mask &&
+ bfilter->dst_ipaddr[0] == mfilter->dst_ipaddr[0] &&
+ bfilter->dst_ipaddr_mask[0] ==
+ mfilter->dst_ipaddr_mask[0] &&
+ bfilter->dst_port == mfilter->dst_port &&
+ bfilter->dst_port_mask == mfilter->dst_port_mask &&
+ bfilter->flags == mfilter->flags &&
+ bfilter->enables == mfilter->enables)
+ return mfilter;
+ }
+ return NULL;
+}
+
+static int
+bnxt_cfg_ntuple_filter(struct bnxt *bp,
+ struct rte_eth_ntuple_filter *nfilter,
+ enum rte_filter_op filter_op)
+{
+ struct bnxt_filter_info *bfilter, *mfilter, *filter1;
+ struct bnxt_vnic_info *vnic, *vnic0;
+ int ret;
+
+ if (nfilter->flags != RTE_5TUPLE_FLAGS) {
+ RTE_LOG(ERR, PMD, "only 5tuple is supported.");
+ return -EINVAL;
+ }
+
+ if (nfilter->flags & RTE_NTUPLE_FLAGS_TCP_FLAG) {
+ RTE_LOG(ERR, PMD, "Ntuple filter: TCP flags not supported\n");
+ return -EINVAL;
+ }
+
+ bfilter = bnxt_get_unused_filter(bp);
+ if (bfilter == NULL) {
+ RTE_LOG(ERR, PMD,
+ "Not enough resources for a new filter.\n");
+ return -ENOMEM;
+ }
+ ret = parse_ntuple_filter(bp, nfilter, bfilter);
+ if (ret < 0)
+ goto free_filter;
+
+ vnic = STAILQ_FIRST(&bp->ff_pool[nfilter->queue]);
+ vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+ filter1 = STAILQ_FIRST(&vnic0->filter);
+ if (filter1 == NULL) {
+ ret = -1;
+ goto free_filter;
+ }
+
+ bfilter->dst_id = vnic->fw_vnic_id;
+ bfilter->fw_l2_filter_id = filter1->fw_l2_filter_id;
+ bfilter->enables |=
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID;
+ bfilter->ethertype = 0x800;
+ bfilter->enables |= NTUPLE_FLTR_ALLOC_INPUT_EN_ETHERTYPE;
+
+ mfilter = bnxt_match_ntuple_filter(vnic, bfilter);
+
+ if (mfilter != NULL && filter_op == RTE_ETH_FILTER_ADD) {
+ RTE_LOG(ERR, PMD, "filter exists.");
+ ret = -EEXIST;
+ goto free_filter;
+ }
+ if (mfilter == NULL && filter_op == RTE_ETH_FILTER_DELETE) {
+ RTE_LOG(ERR, PMD, "filter doesn't exist.");
+ ret = -ENOENT;
+ goto free_filter;
+ }
+
+ if (filter_op == RTE_ETH_FILTER_ADD) {
+ bfilter->filter_type = HWRM_CFA_NTUPLE_FILTER;
+ ret = bnxt_hwrm_set_ntuple_filter(bp, bfilter->dst_id, bfilter);
+ if (ret)
+ goto free_filter;
+ STAILQ_INSERT_TAIL(&vnic->filter, bfilter, next);
+ } else {
+ if (mfilter == NULL) {
+ /* This should not happen. But for Coverity! */
+ ret = -ENOENT;
+ goto free_filter;
+ }
+ ret = bnxt_hwrm_clear_ntuple_filter(bp, mfilter);
+
+ STAILQ_REMOVE(&vnic->filter, mfilter, bnxt_filter_info,
+ next);
+ bnxt_free_filter(bp, mfilter);
+ bfilter->fw_l2_filter_id = -1;
+ bnxt_free_filter(bp, bfilter);
+ }
+
+ return 0;
+free_filter:
+ bfilter->fw_l2_filter_id = -1;
+ bnxt_free_filter(bp, bfilter);
+ return ret;
+}
+
+static int
+bnxt_ntuple_filter(struct rte_eth_dev *dev,
+ enum rte_filter_op filter_op,
+ void *arg)
+{
+ struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+ int ret;
+
+ if (filter_op == RTE_ETH_FILTER_NOP)
+ return 0;
+
+ if (arg == NULL) {
+ RTE_LOG(ERR, PMD, "arg shouldn't be NULL for operation %u.",
+ filter_op);
+ return -EINVAL;
+ }
+
+ switch (filter_op) {
+ case RTE_ETH_FILTER_ADD:
+ ret = bnxt_cfg_ntuple_filter(bp,
+ (struct rte_eth_ntuple_filter *)arg,
+ filter_op);
+ break;
+ case RTE_ETH_FILTER_DELETE:
+ ret = bnxt_cfg_ntuple_filter(bp,
+ (struct rte_eth_ntuple_filter *)arg,
+ filter_op);
+ break;
+ default:
+ RTE_LOG(ERR, PMD, "unsupported operation %u.", filter_op);
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
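+/* Editor's note: bnxt_cfg_ntuple_filter accepts only fully-masked 5-tuple
+ * fields (see parse_ntuple_filter above), so a request must set every mask
+ * to all-ones.  A hedged sketch through the legacy filter_ctrl API;
+ * addresses, ports, queue and the network byte order used here are
+ * illustrative (editor's illustration, not part of the commit):
+ *
+ *	#include <stdint.h>
+ *	#include <netinet/in.h>
+ *	#include <rte_byteorder.h>
+ *	#include <rte_eth_ctrl.h>
+ *	#include <rte_ethdev.h>
+ *
+ *	static int
+ *	add_ntuple(uint16_t port_id)
+ *	{
+ *		struct rte_eth_ntuple_filter f = {
+ *			.flags = RTE_5TUPLE_FLAGS,
+ *			.dst_ip = rte_cpu_to_be_32(0x0a000001),  // 10.0.0.1
+ *			.dst_ip_mask = UINT32_MAX,
+ *			.src_ip = rte_cpu_to_be_32(0x0a000002),  // 10.0.0.2
+ *			.src_ip_mask = UINT32_MAX,
+ *			.dst_port = rte_cpu_to_be_16(4000),
+ *			.dst_port_mask = UINT16_MAX,
+ *			.src_port = rte_cpu_to_be_16(5000),
+ *			.src_port_mask = UINT16_MAX,
+ *			.proto = IPPROTO_UDP,
+ *			.proto_mask = UINT8_MAX,
+ *			.queue = 2,
+ *		};
+ *
+ *		return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_NTUPLE,
+ *					       RTE_ETH_FILTER_ADD, &f);
+ *	}
+ */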
+static int
+bnxt_parse_fdir_filter(struct bnxt *bp,
+ struct rte_eth_fdir_filter *fdir,
+ struct bnxt_filter_info *filter)
+{
+ enum rte_fdir_mode fdir_mode =
+ bp->eth_dev->data->dev_conf.fdir_conf.mode;
+ struct bnxt_vnic_info *vnic0, *vnic;
+ struct bnxt_filter_info *filter1;
+ uint32_t en = 0;
+ int i;
+
+ if (fdir_mode == RTE_FDIR_MODE_PERFECT_TUNNEL)
+ return -EINVAL;
+
+ filter->l2_ovlan = fdir->input.flow_ext.vlan_tci;
+ en |= EM_FLOW_ALLOC_INPUT_EN_OVLAN_VID;
+
+ switch (fdir->input.flow_type) {
+ case RTE_ETH_FLOW_IPV4:
+ case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
+ /* FALLTHROUGH */
+ filter->src_ipaddr[0] = fdir->input.flow.ip4_flow.src_ip;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR;
+ filter->dst_ipaddr[0] = fdir->input.flow.ip4_flow.dst_ip;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR;
+ filter->ip_protocol = fdir->input.flow.ip4_flow.proto;
+ en |= NTUPLE_FLTR_ALLOC_IN_EN_IP_PROTO;
+ filter->ip_addr_type =
+ NTUPLE_FLTR_ALLOC_INPUT_IP_ADDR_TYPE_IPV4;
+ filter->src_ipaddr_mask[0] = 0xffffffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR_MASK;
+ filter->dst_ipaddr_mask[0] = 0xffffffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR_MASK;
+ filter->ethertype = 0x800;
+ filter->enables |= NTUPLE_FLTR_ALLOC_INPUT_EN_ETHERTYPE;
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
+ filter->src_port = fdir->input.flow.tcp4_flow.src_port;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT;
+ filter->dst_port = fdir->input.flow.tcp4_flow.dst_port;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT;
+ filter->dst_port_mask = 0xffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT_MASK;
+ filter->src_port_mask = 0xffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT_MASK;
+ filter->src_ipaddr[0] = fdir->input.flow.tcp4_flow.ip.src_ip;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR;
+ filter->dst_ipaddr[0] = fdir->input.flow.tcp4_flow.ip.dst_ip;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR;
+ filter->ip_protocol = 6;
+ en |= NTUPLE_FLTR_ALLOC_IN_EN_IP_PROTO;
+ filter->ip_addr_type =
+ NTUPLE_FLTR_ALLOC_INPUT_IP_ADDR_TYPE_IPV4;
+ filter->src_ipaddr_mask[0] = 0xffffffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR_MASK;
+ filter->dst_ipaddr_mask[0] = 0xffffffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR_MASK;
+ filter->ethertype = 0x800;
+ filter->enables |= NTUPLE_FLTR_ALLOC_INPUT_EN_ETHERTYPE;
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
+ filter->src_port = fdir->input.flow.udp4_flow.src_port;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT;
+ filter->dst_port = fdir->input.flow.udp4_flow.dst_port;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT;
+ filter->dst_port_mask = 0xffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT_MASK;
+ filter->src_port_mask = 0xffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT_MASK;
+ filter->src_ipaddr[0] = fdir->input.flow.udp4_flow.ip.src_ip;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR;
+ filter->dst_ipaddr[0] = fdir->input.flow.udp4_flow.ip.dst_ip;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR;
+ filter->ip_protocol = 17;
+ en |= NTUPLE_FLTR_ALLOC_IN_EN_IP_PROTO;
+ filter->ip_addr_type =
+ NTUPLE_FLTR_ALLOC_INPUT_IP_ADDR_TYPE_IPV4;
+ filter->src_ipaddr_mask[0] = 0xffffffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR_MASK;
+ filter->dst_ipaddr_mask[0] = 0xffffffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR_MASK;
+ filter->ethertype = 0x800;
+ filter->enables |= NTUPLE_FLTR_ALLOC_INPUT_EN_ETHERTYPE;
+ break;
+ case RTE_ETH_FLOW_IPV6:
+ case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
+ /* FALLTHROUGH */
+ filter->ip_addr_type =
+ NTUPLE_FLTR_ALLOC_INPUT_IP_ADDR_TYPE_IPV6;
+ filter->ip_protocol = fdir->input.flow.ipv6_flow.proto;
+ en |= NTUPLE_FLTR_ALLOC_IN_EN_IP_PROTO;
+ rte_memcpy(filter->src_ipaddr,
+ fdir->input.flow.ipv6_flow.src_ip, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR;
+ rte_memcpy(filter->dst_ipaddr,
+ fdir->input.flow.ipv6_flow.dst_ip, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR;
+ memset(filter->dst_ipaddr_mask, 0xff, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR_MASK;
+ memset(filter->src_ipaddr_mask, 0xff, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR_MASK;
+ filter->ethertype = 0x86dd;
+ filter->enables |= NTUPLE_FLTR_ALLOC_INPUT_EN_ETHERTYPE;
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
+ filter->src_port = fdir->input.flow.tcp6_flow.src_port;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT;
+ filter->dst_port = fdir->input.flow.tcp6_flow.dst_port;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT;
+ filter->dst_port_mask = 0xffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT_MASK;
+ filter->src_port_mask = 0xffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT_MASK;
+ filter->ip_addr_type =
+ NTUPLE_FLTR_ALLOC_INPUT_IP_ADDR_TYPE_IPV6;
+ filter->ip_protocol = fdir->input.flow.tcp6_flow.ip.proto;
+ en |= NTUPLE_FLTR_ALLOC_IN_EN_IP_PROTO;
+ rte_memcpy(filter->src_ipaddr,
+ fdir->input.flow.tcp6_flow.ip.src_ip, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR;
+ rte_memcpy(filter->dst_ipaddr,
+ fdir->input.flow.tcp6_flow.ip.dst_ip, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR;
+ memset(filter->dst_ipaddr_mask, 0xff, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR_MASK;
+ memset(filter->src_ipaddr_mask, 0xff, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR_MASK;
+ filter->ethertype = 0x86dd;
+ filter->enables |= NTUPLE_FLTR_ALLOC_INPUT_EN_ETHERTYPE;
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
+ filter->src_port = fdir->input.flow.udp6_flow.src_port;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT;
+ filter->dst_port = fdir->input.flow.udp6_flow.dst_port;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT;
+ filter->dst_port_mask = 0xffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT_MASK;
+ filter->src_port_mask = 0xffff;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT_MASK;
+ filter->ip_addr_type =
+ NTUPLE_FLTR_ALLOC_INPUT_IP_ADDR_TYPE_IPV6;
+ filter->ip_protocol = fdir->input.flow.udp6_flow.ip.proto;
+ en |= NTUPLE_FLTR_ALLOC_IN_EN_IP_PROTO;
+ rte_memcpy(filter->src_ipaddr,
+ fdir->input.flow.udp6_flow.ip.src_ip, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR;
+ rte_memcpy(filter->dst_ipaddr,
+ fdir->input.flow.udp6_flow.ip.dst_ip, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR;
+ memset(filter->dst_ipaddr_mask, 0xff, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR_MASK;
+ memset(filter->src_ipaddr_mask, 0xff, 16);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR_MASK;
+ filter->ethertype = 0x86dd;
+ filter->enables |= NTUPLE_FLTR_ALLOC_INPUT_EN_ETHERTYPE;
+ break;
+ case RTE_ETH_FLOW_L2_PAYLOAD:
+ filter->ethertype = fdir->input.flow.l2_flow.ether_type;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_ETHERTYPE;
+ break;
+ case RTE_ETH_FLOW_VXLAN:
+ if (fdir->action.behavior == RTE_ETH_FDIR_REJECT)
+ return -EINVAL;
+ filter->vni = fdir->input.flow.tunnel_flow.tunnel_id;
+ filter->tunnel_type =
+ CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
+ en |= HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE;
+ break;
+ case RTE_ETH_FLOW_NVGRE:
+ if (fdir->action.behavior == RTE_ETH_FDIR_REJECT)
+ return -EINVAL;
+ filter->vni = fdir->input.flow.tunnel_flow.tunnel_id;
+ filter->tunnel_type =
+ CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE;
+ en |= HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE;
+ break;
+ case RTE_ETH_FLOW_UNKNOWN:
+ case RTE_ETH_FLOW_RAW:
+ case RTE_ETH_FLOW_FRAG_IPV4:
+ case RTE_ETH_FLOW_NONFRAG_IPV4_SCTP:
+ case RTE_ETH_FLOW_FRAG_IPV6:
+ case RTE_ETH_FLOW_NONFRAG_IPV6_SCTP:
+ case RTE_ETH_FLOW_IPV6_EX:
+ case RTE_ETH_FLOW_IPV6_TCP_EX:
+ case RTE_ETH_FLOW_IPV6_UDP_EX:
+ case RTE_ETH_FLOW_GENEVE:
+ /* FALLTHROUGH */
+ default:
+ return -EINVAL;
+ }
+
+ vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+ vnic = STAILQ_FIRST(&bp->ff_pool[fdir->action.rx_queue]);
+ if (vnic == NULL) {
+ RTE_LOG(ERR, PMD, "Invalid queue %d\n", fdir->action.rx_queue);
+ return -EINVAL;
+ }
+
+
+ if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
+ rte_memcpy(filter->dst_macaddr,
+ fdir->input.flow.mac_vlan_flow.mac_addr.addr_bytes, 6);
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_DST_MACADDR;
+ }
+
+ if (fdir->action.behavior == RTE_ETH_FDIR_REJECT) {
+ filter->flags = HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_DROP;
+ filter1 = STAILQ_FIRST(&vnic0->filter);
+ //filter1 = bnxt_get_l2_filter(bp, filter, vnic0);
+ } else {
+ filter->dst_id = vnic->fw_vnic_id;
+ for (i = 0; i < ETHER_ADDR_LEN; i++)
+ if (filter->dst_macaddr[i] == 0x00)
+ filter1 = STAILQ_FIRST(&vnic0->filter);
+ else
+ filter1 = bnxt_get_l2_filter(bp, filter, vnic);
+ }
+
+ if (filter1 == NULL)
+ return -EINVAL;
+
+ en |= HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID;
+ filter->fw_l2_filter_id = filter1->fw_l2_filter_id;
+
+ filter->enables = en;
+
+ return 0;
+}
+
+static struct bnxt_filter_info *
+bnxt_match_fdir(struct bnxt *bp, struct bnxt_filter_info *nf)
+{
+ struct bnxt_filter_info *mf = NULL;
+ int i;
+
+ for (i = bp->nr_vnics - 1; i >= 0; i--) {
+ struct bnxt_vnic_info *vnic = &bp->vnic_info[i];
+
+ STAILQ_FOREACH(mf, &vnic->filter, next) {
+ if (mf->filter_type == nf->filter_type &&
+ mf->flags == nf->flags &&
+ mf->src_port == nf->src_port &&
+ mf->src_port_mask == nf->src_port_mask &&
+ mf->dst_port == nf->dst_port &&
+ mf->dst_port_mask == nf->dst_port_mask &&
+ mf->ip_protocol == nf->ip_protocol &&
+ mf->ip_addr_type == nf->ip_addr_type &&
+ mf->ethertype == nf->ethertype &&
+ mf->vni == nf->vni &&
+ mf->tunnel_type == nf->tunnel_type &&
+ mf->l2_ovlan == nf->l2_ovlan &&
+ mf->l2_ovlan_mask == nf->l2_ovlan_mask &&
+ mf->l2_ivlan == nf->l2_ivlan &&
+ mf->l2_ivlan_mask == nf->l2_ivlan_mask &&
+ !memcmp(mf->l2_addr, nf->l2_addr, ETHER_ADDR_LEN) &&
+ !memcmp(mf->l2_addr_mask, nf->l2_addr_mask,
+ ETHER_ADDR_LEN) &&
+ !memcmp(mf->src_macaddr, nf->src_macaddr,
+ ETHER_ADDR_LEN) &&
+ !memcmp(mf->dst_macaddr, nf->dst_macaddr,
+ ETHER_ADDR_LEN) &&
+ !memcmp(mf->src_ipaddr, nf->src_ipaddr,
+ sizeof(nf->src_ipaddr)) &&
+ !memcmp(mf->src_ipaddr_mask, nf->src_ipaddr_mask,
+ sizeof(nf->src_ipaddr_mask)) &&
+ !memcmp(mf->dst_ipaddr, nf->dst_ipaddr,
+ sizeof(nf->dst_ipaddr)) &&
+ !memcmp(mf->dst_ipaddr_mask, nf->dst_ipaddr_mask,
+ sizeof(nf->dst_ipaddr_mask)))
+ return mf;
+ }
+ }
+ return NULL;
+}
+
+static int
+bnxt_fdir_filter(struct rte_eth_dev *dev,
+ enum rte_filter_op filter_op,
+ void *arg)
+{
+ struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+ struct rte_eth_fdir_filter *fdir = (struct rte_eth_fdir_filter *)arg;
+ struct bnxt_filter_info *filter, *match;
+ struct bnxt_vnic_info *vnic;
+ int ret = 0, i;
+
+ if (filter_op == RTE_ETH_FILTER_NOP)
+ return 0;
+
+ if (arg == NULL && filter_op != RTE_ETH_FILTER_FLUSH)
+ return -EINVAL;
+
+ switch (filter_op) {
+ case RTE_ETH_FILTER_ADD:
+ case RTE_ETH_FILTER_DELETE:
+ /* FALLTHROUGH */
+ filter = bnxt_get_unused_filter(bp);
+ if (filter == NULL) {
+ RTE_LOG(ERR, PMD,
+ "Not enough resources for a new flow.\n");
+ return -ENOMEM;
+ }
+
+ ret = bnxt_parse_fdir_filter(bp, fdir, filter);
+ if (ret != 0)
+ goto free_filter;
+ filter->filter_type = HWRM_CFA_NTUPLE_FILTER;
+
+ match = bnxt_match_fdir(bp, filter);
+ if (match != NULL && filter_op == RTE_ETH_FILTER_ADD) {
+ RTE_LOG(ERR, PMD, "Flow already exists.\n");
+ ret = -EEXIST;
+ goto free_filter;
+ }
+ if (match == NULL && filter_op == RTE_ETH_FILTER_DELETE) {
+ RTE_LOG(ERR, PMD, "Flow does not exist.\n");
+ ret = -ENOENT;
+ goto free_filter;
+ }
+
+ if (fdir->action.behavior == RTE_ETH_FDIR_REJECT)
+ vnic = STAILQ_FIRST(&bp->ff_pool[0]);
+ else
+ vnic =
+ STAILQ_FIRST(&bp->ff_pool[fdir->action.rx_queue]);
+
+ if (filter_op == RTE_ETH_FILTER_ADD) {
+ ret = bnxt_hwrm_set_ntuple_filter(bp,
+ filter->dst_id,
+ filter);
+ if (ret)
+ goto free_filter;
+ STAILQ_INSERT_TAIL(&vnic->filter, filter, next);
+ } else {
+ ret = bnxt_hwrm_clear_ntuple_filter(bp, match);
+ STAILQ_REMOVE(&vnic->filter, match,
+ bnxt_filter_info, next);
+ bnxt_free_filter(bp, match);
+ filter->fw_l2_filter_id = -1;
+ bnxt_free_filter(bp, filter);
+ }
+ break;
+ case RTE_ETH_FILTER_FLUSH:
+ for (i = bp->nr_vnics - 1; i >= 0; i--) {
+ struct bnxt_vnic_info *vnic = &bp->vnic_info[i];
+
+ STAILQ_FOREACH(filter, &vnic->filter, next) {
+ if (filter->filter_type ==
+ HWRM_CFA_NTUPLE_FILTER) {
+ ret =
+ bnxt_hwrm_clear_ntuple_filter(bp,
+ filter);
+ STAILQ_REMOVE(&vnic->filter, filter,
+ bnxt_filter_info, next);
+ }
+ }
+ }
+ return ret;
+ case RTE_ETH_FILTER_UPDATE:
+ case RTE_ETH_FILTER_STATS:
+ case RTE_ETH_FILTER_INFO:
+ /* FALLTHROUGH */
+ RTE_LOG(ERR, PMD, "operation %u not implemented", filter_op);
+ break;
+ default:
+ RTE_LOG(ERR, PMD, "unknown operation %u", filter_op);
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+
+free_filter:
+ filter->fw_l2_filter_id = -1;
+ bnxt_free_filter(bp, filter);
+ return ret;
+}
+
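+/* Editor's note: bnxt_parse_fdir_filter translates classic flow-director
+ * requests into the same ntuple HWRM filter used above, with REJECT mapped
+ * to the drop flag on VNIC 0.  A hedged sketch of a flow-director add for
+ * IPv4/UDP; all literal values are examples (editor's illustration, not
+ * part of the commit):
+ *
+ *	#include <rte_byteorder.h>
+ *	#include <rte_eth_ctrl.h>
+ *	#include <rte_ethdev.h>
+ *
+ *	static int
+ *	add_fdir_udp4(uint16_t port_id, uint16_t rx_queue)
+ *	{
+ *		struct rte_eth_fdir_filter f = { 0 };
+ *
+ *		f.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
+ *		f.input.flow.udp4_flow.ip.src_ip = rte_cpu_to_be_32(0x0a000002);
+ *		f.input.flow.udp4_flow.ip.dst_ip = rte_cpu_to_be_32(0x0a000001);
+ *		f.input.flow.udp4_flow.src_port = rte_cpu_to_be_16(5000);
+ *		f.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(6000);
+ *		f.action.behavior = RTE_ETH_FDIR_ACCEPT;
+ *		f.action.rx_queue = rx_queue;
+ *
+ *		return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
+ *					       RTE_ETH_FILTER_ADD, &f);
+ *	}
+ */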
+static int
+bnxt_filter_ctrl_op(struct rte_eth_dev *dev __rte_unused,
+ enum rte_filter_type filter_type,
+ enum rte_filter_op filter_op, void *arg)
+{
+ int ret = 0;
+
+ switch (filter_type) {
+ case RTE_ETH_FILTER_TUNNEL:
+ RTE_LOG(ERR, PMD,
+ "filter type: %d: To be implemented\n", filter_type);
+ break;
+ case RTE_ETH_FILTER_FDIR:
+ ret = bnxt_fdir_filter(dev, filter_op, arg);
+ break;
+ case RTE_ETH_FILTER_NTUPLE:
+ ret = bnxt_ntuple_filter(dev, filter_op, arg);
+ break;
+ case RTE_ETH_FILTER_ETHERTYPE:
+ ret = bnxt_ethertype_filter(dev, filter_op, arg);
+ break;
+ case RTE_ETH_FILTER_GENERIC:
+ if (filter_op != RTE_ETH_FILTER_GET)
+ return -EINVAL;
+ *(const void **)arg = &bnxt_flow_ops;
+ break;
+ default:
+ RTE_LOG(ERR, PMD,
+ "Filter type (%d) not supported", filter_type);
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
+static const uint32_t *
+bnxt_dev_supported_ptypes_get_op(struct rte_eth_dev *dev)
+{
+ static const uint32_t ptypes[] = {
+ RTE_PTYPE_L2_ETHER_VLAN,
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
+ RTE_PTYPE_L4_ICMP,
+ RTE_PTYPE_L4_TCP,
+ RTE_PTYPE_L4_UDP,
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
+ RTE_PTYPE_INNER_L4_ICMP,
+ RTE_PTYPE_INNER_L4_TCP,
+ RTE_PTYPE_INNER_L4_UDP,
+ RTE_PTYPE_UNKNOWN
+ };
+
+ if (dev->rx_pkt_burst == bnxt_recv_pkts)
+ return ptypes;
+ return NULL;
+}
+
+
+
+static int
+bnxt_get_eeprom_length_op(struct rte_eth_dev *dev)
+{
+ struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+ int rc;
+ uint32_t dir_entries;
+ uint32_t entry_length;
+
+ RTE_LOG(INFO, PMD, "%s(): %04x:%02x:%02x:%02x\n",
+ __func__, bp->pdev->addr.domain, bp->pdev->addr.bus,
+ bp->pdev->addr.devid, bp->pdev->addr.function);
+
+ rc = bnxt_hwrm_nvm_get_dir_info(bp, &dir_entries, &entry_length);
+ if (rc != 0)
+ return rc;
+
+ return dir_entries * entry_length;
+}
+
+static int
+bnxt_get_eeprom_op(struct rte_eth_dev *dev,
+ struct rte_dev_eeprom_info *in_eeprom)
+{
+ struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+ uint32_t index;
+ uint32_t offset;
+
+ RTE_LOG(INFO, PMD, "%s(): %04x:%02x:%02x:%02x in_eeprom->offset = %d "
+ "len = %d\n", __func__, bp->pdev->addr.domain,
+ bp->pdev->addr.bus, bp->pdev->addr.devid,
+ bp->pdev->addr.function, in_eeprom->offset, in_eeprom->length);
+
+ if (in_eeprom->offset == 0) /* special offset value to get directory */
+ return bnxt_get_nvram_directory(bp, in_eeprom->length,
+ in_eeprom->data);
+
+ index = in_eeprom->offset >> 24;
+ offset = in_eeprom->offset & 0xffffff;
+
+ if (index != 0)
+ return bnxt_hwrm_get_nvram_item(bp, index - 1, offset,
+ in_eeprom->length, in_eeprom->data);
+
+ return 0;
+}
+
+static bool bnxt_dir_type_is_ape_bin_format(uint16_t dir_type)
+{
+ switch (dir_type) {
+ case BNX_DIR_TYPE_CHIMP_PATCH:
+ case BNX_DIR_TYPE_BOOTCODE:
+ case BNX_DIR_TYPE_BOOTCODE_2:
+ case BNX_DIR_TYPE_APE_FW:
+ case BNX_DIR_TYPE_APE_PATCH:
+ case BNX_DIR_TYPE_KONG_FW:
+ case BNX_DIR_TYPE_KONG_PATCH:
+ case BNX_DIR_TYPE_BONO_FW:
+ case BNX_DIR_TYPE_BONO_PATCH:
+ return true;
+ }
+
+ return false;
+}
+
+static bool bnxt_dir_type_is_other_exec_format(uint16_t dir_type)
+{
+ switch (dir_type) {
+ case BNX_DIR_TYPE_AVS:
+ case BNX_DIR_TYPE_EXP_ROM_MBA:
+ case BNX_DIR_TYPE_PCIE:
+ case BNX_DIR_TYPE_TSCF_UCODE:
+ case BNX_DIR_TYPE_EXT_PHY:
+ case BNX_DIR_TYPE_CCM:
+ case BNX_DIR_TYPE_ISCSI_BOOT:
+ case BNX_DIR_TYPE_ISCSI_BOOT_IPV6:
+ case BNX_DIR_TYPE_ISCSI_BOOT_IPV4N6:
+ return true;
+ }
+
+ return false;
+}
+
+static bool bnxt_dir_type_is_executable(uint16_t dir_type)
+{
+ return bnxt_dir_type_is_ape_bin_format(dir_type) ||
+ bnxt_dir_type_is_other_exec_format(dir_type);
+}
+
+static int
+bnxt_set_eeprom_op(struct rte_eth_dev *dev,
+ struct rte_dev_eeprom_info *in_eeprom)
+{
+ struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+ uint8_t index, dir_op;
+ uint16_t type, ext, ordinal, attr;
+
+ RTE_LOG(INFO, PMD, "%s(): %04x:%02x:%02x:%02x in_eeprom->offset = %d "
+ "len = %d\n", __func__, bp->pdev->addr.domain,
+ bp->pdev->addr.bus, bp->pdev->addr.devid,
+ bp->pdev->addr.function, in_eeprom->offset, in_eeprom->length);
+
+ if (!BNXT_PF(bp)) {
+ RTE_LOG(ERR, PMD, "NVM write not supported from a VF\n");
+ return -EINVAL;
+ }
+
+ type = in_eeprom->magic >> 16;
+
+ if (type == 0xffff) { /* special value for directory operations */
+ index = in_eeprom->magic & 0xff;
+ dir_op = in_eeprom->magic >> 8;
+ if (index == 0)
+ return -EINVAL;
+ switch (dir_op) {
+ case 0x0e: /* erase */
+ if (in_eeprom->offset != ~in_eeprom->magic)
+ return -EINVAL;
+ return bnxt_hwrm_erase_nvram_directory(bp, index - 1);
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* Create or re-write an NVM item: */
+ if (bnxt_dir_type_is_executable(type) == true)
+ return -EOPNOTSUPP;
+ ext = in_eeprom->magic & 0xffff;
+ ordinal = in_eeprom->offset >> 16;
+ attr = in_eeprom->offset & 0xffff;
+
+ return bnxt_hwrm_flash_nvram(bp, type, ordinal, ext, attr,
+ in_eeprom->data, in_eeprom->length);
+ return 0;
+}
+
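+/* Editor's note: the get/set EEPROM callbacks above overload the
+ * rte_dev_eeprom_info fields -- offset's top byte selects a 1-based NVM
+ * directory entry (0 means "read the directory") and magic encodes
+ * directory operations for writes.  A hedged sketch of how a caller would
+ * encode a read of one NVM item; the helper is illustrative (editor's
+ * illustration, not part of the commit):
+ *
+ *	#include <rte_ethdev.h>
+ *
+ *	static int
+ *	read_nvm_item(uint16_t port_id, uint32_t index, uint32_t item_offset,
+ *		      void *buf, uint32_t len)
+ *	{
+ *		struct rte_dev_eeprom_info info = {
+ *			.data = buf,
+ *			.offset = (index << 24) | (item_offset & 0xffffff),
+ *			.length = len,
+ *		};
+ *
+ *		return rte_eth_dev_get_eeprom(port_id, &info);
+ *	}
+ */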
/*
* Initialization
*/
@@ -1535,6 +2691,8 @@ static const struct eth_dev_ops bnxt_dev_ops = {
.rx_queue_release = bnxt_rx_queue_release_op,
.tx_queue_setup = bnxt_tx_queue_setup_op,
.tx_queue_release = bnxt_tx_queue_release_op,
+ .rx_queue_intr_enable = bnxt_rx_queue_intr_enable_op,
+ .rx_queue_intr_disable = bnxt_rx_queue_intr_disable_op,
.reta_update = bnxt_reta_update_op,
.reta_query = bnxt_reta_query_op,
.rss_hash_update = bnxt_rss_hash_update_op,
@@ -1564,6 +2722,16 @@ static const struct eth_dev_ops bnxt_dev_ops = {
.txq_info_get = bnxt_txq_info_get_op,
.dev_led_on = bnxt_dev_led_on_op,
.dev_led_off = bnxt_dev_led_off_op,
+ .xstats_get_by_id = bnxt_dev_xstats_get_by_id_op,
+ .xstats_get_names_by_id = bnxt_dev_xstats_get_names_by_id_op,
+ .rx_queue_count = bnxt_rx_queue_count_op,
+ .rx_descriptor_status = bnxt_rx_descriptor_status_op,
+ .tx_descriptor_status = bnxt_tx_descriptor_status_op,
+ .filter_ctrl = bnxt_filter_ctrl_op,
+ .dev_supported_ptypes_get = bnxt_dev_supported_ptypes_get_op,
+ .get_eeprom_length = bnxt_get_eeprom_length_op,
+ .get_eeprom = bnxt_get_eeprom_op,
+ .set_eeprom = bnxt_set_eeprom_op,
};
static bool bnxt_vf_pciid(uint16_t id)
@@ -1628,7 +2796,7 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
const struct rte_memzone *mz = NULL;
static int version_printed;
uint32_t total_alloc_len;
- phys_addr_t mz_phys_addr;
+ rte_iova_t mz_phys_addr;
struct bnxt *bp;
int rc;
@@ -1636,13 +2804,15 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
RTE_LOG(INFO, PMD, "%s\n", bnxt_version);
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
bp = eth_dev->data->dev_private;
rte_atomic64_init(&bp->rx_mbuf_alloc_fail);
bp->dev_stopped = 1;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ goto skip_init;
+
if (bnxt_vf_pciid(pci_dev->id.device_id))
bp->flags |= BNXT_FLAG_VF;
@@ -1652,7 +2822,10 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
"Board initialization failed rc: %x\n", rc);
goto error;
}
+skip_init:
eth_dev->dev_ops = &bnxt_dev_ops;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
eth_dev->rx_pkt_burst = &bnxt_recv_pkts;
eth_dev->tx_pkt_burst = &bnxt_xmit_pkts;
@@ -1674,13 +2847,13 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
return -ENOMEM;
}
memset(mz->addr, 0, mz->len);
- mz_phys_addr = mz->phys_addr;
+ mz_phys_addr = mz->iova;
if ((unsigned long)mz->addr == mz_phys_addr) {
RTE_LOG(WARNING, PMD,
"Memzone physical address same as virtual.\n");
RTE_LOG(WARNING, PMD,
- "Using rte_mem_virt2phy()\n");
- mz_phys_addr = rte_mem_virt2phy(mz->addr);
+ "Using rte_mem_virt2iova()\n");
+ mz_phys_addr = rte_mem_virt2iova(mz->addr);
if (mz_phys_addr == 0) {
RTE_LOG(ERR, PMD,
"unable to map address to physical memory\n");
@@ -1709,13 +2882,13 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
return -ENOMEM;
}
memset(mz->addr, 0, mz->len);
- mz_phys_addr = mz->phys_addr;
+ mz_phys_addr = mz->iova;
if ((unsigned long)mz->addr == mz_phys_addr) {
RTE_LOG(WARNING, PMD,
"Memzone physical address same as virtual.\n");
RTE_LOG(WARNING, PMD,
- "Using rte_mem_virt2phy()\n");
- mz_phys_addr = rte_mem_virt2phy(mz->addr);
+ "Using rte_mem_virt2iova()\n");
+ mz_phys_addr = rte_mem_virt2iova(mz->addr);
if (mz_phys_addr == 0) {
RTE_LOG(ERR, PMD,
"unable to map address to physical memory\n");
@@ -1755,11 +2928,11 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
goto error_free;
}
eth_dev->data->mac_addrs = rte_zmalloc("bnxt_mac_addr_tbl",
- ETHER_ADDR_LEN * MAX_NUM_MAC_ADDR, 0);
+ ETHER_ADDR_LEN * bp->max_l2_ctx, 0);
if (eth_dev->data->mac_addrs == NULL) {
RTE_LOG(ERR, PMD,
"Failed to alloc %u bytes needed to store MAC addr tbl",
- ETHER_ADDR_LEN * MAX_NUM_MAC_ADDR);
+ ETHER_ADDR_LEN * bp->max_l2_ctx);
rc = -ENOMEM;
goto error_free;
}
@@ -1798,6 +2971,8 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
ALLOW_FUNC(HWRM_VNIC_RSS_COS_LB_CTX_FREE);
ALLOW_FUNC(HWRM_CFA_L2_FILTER_FREE);
ALLOW_FUNC(HWRM_STAT_CTX_FREE);
+ ALLOW_FUNC(HWRM_PORT_PHY_QCFG);
+ ALLOW_FUNC(HWRM_VNIC_TPA_CFG);
rc = bnxt_hwrm_func_driver_register(bp);
if (rc) {
RTE_LOG(ERR, PMD,
@@ -1877,6 +3052,9 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev) {
struct bnxt *bp = eth_dev->data->dev_private;
int rc;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return -EPERM;
+
bnxt_disable_int(bp);
bnxt_free_int(bp);
bnxt_free_mem(bp);
diff --git a/drivers/net/bnxt/bnxt_filter.c b/drivers/net/bnxt/bnxt_filter.c
index e9aac271..65d30fb3 100644
--- a/drivers/net/bnxt/bnxt_filter.c
+++ b/drivers/net/bnxt/bnxt_filter.c
@@ -35,6 +35,9 @@
#include <rte_log.h>
#include <rte_malloc.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_tailq.h>
#include "bnxt.h"
#include "bnxt_filter.h"
@@ -94,6 +97,8 @@ void bnxt_init_filters(struct bnxt *bp)
for (i = 0; i < max_filters; i++) {
filter = &bp->filter_info[i];
filter->fw_l2_filter_id = -1;
+ filter->fw_em_filter_id = -1;
+ filter->fw_ntuple_filter_id = -1;
STAILQ_INSERT_TAIL(&bp->free_filter_list, filter, next);
}
}
@@ -121,7 +126,7 @@ void bnxt_free_all_filters(struct bnxt *bp)
for (i = 0; i < bp->pf.max_vfs; i++) {
STAILQ_FOREACH(filter, &bp->pf.vf_info[i].filter, next) {
- bnxt_hwrm_clear_filter(bp, filter);
+ bnxt_hwrm_clear_l2_filter(bp, filter);
}
}
}
@@ -142,7 +147,7 @@ void bnxt_free_filter_mem(struct bnxt *bp)
if (filter->fw_l2_filter_id != ((uint64_t)-1)) {
RTE_LOG(ERR, PMD, "HWRM filter is not freed??\n");
/* Call HWRM to try to free filter again */
- rc = bnxt_hwrm_clear_filter(bp, filter);
+ rc = bnxt_hwrm_clear_l2_filter(bp, filter);
if (rc)
RTE_LOG(ERR, PMD,
"HWRM filter cannot be freed rc = %d\n",
@@ -174,3 +179,1031 @@ int bnxt_alloc_filter_mem(struct bnxt *bp)
bp->filter_info = filter_mem;
return 0;
}
+
+struct bnxt_filter_info *bnxt_get_unused_filter(struct bnxt *bp)
+{
+ struct bnxt_filter_info *filter;
+
+ /* Find the 1st unused filter from the free_filter_list pool */
+ filter = STAILQ_FIRST(&bp->free_filter_list);
+ if (!filter) {
+ RTE_LOG(ERR, PMD, "No more free filter resources\n");
+ return NULL;
+ }
+ STAILQ_REMOVE_HEAD(&bp->free_filter_list, next);
+
+ return filter;
+}
+
+void bnxt_free_filter(struct bnxt *bp, struct bnxt_filter_info *filter)
+{
+ STAILQ_INSERT_TAIL(&bp->free_filter_list, filter, next);
+}
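
These two helpers are a plain borrow/return scheme on the STAILQ free list seeded by bnxt_init_filters(). A standalone illustration of the same pattern with a stub entry type (not driver code):

#include <stdio.h>
#include <sys/queue.h>

struct entry {
	STAILQ_ENTRY(entry) next;
	int id;
};
STAILQ_HEAD(free_list, entry);

int main(void)
{
	struct free_list pool = STAILQ_HEAD_INITIALIZER(pool);
	struct entry slots[4];
	struct entry *e;
	int i;

	for (i = 0; i < 4; i++) {                    /* seed, as bnxt_init_filters() does */
		slots[i].id = i;
		STAILQ_INSERT_TAIL(&pool, &slots[i], next);
	}

	e = STAILQ_FIRST(&pool);                     /* "get unused": peek the head... */
	if (e != NULL)
		STAILQ_REMOVE_HEAD(&pool, next);     /* ...then unlink it */
	printf("borrowed entry %d\n", e ? e->id : -1);

	if (e != NULL)
		STAILQ_INSERT_TAIL(&pool, e, next);  /* "free": return it to the pool */
	return 0;
}
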
+
+static int
+bnxt_flow_agrs_validate(const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ if (!pattern) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM_NUM,
+ NULL, "NULL pattern.");
+ return -rte_errno;
+ }
+
+ if (!actions) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION_NUM,
+ NULL, "NULL action.");
+ return -rte_errno;
+ }
+
+ if (!attr) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR,
+ NULL, "NULL attribute.");
+ return -rte_errno;
+ }
+
+ return 0;
+}
+
+static const struct rte_flow_item *
+nxt_non_void_pattern(const struct rte_flow_item *cur)
+{
+ while (1) {
+ if (cur->type != RTE_FLOW_ITEM_TYPE_VOID)
+ return cur;
+ cur++;
+ }
+}
+
+static const struct rte_flow_action *
+nxt_non_void_action(const struct rte_flow_action *cur)
+{
+ while (1) {
+ if (cur->type != RTE_FLOW_ACTION_TYPE_VOID)
+ return cur;
+ cur++;
+ }
+}
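
Both skippers rely on the rte_flow convention that item and action arrays end with an END entry, so the walk always terminates. For a pattern such as the one below, nxt_non_void_pattern(pattern) returns &pattern[1]:

	static const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_VOID },   /* skipped */
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },    /* first non-void item */
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
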
+
+static inline int check_zero_bytes(const uint8_t *bytes, int len)
+{
+ int i;
+ for (i = 0; i < len; i++)
+ if (bytes[i] != 0x00)
+ return 0;
+ return 1;
+}
+
+static int
+bnxt_filter_type_check(const struct rte_flow_item pattern[],
+ struct rte_flow_error *error __rte_unused)
+{
+ const struct rte_flow_item *item = nxt_non_void_pattern(pattern);
+ int use_ntuple = 1;
+
+ while (item->type != RTE_FLOW_ITEM_TYPE_END) {
+ switch (item->type) {
+ case RTE_FLOW_ITEM_TYPE_ETH:
+ use_ntuple = 1;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VLAN:
+ use_ntuple = 0;
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV4:
+ case RTE_FLOW_ITEM_TYPE_IPV6:
+ case RTE_FLOW_ITEM_TYPE_TCP:
+ case RTE_FLOW_ITEM_TYPE_UDP:
+ /* FALLTHROUGH */
+ /* need ntuple match, reset exact match */
+ if (!use_ntuple) {
+ RTE_LOG(ERR, PMD,
+ "VLAN flow cannot use NTUPLE filter\n");
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Cannot use VLAN with NTUPLE");
+ return -rte_errno;
+ }
+ use_ntuple |= 1;
+ break;
+ default:
+ RTE_LOG(ERR, PMD, "Unknown Flow type");
+ use_ntuple |= 1;
+ }
+ item++;
+ }
+ return use_ntuple;
+}
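
The net effect: any VLAN item steers the rule to the exact-match (EM) path, everything else stays on the ntuple path, and mixing VLAN with L3/L4 items is rejected. Two illustrative patterns (spec/mask omitted here, although the parser below requires them):

	/* ETH + IPV4 + TCP: bnxt_filter_type_check() returns 1 (ntuple filter) */
	static const struct rte_flow_item ntuple_pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_TCP },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};

	/* ETH + VLAN: returns 0 (EM filter); adding IPV4/TCP after VLAN is rejected */
	static const struct rte_flow_item em_pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_VLAN },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
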
+
+static int
+bnxt_validate_and_parse_flow_type(struct bnxt *bp,
+ const struct rte_flow_item pattern[],
+ struct rte_flow_error *error,
+ struct bnxt_filter_info *filter)
+{
+ const struct rte_flow_item *item = nxt_non_void_pattern(pattern);
+ const struct rte_flow_item_vlan *vlan_spec, *vlan_mask;
+ const struct rte_flow_item_ipv4 *ipv4_spec, *ipv4_mask;
+ const struct rte_flow_item_ipv6 *ipv6_spec, *ipv6_mask;
+ const struct rte_flow_item_tcp *tcp_spec, *tcp_mask;
+ const struct rte_flow_item_udp *udp_spec, *udp_mask;
+ const struct rte_flow_item_eth *eth_spec, *eth_mask;
+ const struct rte_flow_item_nvgre *nvgre_spec;
+ const struct rte_flow_item_nvgre *nvgre_mask;
+ const struct rte_flow_item_vxlan *vxlan_spec;
+ const struct rte_flow_item_vxlan *vxlan_mask;
+ uint8_t vni_mask[] = {0xFF, 0xFF, 0xFF};
+ uint8_t tni_mask[] = {0xFF, 0xFF, 0xFF};
+ const struct rte_flow_item_vf *vf_spec;
+ uint32_t tenant_id_be = 0;
+ bool vni_masked = 0;
+ bool tni_masked = 0;
+ uint32_t vf = 0;
+ int use_ntuple;
+ uint32_t en = 0;
+ int dflt_vnic;
+
+ use_ntuple = bnxt_filter_type_check(pattern, error);
+ RTE_LOG(DEBUG, PMD, "Use NTUPLE %d\n", use_ntuple);
+ if (use_ntuple < 0)
+ return use_ntuple;
+
+ filter->filter_type = use_ntuple ?
+ HWRM_CFA_NTUPLE_FILTER : HWRM_CFA_EM_FILTER;
+
+ while (item->type != RTE_FLOW_ITEM_TYPE_END) {
+ if (item->last) {
+ /* last or range is NOT supported as match criteria */
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "No support for range");
+ return -rte_errno;
+ }
+ if (!item->spec || !item->mask) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "spec/mask is NULL");
+ return -rte_errno;
+ }
+ switch (item->type) {
+ case RTE_FLOW_ITEM_TYPE_ETH:
+ eth_spec = (const struct rte_flow_item_eth *)item->spec;
+ eth_mask = (const struct rte_flow_item_eth *)item->mask;
+
+ /* Source and destination MAC address masks must not be
+ * partially set: each must be either all 0's or all 1's.
+ */
+ if ((!is_zero_ether_addr(&eth_mask->src) &&
+ !is_broadcast_ether_addr(&eth_mask->src)) ||
+ (!is_zero_ether_addr(&eth_mask->dst) &&
+ !is_broadcast_ether_addr(&eth_mask->dst))) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "MAC_addr mask not valid");
+ return -rte_errno;
+ }
+
+ /* Mask is not allowed. Only exact matches are */
+ if ((eth_mask->type & UINT16_MAX) != UINT16_MAX) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "ethertype mask not valid");
+ return -rte_errno;
+ }
+
+ if (is_broadcast_ether_addr(&eth_mask->dst)) {
+ rte_memcpy(filter->dst_macaddr,
+ &eth_spec->dst, 6);
+ en |= use_ntuple ?
+ NTUPLE_FLTR_ALLOC_INPUT_EN_DST_MACADDR :
+ EM_FLOW_ALLOC_INPUT_EN_DST_MACADDR;
+ }
+ if (is_broadcast_ether_addr(&eth_mask->src)) {
+ rte_memcpy(filter->src_macaddr,
+ &eth_spec->src, 6);
+ en |= use_ntuple ?
+ NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_MACADDR :
+ EM_FLOW_ALLOC_INPUT_EN_SRC_MACADDR;
+ } /*
+ * else {
+ * RTE_LOG(ERR, PMD, "Handle this condition\n");
+ * }
+ */
+ if (eth_spec->type) {
+ filter->ethertype =
+ rte_be_to_cpu_16(eth_spec->type);
+ en |= use_ntuple ?
+ NTUPLE_FLTR_ALLOC_INPUT_EN_ETHERTYPE :
+ EM_FLOW_ALLOC_INPUT_EN_ETHERTYPE;
+ }
+
+ break;
+ case RTE_FLOW_ITEM_TYPE_VLAN:
+ vlan_spec =
+ (const struct rte_flow_item_vlan *)item->spec;
+ vlan_mask =
+ (const struct rte_flow_item_vlan *)item->mask;
+ if (vlan_mask->tci & 0xFFFF && !vlan_mask->tpid) {
+ /* Only the VLAN ID can be matched. */
+ filter->l2_ovlan =
+ rte_be_to_cpu_16(vlan_spec->tci &
+ 0xFFF);
+ en |= EM_FLOW_ALLOC_INPUT_EN_OVLAN_VID;
+ } else {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "VLAN mask is invalid");
+ return -rte_errno;
+ }
+
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV4:
+ /* If mask is not involved, we could use EM filters. */
+ ipv4_spec =
+ (const struct rte_flow_item_ipv4 *)item->spec;
+ ipv4_mask =
+ (const struct rte_flow_item_ipv4 *)item->mask;
+ /* Only IP DST and SRC fields are maskable. */
+ if (ipv4_mask->hdr.version_ihl ||
+ ipv4_mask->hdr.type_of_service ||
+ ipv4_mask->hdr.total_length ||
+ ipv4_mask->hdr.packet_id ||
+ ipv4_mask->hdr.fragment_offset ||
+ ipv4_mask->hdr.time_to_live ||
+ ipv4_mask->hdr.next_proto_id ||
+ ipv4_mask->hdr.hdr_checksum) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid IPv4 mask.");
+ return -rte_errno;
+ }
+ filter->dst_ipaddr[0] = ipv4_spec->hdr.dst_addr;
+ filter->src_ipaddr[0] = ipv4_spec->hdr.src_addr;
+ if (use_ntuple)
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR |
+ NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR;
+ else
+ en |= EM_FLOW_ALLOC_INPUT_EN_SRC_IPADDR |
+ EM_FLOW_ALLOC_INPUT_EN_DST_IPADDR;
+ if (ipv4_mask->hdr.src_addr) {
+ filter->src_ipaddr_mask[0] =
+ ipv4_mask->hdr.src_addr;
+ en |= !use_ntuple ? 0 :
+ NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR_MASK;
+ }
+ if (ipv4_mask->hdr.dst_addr) {
+ filter->dst_ipaddr_mask[0] =
+ ipv4_mask->hdr.dst_addr;
+ en |= !use_ntuple ? 0 :
+ NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR_MASK;
+ }
+ filter->ip_addr_type = use_ntuple ?
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV4 :
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV4;
+ if (ipv4_spec->hdr.next_proto_id) {
+ filter->ip_protocol =
+ ipv4_spec->hdr.next_proto_id;
+ if (use_ntuple)
+ en |= NTUPLE_FLTR_ALLOC_IN_EN_IP_PROTO;
+ else
+ en |= EM_FLOW_ALLOC_INPUT_EN_IP_PROTO;
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV6:
+ ipv6_spec =
+ (const struct rte_flow_item_ipv6 *)item->spec;
+ ipv6_mask =
+ (const struct rte_flow_item_ipv6 *)item->mask;
+
+ /* Only IP DST and SRC fields are maskable. */
+ if (ipv6_mask->hdr.vtc_flow ||
+ ipv6_mask->hdr.payload_len ||
+ ipv6_mask->hdr.proto ||
+ ipv6_mask->hdr.hop_limits) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid IPv6 mask.");
+ return -rte_errno;
+ }
+
+ if (use_ntuple)
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR |
+ NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR;
+ else
+ en |= EM_FLOW_ALLOC_INPUT_EN_SRC_IPADDR |
+ EM_FLOW_ALLOC_INPUT_EN_DST_IPADDR;
+ rte_memcpy(filter->src_ipaddr,
+ ipv6_spec->hdr.src_addr, 16);
+ rte_memcpy(filter->dst_ipaddr,
+ ipv6_spec->hdr.dst_addr, 16);
+ if (!check_zero_bytes(ipv6_mask->hdr.src_addr, 16)) {
+ rte_memcpy(filter->src_ipaddr_mask,
+ ipv6_mask->hdr.src_addr, 16);
+ en |= !use_ntuple ? 0 :
+ NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR_MASK;
+ }
+ if (!check_zero_bytes(ipv6_mask->hdr.dst_addr, 16)) {
+ rte_memcpy(filter->dst_ipaddr_mask,
+ ipv6_mask->hdr.dst_addr, 16);
+ en |= !use_ntuple ? 0 :
+ NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR_MASK;
+ }
+ filter->ip_addr_type = use_ntuple ?
+ NTUPLE_FLTR_ALLOC_INPUT_IP_ADDR_TYPE_IPV6 :
+ EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV6;
+ break;
+ case RTE_FLOW_ITEM_TYPE_TCP:
+ tcp_spec = (const struct rte_flow_item_tcp *)item->spec;
+ tcp_mask = (const struct rte_flow_item_tcp *)item->mask;
+
+ /* Check TCP mask. Only DST & SRC ports are maskable */
+ if (tcp_mask->hdr.sent_seq ||
+ tcp_mask->hdr.recv_ack ||
+ tcp_mask->hdr.data_off ||
+ tcp_mask->hdr.tcp_flags ||
+ tcp_mask->hdr.rx_win ||
+ tcp_mask->hdr.cksum ||
+ tcp_mask->hdr.tcp_urp) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid TCP mask");
+ return -rte_errno;
+ }
+ filter->src_port = tcp_spec->hdr.src_port;
+ filter->dst_port = tcp_spec->hdr.dst_port;
+ if (use_ntuple)
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT |
+ NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT;
+ else
+ en |= EM_FLOW_ALLOC_INPUT_EN_SRC_PORT |
+ EM_FLOW_ALLOC_INPUT_EN_DST_PORT;
+ if (tcp_mask->hdr.dst_port) {
+ filter->dst_port_mask = tcp_mask->hdr.dst_port;
+ en |= !use_ntuple ? 0 :
+ NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT_MASK;
+ }
+ if (tcp_mask->hdr.src_port) {
+ filter->src_port_mask = tcp_mask->hdr.src_port;
+ en |= !use_ntuple ? 0 :
+ NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT_MASK;
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_UDP:
+ udp_spec = (const struct rte_flow_item_udp *)item->spec;
+ udp_mask = (const struct rte_flow_item_udp *)item->mask;
+
+ if (udp_mask->hdr.dgram_len ||
+ udp_mask->hdr.dgram_cksum) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid UDP mask");
+ return -rte_errno;
+ }
+
+ filter->src_port = udp_spec->hdr.src_port;
+ filter->dst_port = udp_spec->hdr.dst_port;
+ if (use_ntuple)
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT |
+ NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT;
+ else
+ en |= EM_FLOW_ALLOC_INPUT_EN_SRC_PORT |
+ EM_FLOW_ALLOC_INPUT_EN_DST_PORT;
+
+ if (udp_mask->hdr.dst_port) {
+ filter->dst_port_mask = udp_mask->hdr.dst_port;
+ en |= !use_ntuple ? 0 :
+ NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT_MASK;
+ }
+ if (udp_mask->hdr.src_port) {
+ filter->src_port_mask = udp_mask->hdr.src_port;
+ en |= !use_ntuple ? 0 :
+ NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT_MASK;
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_VXLAN:
+ vxlan_spec =
+ (const struct rte_flow_item_vxlan *)item->spec;
+ vxlan_mask =
+ (const struct rte_flow_item_vxlan *)item->mask;
+ /* Check if VXLAN item is used to describe protocol.
+ * If yes, both spec and mask should be NULL.
+ * If no, both spec and mask shouldn't be NULL.
+ */
+ if ((!vxlan_spec && vxlan_mask) ||
+ (vxlan_spec && !vxlan_mask)) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid VXLAN item");
+ return -rte_errno;
+ }
+
+ if (vxlan_spec->rsvd1 || vxlan_spec->rsvd0[0] ||
+ vxlan_spec->rsvd0[1] || vxlan_spec->rsvd0[2] ||
+ vxlan_spec->flags != 0x8) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid VXLAN item");
+ return -rte_errno;
+ }
+
+ /* Check if VNI is masked. */
+ if (vxlan_spec && vxlan_mask) {
+ vni_masked =
+ !!memcmp(vxlan_mask->vni, vni_mask,
+ RTE_DIM(vni_mask));
+ if (vni_masked) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid VNI mask");
+ return -rte_errno;
+ }
+
+ rte_memcpy(((uint8_t *)&tenant_id_be + 1),
+ vxlan_spec->vni, 3);
+ filter->vni =
+ rte_be_to_cpu_32(tenant_id_be);
+ filter->tunnel_type =
+ CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_NVGRE:
+ nvgre_spec =
+ (const struct rte_flow_item_nvgre *)item->spec;
+ nvgre_mask =
+ (const struct rte_flow_item_nvgre *)item->mask;
+ /* Check if NVGRE item is used to describe protocol.
+ * If yes, both spec and mask should be NULL.
+ * If no, both spec and mask shouldn't be NULL.
+ */
+ if ((!nvgre_spec && nvgre_mask) ||
+ (nvgre_spec && !nvgre_mask)) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid NVGRE item");
+ return -rte_errno;
+ }
+
+ if (nvgre_spec->c_k_s_rsvd0_ver != 0x2000 ||
+ nvgre_spec->protocol != 0x6558) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid NVGRE item");
+ return -rte_errno;
+ }
+
+ if (nvgre_spec && nvgre_mask) {
+ tni_masked =
+ !!memcmp(nvgre_mask->tni, tni_mask,
+ RTE_DIM(tni_mask));
+ if (tni_masked) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid TNI mask");
+ return -rte_errno;
+ }
+ rte_memcpy(((uint8_t *)&tenant_id_be + 1),
+ nvgre_spec->tni, 3);
+ filter->vni =
+ rte_be_to_cpu_32(tenant_id_be);
+ filter->tunnel_type =
+ CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE;
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_VF:
+ vf_spec = (const struct rte_flow_item_vf *)item->spec;
+ vf = vf_spec->id;
+ if (!BNXT_PF(bp)) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Configuring on a VF!");
+ return -rte_errno;
+ }
+
+ if (vf >= bp->pdev->max_vfs) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Incorrect VF id!");
+ return -rte_errno;
+ }
+
+ filter->mirror_vnic_id =
+ dflt_vnic = bnxt_hwrm_func_qcfg_vf_dflt_vnic_id(bp, vf);
+ if (dflt_vnic < 0) {
+ /* This simply indicates there's no driver
+ * loaded. This is not an error.
+ */
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Unable to get default VNIC for VF");
+ return -rte_errno;
+ }
+ filter->mirror_vnic_id = dflt_vnic;
+ en |= NTUPLE_FLTR_ALLOC_INPUT_EN_MIRROR_VNIC_ID;
+ break;
+ default:
+ break;
+ }
+ item++;
+ }
+ filter->enables = en;
+
+ return 0;
+}
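
A hedged sketch of spec/mask values that pass the checks above for an ntuple rule matching an IPv4 destination address and TCP destination port; the address and port are arbitrary examples, MAC masks stay all-zero, and only the maskable header fields are populated:

#include <string.h>
#include <rte_byteorder.h>
#include <rte_flow.h>

static void build_ipv4_tcp_match(struct rte_flow_item item[4],
				 struct rte_flow_item_eth *eth_spec,
				 struct rte_flow_item_eth *eth_mask,
				 struct rte_flow_item_ipv4 *ip_spec,
				 struct rte_flow_item_ipv4 *ip_mask,
				 struct rte_flow_item_tcp *tcp_spec,
				 struct rte_flow_item_tcp *tcp_mask)
{
	memset(eth_spec, 0, sizeof(*eth_spec));
	memset(eth_mask, 0, sizeof(*eth_mask));
	eth_spec->type = rte_cpu_to_be_16(0x0800);            /* IPv4 ethertype */
	eth_mask->type = 0xffff;                              /* exact ethertype match */

	memset(ip_spec, 0, sizeof(*ip_spec));
	memset(ip_mask, 0, sizeof(*ip_mask));
	ip_spec->hdr.dst_addr = rte_cpu_to_be_32(0x0a000001); /* 10.0.0.1 */
	ip_mask->hdr.dst_addr = rte_cpu_to_be_32(0xffffffff); /* only src/dst maskable */

	memset(tcp_spec, 0, sizeof(*tcp_spec));
	memset(tcp_mask, 0, sizeof(*tcp_mask));
	tcp_spec->hdr.dst_port = rte_cpu_to_be_16(80);
	tcp_mask->hdr.dst_port = 0xffff;                      /* only ports maskable */

	item[0] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_ETH,
					  .spec = eth_spec, .mask = eth_mask };
	item[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV4,
					  .spec = ip_spec, .mask = ip_mask };
	item[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_TCP,
					  .spec = tcp_spec, .mask = tcp_mask };
	item[3] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_END };
}
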
+
+/* Parse attributes */
+static int
+bnxt_flow_parse_attr(const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
+{
+ /* Must be input direction */
+ if (!attr->ingress) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+ attr, "Only support ingress.");
+ return -rte_errno;
+ }
+
+ /* Not supported */
+ if (attr->egress) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+ attr, "No support for egress.");
+ return -rte_errno;
+ }
+
+ /* Not supported */
+ if (attr->priority) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+ attr, "No support for priority.");
+ return -rte_errno;
+ }
+
+ /* Not supported */
+ if (attr->group) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+ attr, "No support for group.");
+ return -rte_errno;
+ }
+
+ return 0;
+}
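
Only plain ingress rules survive this check; an attribute block that parses cleanly keeps group, priority and egress at zero:

	static const struct rte_flow_attr attr = {
		.group = 0,
		.priority = 0,
		.ingress = 1,
		.egress = 0,
	};
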
+
+struct bnxt_filter_info *
+bnxt_get_l2_filter(struct bnxt *bp, struct bnxt_filter_info *nf,
+ struct bnxt_vnic_info *vnic)
+{
+ struct bnxt_filter_info *filter1, *f0;
+ struct bnxt_vnic_info *vnic0;
+ int rc;
+
+ vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+ f0 = STAILQ_FIRST(&vnic0->filter);
+
+ /* This flow has the same DST MAC as the port/L2 filter. */
+ if (memcmp(f0->l2_addr, nf->dst_macaddr, ETHER_ADDR_LEN) == 0)
+ return f0;
+
+ /* This flow needs a DST MAC that differs from the port/L2 filter. */
+ RTE_LOG(DEBUG, PMD, "Create L2 filter for DST MAC\n");
+ filter1 = bnxt_get_unused_filter(bp);
+ if (filter1 == NULL)
+ return NULL;
+ filter1->flags = HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_RX;
+ filter1->enables = HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR |
+ L2_FILTER_ALLOC_INPUT_EN_L2_ADDR_MASK;
+ memcpy(filter1->l2_addr, nf->dst_macaddr, ETHER_ADDR_LEN);
+ memset(filter1->l2_addr_mask, 0xff, ETHER_ADDR_LEN);
+ rc = bnxt_hwrm_set_l2_filter(bp, vnic->fw_vnic_id,
+ filter1);
+ if (rc) {
+ bnxt_free_filter(bp, filter1);
+ return NULL;
+ }
+ STAILQ_INSERT_TAIL(&vnic->filter, filter1, next);
+ return filter1;
+}
+
+static int
+bnxt_validate_and_parse_flow(struct rte_eth_dev *dev,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error,
+ struct bnxt_filter_info *filter)
+{
+ const struct rte_flow_action *act = nxt_non_void_action(actions);
+ struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+ const struct rte_flow_action_queue *act_q;
+ const struct rte_flow_action_vf *act_vf;
+ struct bnxt_vnic_info *vnic, *vnic0;
+ struct bnxt_filter_info *filter1;
+ uint32_t vf = 0;
+ int dflt_vnic;
+ int rc;
+
+ if (bp->eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+ RTE_LOG(ERR, PMD, "Cannot create flow on RSS queues\n");
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "Cannot create flow on RSS queues");
+ rc = -rte_errno;
+ goto ret;
+ }
+
+ rc = bnxt_validate_and_parse_flow_type(bp, pattern, error, filter);
+ if (rc != 0)
+ goto ret;
+
+ rc = bnxt_flow_parse_attr(attr, error);
+ if (rc != 0)
+ goto ret;
+ /* Only the ingress attribute is supported right now. */
+ filter->flags = HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_RX;
+
+ switch (act->type) {
+ case RTE_FLOW_ACTION_TYPE_QUEUE:
+ /* Allow this flow. Redirect to a VNIC. */
+ act_q = (const struct rte_flow_action_queue *)act->conf;
+ if (act_q->index >= bp->rx_nr_rings) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, act,
+ "Invalid queue ID.");
+ rc = -rte_errno;
+ goto ret;
+ }
+ RTE_LOG(DEBUG, PMD, "Queue index %d\n", act_q->index);
+
+ vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+ vnic = STAILQ_FIRST(&bp->ff_pool[act_q->index]);
+ if (vnic == NULL) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, act,
+ "No matching VNIC for queue ID.");
+ rc = -rte_errno;
+ goto ret;
+ }
+ filter->dst_id = vnic->fw_vnic_id;
+ filter1 = bnxt_get_l2_filter(bp, filter, vnic);
+ if (filter1 == NULL) {
+ rc = -ENOSPC;
+ goto ret;
+ }
+ filter->fw_l2_filter_id = filter1->fw_l2_filter_id;
+ RTE_LOG(DEBUG, PMD, "VNIC found\n");
+ break;
+ case RTE_FLOW_ACTION_TYPE_DROP:
+ vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+ filter1 = bnxt_get_l2_filter(bp, filter, vnic0);
+ if (filter1 == NULL) {
+ rc = -ENOSPC;
+ goto ret;
+ }
+ filter->fw_l2_filter_id = filter1->fw_l2_filter_id;
+ if (filter->filter_type == HWRM_CFA_EM_FILTER)
+ filter->flags =
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_DROP;
+ else
+ filter->flags =
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_DROP;
+ break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+ filter1 = bnxt_get_l2_filter(bp, filter, vnic0);
+ if (filter1 == NULL) {
+ rc = -ENOSPC;
+ goto ret;
+ }
+ filter->fw_l2_filter_id = filter1->fw_l2_filter_id;
+ filter->flags = HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_METER;
+ break;
+ case RTE_FLOW_ACTION_TYPE_VF:
+ act_vf = (const struct rte_flow_action_vf *)act->conf;
+ vf = act_vf->id;
+ if (!BNXT_PF(bp)) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ act,
+ "Configuring on a VF!");
+ rc = -rte_errno;
+ goto ret;
+ }
+
+ if (vf >= bp->pdev->max_vfs) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ act,
+ "Incorrect VF id!");
+ rc = -rte_errno;
+ goto ret;
+ }
+
+ filter->mirror_vnic_id =
+ dflt_vnic = bnxt_hwrm_func_qcfg_vf_dflt_vnic_id(bp, vf);
+ if (dflt_vnic < 0) {
+ /* This simply indicates there's no driver loaded.
+ * This is not an error.
+ */
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ act,
+ "Unable to get default VNIC for VF");
+ rc = -rte_errno;
+ goto ret;
+ }
+ filter->mirror_vnic_id = dflt_vnic;
+ filter->enables |= NTUPLE_FLTR_ALLOC_INPUT_EN_MIRROR_VNIC_ID;
+
+ vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+ filter1 = bnxt_get_l2_filter(bp, filter, vnic0);
+ if (filter1 == NULL) {
+ rc = -ENOSPC;
+ goto ret;
+ }
+ filter->fw_l2_filter_id = filter1->fw_l2_filter_id;
+ break;
+
+ default:
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, act,
+ "Invalid action.");
+ rc = -rte_errno;
+ goto ret;
+ }
+
+ act = nxt_non_void_action(++act);
+ if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ act, "Invalid action.");
+ rc = -rte_errno;
+ goto ret;
+ }
+ret:
+ return rc;
+}
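
On the action side, exactly one terminal action followed by END is accepted. A queue-steering example, assuming the port was configured with at least two Rx queues and without RSS (the check at the top of this function rejects RSS mode):

	static const struct rte_flow_action_queue queue = { .index = 1 };
	static const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
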
+
+static int
+bnxt_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+ struct bnxt_filter_info *filter;
+ int ret = 0;
+
+ ret = bnxt_flow_agrs_validate(attr, pattern, actions, error);
+ if (ret != 0)
+ return ret;
+
+ filter = bnxt_get_unused_filter(bp);
+ if (filter == NULL) {
+ RTE_LOG(ERR, PMD, "Not enough resources for a new flow.\n");
+ return -ENOMEM;
+ }
+
+ ret = bnxt_validate_and_parse_flow(dev, pattern, actions, attr,
+ error, filter);
+ /* No need to hold on to this filter if we are just validating flow */
+ filter->fw_l2_filter_id = -1;
+ bnxt_free_filter(bp, filter);
+
+ return ret;
+}
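
Applications reach this callback through rte_flow_validate(); a minimal sketch reusing the attr, pattern and actions arrays from the earlier sketches:

#include <stdio.h>
#include <rte_flow.h>

static int try_validate(uint16_t port_id, const struct rte_flow_attr *attr,
			const struct rte_flow_item *pattern,
			const struct rte_flow_action *actions)
{
	struct rte_flow_error error;
	int ret = rte_flow_validate(port_id, attr, pattern, actions, &error);

	if (ret)
		printf("flow rejected: %s\n",
		       error.message ? error.message : "(no message)");
	return ret;
}
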
+
+static int
+bnxt_match_filter(struct bnxt *bp, struct bnxt_filter_info *nf)
+{
+ struct bnxt_filter_info *mf;
+ struct rte_flow *flow;
+ int i;
+
+ for (i = bp->nr_vnics - 1; i >= 0; i--) {
+ struct bnxt_vnic_info *vnic = &bp->vnic_info[i];
+
+ STAILQ_FOREACH(flow, &vnic->flow_list, next) {
+ mf = flow->filter;
+
+ if (mf->filter_type == nf->filter_type &&
+ mf->flags == nf->flags &&
+ mf->src_port == nf->src_port &&
+ mf->src_port_mask == nf->src_port_mask &&
+ mf->dst_port == nf->dst_port &&
+ mf->dst_port_mask == nf->dst_port_mask &&
+ mf->ip_protocol == nf->ip_protocol &&
+ mf->ip_addr_type == nf->ip_addr_type &&
+ mf->ethertype == nf->ethertype &&
+ mf->vni == nf->vni &&
+ mf->tunnel_type == nf->tunnel_type &&
+ mf->l2_ovlan == nf->l2_ovlan &&
+ mf->l2_ovlan_mask == nf->l2_ovlan_mask &&
+ mf->l2_ivlan == nf->l2_ivlan &&
+ mf->l2_ivlan_mask == nf->l2_ivlan_mask &&
+ !memcmp(mf->l2_addr, nf->l2_addr, ETHER_ADDR_LEN) &&
+ !memcmp(mf->l2_addr_mask, nf->l2_addr_mask,
+ ETHER_ADDR_LEN) &&
+ !memcmp(mf->src_macaddr, nf->src_macaddr,
+ ETHER_ADDR_LEN) &&
+ !memcmp(mf->dst_macaddr, nf->dst_macaddr,
+ ETHER_ADDR_LEN) &&
+ !memcmp(mf->src_ipaddr, nf->src_ipaddr,
+ sizeof(nf->src_ipaddr)) &&
+ !memcmp(mf->src_ipaddr_mask, nf->src_ipaddr_mask,
+ sizeof(nf->src_ipaddr_mask)) &&
+ !memcmp(mf->dst_ipaddr, nf->dst_ipaddr,
+ sizeof(nf->dst_ipaddr)) &&
+ !memcmp(mf->dst_ipaddr_mask, nf->dst_ipaddr_mask,
+ sizeof(nf->dst_ipaddr_mask)))
+ return -EEXIST;
+ }
+ }
+ return 0;
+}
+
+static struct rte_flow *
+bnxt_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+ struct bnxt_filter_info *filter;
+ struct bnxt_vnic_info *vnic = NULL;
+ struct rte_flow *flow;
+ unsigned int i;
+ int ret = 0;
+
+ flow = rte_zmalloc("bnxt_flow", sizeof(struct rte_flow), 0);
+ if (!flow) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+ "Failed to allocate memory");
+ return flow;
+ }
+
+ ret = bnxt_flow_agrs_validate(attr, pattern, actions, error);
+ if (ret != 0) {
+ RTE_LOG(ERR, PMD, "Not a validate flow.\n");
+ goto free_flow;
+ }
+
+ filter = bnxt_get_unused_filter(bp);
+ if (filter == NULL) {
+ RTE_LOG(ERR, PMD, "Not enough resources for a new flow.\n");
+ goto free_flow;
+ }
+
+ ret = bnxt_validate_and_parse_flow(dev, pattern, actions, attr,
+ error, filter);
+ if (ret != 0)
+ goto free_filter;
+
+ ret = bnxt_match_filter(bp, filter);
+ if (ret != 0) {
+ RTE_LOG(DEBUG, PMD, "Flow already exists.\n");
+ goto free_filter;
+ }
+
+ if (filter->filter_type == HWRM_CFA_EM_FILTER) {
+ filter->enables |=
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_L2_FILTER_ID;
+ ret = bnxt_hwrm_set_em_filter(bp, filter->dst_id, filter);
+ }
+ if (filter->filter_type == HWRM_CFA_NTUPLE_FILTER) {
+ filter->enables |=
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID;
+ ret = bnxt_hwrm_set_ntuple_filter(bp, filter->dst_id, filter);
+ }
+
+ for (i = 0; i < bp->nr_vnics; i++) {
+ vnic = &bp->vnic_info[i];
+ if (filter->dst_id == vnic->fw_vnic_id)
+ break;
+ }
+
+ if (!ret) {
+ flow->filter = filter;
+ flow->vnic = vnic;
+ RTE_LOG(ERR, PMD, "Successfully created flow.\n");
+ STAILQ_INSERT_TAIL(&vnic->flow_list, flow, next);
+ return flow;
+ }
+free_filter:
+ filter->fw_l2_filter_id = -1;
+ bnxt_free_filter(bp, filter);
+free_flow:
+ if (ret == -EEXIST)
+ rte_flow_error_set(error, ret,
+ RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+ "Matching Flow exists.");
+ else
+ rte_flow_error_set(error, -ret,
+ RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+ "Failed to create flow.");
+ rte_free(flow);
+ flow = NULL;
+ return flow;
+}
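
From the application side this is rte_flow_create(); as bnxt_match_filter() above enforces, submitting the same rule twice fails and the error message reads "Matching Flow exists." A fragment reusing the names from the earlier sketches:

	struct rte_flow_error error;
	struct rte_flow *first, *dup;

	first = rte_flow_create(port_id, &attr, pattern, actions, &error);
	dup = rte_flow_create(port_id, &attr, pattern, actions, &error);
	/* 'dup' is NULL here and error.message reports the duplicate */
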
+
+static int
+bnxt_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+ struct bnxt_filter_info *filter = flow->filter;
+ struct bnxt_vnic_info *vnic = flow->vnic;
+ int ret = 0;
+
+ ret = bnxt_match_filter(bp, filter);
+ if (ret == 0)
+ RTE_LOG(ERR, PMD, "Could not find matching flow\n");
+ if (filter->filter_type == HWRM_CFA_EM_FILTER)
+ ret = bnxt_hwrm_clear_em_filter(bp, filter);
+ if (filter->filter_type == HWRM_CFA_NTUPLE_FILTER)
+ ret = bnxt_hwrm_clear_ntuple_filter(bp, filter);
+
+ if (!ret) {
+ STAILQ_REMOVE(&vnic->flow_list, flow, rte_flow, next);
+ rte_free(flow);
+ } else {
+ rte_flow_error_set(error, -ret,
+ RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+ "Failed to destroy flow.");
+ }
+
+ return ret;
+}
+
+static int
+bnxt_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+ struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
+ struct bnxt_vnic_info *vnic;
+ struct rte_flow *flow;
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < bp->nr_vnics; i++) {
+ vnic = &bp->vnic_info[i];
+ STAILQ_FOREACH(flow, &vnic->flow_list, next) {
+ struct bnxt_filter_info *filter = flow->filter;
+
+ if (filter->filter_type == HWRM_CFA_EM_FILTER)
+ ret = bnxt_hwrm_clear_em_filter(bp, filter);
+ if (filter->filter_type == HWRM_CFA_NTUPLE_FILTER)
+ ret = bnxt_hwrm_clear_ntuple_filter(bp, filter);
+
+ if (ret) {
+ rte_flow_error_set(error, -ret,
+ RTE_FLOW_ERROR_TYPE_HANDLE,
+ NULL,
+ "Failed to flush flow in HW.");
+ return -rte_errno;
+ }
+
+ STAILQ_REMOVE(&vnic->flow_list, flow,
+ rte_flow, next);
+ rte_free(flow);
+ }
+ }
+
+ return ret;
+}
+
+const struct rte_flow_ops bnxt_flow_ops = {
+ .validate = bnxt_flow_validate,
+ .create = bnxt_flow_create,
+ .destroy = bnxt_flow_destroy,
+ .flush = bnxt_flow_flush,
+};
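
The remaining callbacks map to rte_flow_destroy() and rte_flow_flush(); tearing down one rule or every rule on the port looks like this (continuing the fragment above):

	if (rte_flow_destroy(port_id, first, &error) != 0)
		printf("destroy failed: %s\n", error.message);

	if (rte_flow_flush(port_id, &error) != 0)
		printf("flush failed: %s\n", error.message);
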
diff --git a/drivers/net/bnxt/bnxt_filter.h b/drivers/net/bnxt/bnxt_filter.h
index 613b2eea..2591a87e 100644
--- a/drivers/net/bnxt/bnxt_filter.h
+++ b/drivers/net/bnxt/bnxt_filter.h
@@ -40,8 +40,15 @@ struct bnxt;
struct bnxt_filter_info {
STAILQ_ENTRY(bnxt_filter_info) next;
uint64_t fw_l2_filter_id;
+ uint64_t fw_em_filter_id;
+ uint64_t fw_ntuple_filter_id;
#define INVALID_MAC_INDEX ((uint16_t)-1)
uint16_t mac_index;
+#define HWRM_CFA_L2_FILTER 0
+#define HWRM_CFA_EM_FILTER 1
+#define HWRM_CFA_NTUPLE_FILTER 2
+ uint8_t filter_type; /* L2, EM, or ntuple filter */
+ uint32_t dst_id;
/* Filter Characteristics */
uint32_t flags;
@@ -65,6 +72,19 @@ struct bnxt_filter_info {
uint64_t l2_filter_id_hint;
uint32_t src_id;
uint8_t src_type;
+ uint8_t src_macaddr[6];
+ uint8_t dst_macaddr[6];
+ uint32_t dst_ipaddr[4];
+ uint32_t dst_ipaddr_mask[4];
+ uint32_t src_ipaddr[4];
+ uint32_t src_ipaddr_mask[4];
+ uint16_t dst_port;
+ uint16_t dst_port_mask;
+ uint16_t src_port;
+ uint16_t src_port_mask;
+ uint16_t ip_protocol;
+ uint16_t ip_addr_type;
+ uint16_t ethertype;
};
struct bnxt_filter_info *bnxt_alloc_filter(struct bnxt *bp);
@@ -73,5 +93,73 @@ void bnxt_init_filters(struct bnxt *bp);
void bnxt_free_all_filters(struct bnxt *bp);
void bnxt_free_filter_mem(struct bnxt *bp);
int bnxt_alloc_filter_mem(struct bnxt *bp);
+struct bnxt_filter_info *bnxt_get_unused_filter(struct bnxt *bp);
+void bnxt_free_filter(struct bnxt *bp, struct bnxt_filter_info *filter);
+struct bnxt_filter_info *bnxt_get_l2_filter(struct bnxt *bp,
+ struct bnxt_filter_info *nf, struct bnxt_vnic_info *vnic);
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_MACADDR \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_MACADDR
+#define EM_FLOW_ALLOC_INPUT_EN_SRC_MACADDR \
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_MACADDR
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_DST_MACADDR \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_MACADDR
+#define EM_FLOW_ALLOC_INPUT_EN_DST_MACADDR \
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_MACADDR
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_ETHERTYPE \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_ETHERTYPE
+#define EM_FLOW_ALLOC_INPUT_EN_ETHERTYPE \
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_ETHERTYPE
+#define EM_FLOW_ALLOC_INPUT_EN_OVLAN_VID \
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_OVLAN_VID
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR_MASK \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR_MASK
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR_MASK \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR_MASK
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT_MASK \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT_MASK
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT_MASK \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT_MASK
+#define NTUPLE_FLTR_ALLOC_IN_EN_IP_PROTO \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IP_PROTOCOL
+#define EM_FLOW_ALLOC_INPUT_EN_SRC_IPADDR \
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_IPADDR
+#define EM_FLOW_ALLOC_INPUT_EN_DST_IPADDR \
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_IPADDR
+#define EM_FLOW_ALLOC_INPUT_EN_SRC_PORT \
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_PORT
+#define EM_FLOW_ALLOC_INPUT_EN_DST_PORT \
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_PORT
+#define EM_FLOW_ALLOC_INPUT_EN_IP_PROTO \
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IP_PROTOCOL
+#define EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV6 \
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV6
+#define NTUPLE_FLTR_ALLOC_INPUT_IP_ADDR_TYPE_IPV6 \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV6
+#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN
+#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NVGRE
+#define L2_FILTER_ALLOC_INPUT_EN_L2_ADDR_MASK \
+ HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR_MASK
+#define NTUPLE_FLTR_ALLOC_INPUT_IP_PROTOCOL_UDP \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_UDP
+#define NTUPLE_FLTR_ALLOC_INPUT_IP_PROTOCOL_TCP \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_TCP
+#define NTUPLE_FLTR_ALLOC_INPUT_IP_PROTOCOL_UNKNOWN \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_UNKNOWN
+#define NTUPLE_FLTR_ALLOC_INPUT_IP_ADDR_TYPE_IPV4 \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV4
+#define NTUPLE_FLTR_ALLOC_INPUT_EN_MIRROR_VNIC_ID \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID
#endif
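
The shorthand macros above only shorten the HWRM enable-flag names used throughout bnxt_filter.c; a fragment mirroring how bnxt_validate_and_parse_flow_type() accumulates them for an ntuple rule on IPv4 addresses and TCP ports:

	uint32_t en = 0;

	en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_IPADDR |
	      NTUPLE_FLTR_ALLOC_INPUT_EN_DST_IPADDR;
	en |= NTUPLE_FLTR_ALLOC_INPUT_EN_SRC_PORT |
	      NTUPLE_FLTR_ALLOC_INPUT_EN_DST_PORT;
	filter->enables = en;   /* later consumed by bnxt_hwrm_set_ntuple_filter() */
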
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index e710e636..bf1fb469 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -54,7 +54,7 @@
#include <rte_io.h>
-#define HWRM_CMD_TIMEOUT 2000
+#define HWRM_CMD_TIMEOUT 10000
struct bnxt_plcmodes_cfg {
uint32_t flags;
@@ -95,7 +95,7 @@ static int page_roundup(size_t size)
* command was failed by the ChiMP.
*/
-static int bnxt_hwrm_send_message_locked(struct bnxt *bp, void *msg,
+static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
uint32_t msg_len)
{
unsigned int i;
@@ -171,52 +171,58 @@ err_ret:
return -1;
}
-static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg, uint32_t msg_len)
-{
- int rc;
-
- rte_spinlock_lock(&bp->hwrm_lock);
- rc = bnxt_hwrm_send_message_locked(bp, msg, msg_len);
- rte_spinlock_unlock(&bp->hwrm_lock);
- return rc;
-}
-
-#define HWRM_PREP(req, type, cr, resp) \
+/*
+ * HWRM_PREP() should be used to prepare *ALL* HWRM commands. It grabs the
+ * spinlock, and does initial processing.
+ *
+ * HWRM_CHECK_RESULT() checks the HWRM status and, on failure, releases the
+ * spinlock and returns from the calling function. If the calling function
+ * does not use plain int return codes, HWRM_CHECK_RESULT() should not be
+ * used directly; copy and modify it to suit that function instead.
+ *
+ * HWRM_UNLOCK() must be called after all response processing is completed.
+ */
+#define HWRM_PREP(req, type) do { \
+ rte_spinlock_lock(&bp->hwrm_lock); \
memset(bp->hwrm_cmd_resp_addr, 0, bp->max_resp_len); \
req.req_type = rte_cpu_to_le_16(HWRM_##type); \
- req.cmpl_ring = rte_cpu_to_le_16(cr); \
+ req.cmpl_ring = rte_cpu_to_le_16(-1); \
req.seq_id = rte_cpu_to_le_16(bp->hwrm_cmd_seq++); \
req.target_id = rte_cpu_to_le_16(0xffff); \
- req.resp_addr = rte_cpu_to_le_64(bp->hwrm_cmd_resp_dma_addr)
-
-#define HWRM_CHECK_RESULT \
- { \
- if (rc) { \
- RTE_LOG(ERR, PMD, "%s failed rc:%d\n", \
- __func__, rc); \
- return rc; \
+ req.resp_addr = rte_cpu_to_le_64(bp->hwrm_cmd_resp_dma_addr); \
+} while (0)
+
+#define HWRM_CHECK_RESULT() do {\
+ if (rc) { \
+ RTE_LOG(ERR, PMD, "%s failed rc:%d\n", \
+ __func__, rc); \
+ rte_spinlock_unlock(&bp->hwrm_lock); \
+ return rc; \
+ } \
+ if (resp->error_code) { \
+ rc = rte_le_to_cpu_16(resp->error_code); \
+ if (resp->resp_len >= 16) { \
+ struct hwrm_err_output *tmp_hwrm_err_op = \
+ (void *)resp; \
+ RTE_LOG(ERR, PMD, \
+ "%s error %d:%d:%08x:%04x\n", \
+ __func__, \
+ rc, tmp_hwrm_err_op->cmd_err, \
+ rte_le_to_cpu_32(\
+ tmp_hwrm_err_op->opaque_0), \
+ rte_le_to_cpu_16(\
+ tmp_hwrm_err_op->opaque_1)); \
} \
- if (resp->error_code) { \
- rc = rte_le_to_cpu_16(resp->error_code); \
- if (resp->resp_len >= 16) { \
- struct hwrm_err_output *tmp_hwrm_err_op = \
- (void *)resp; \
- RTE_LOG(ERR, PMD, \
- "%s error %d:%d:%08x:%04x\n", \
- __func__, \
- rc, tmp_hwrm_err_op->cmd_err, \
- rte_le_to_cpu_32(\
- tmp_hwrm_err_op->opaque_0), \
- rte_le_to_cpu_16(\
- tmp_hwrm_err_op->opaque_1)); \
- } \
- else { \
- RTE_LOG(ERR, PMD, \
- "%s error %d\n", __func__, rc); \
- } \
- return rc; \
+ else { \
+ RTE_LOG(ERR, PMD, \
+ "%s error %d\n", __func__, rc); \
} \
- }
+ rte_spinlock_unlock(&bp->hwrm_lock); \
+ return rc; \
+ } \
+} while (0)
+
+#define HWRM_UNLOCK() rte_spinlock_unlock(&bp->hwrm_lock)
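
To make that locking contract concrete, an annotated restatement of a query-style command (the same shape as bnxt_hwrm_vnic_alloc() further down); only the comments are additions:

static int bnxt_hwrm_example_query(struct bnxt *bp, struct bnxt_vnic_info *vnic)
{
	struct hwrm_vnic_alloc_input req = {.req_type = 0 };
	struct hwrm_vnic_alloc_output *resp = bp->hwrm_cmd_resp_addr;
	int rc;

	HWRM_PREP(req, VNIC_ALLOC);          /* takes bp->hwrm_lock, fills the header */

	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));   /* lock still held */

	HWRM_CHECK_RESULT();                 /* on failure: logs, unlocks, returns rc */

	/* success path: *resp is only safe to read while the lock is held */
	vnic->fw_vnic_id = rte_le_to_cpu_16(resp->vnic_id);

	HWRM_UNLOCK();                       /* drop the lock once resp is consumed */
	return rc;
}
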
int bnxt_hwrm_cfa_l2_clear_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic)
{
@@ -224,13 +230,14 @@ int bnxt_hwrm_cfa_l2_clear_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic)
struct hwrm_cfa_l2_set_rx_mask_input req = {.req_type = 0 };
struct hwrm_cfa_l2_set_rx_mask_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, CFA_L2_SET_RX_MASK, -1, resp);
+ HWRM_PREP(req, CFA_L2_SET_RX_MASK);
req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
req.mask = 0;
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -245,14 +252,14 @@ int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp,
struct hwrm_cfa_l2_set_rx_mask_output *resp = bp->hwrm_cmd_resp_addr;
uint32_t mask = 0;
- HWRM_PREP(req, CFA_L2_SET_RX_MASK, -1, resp);
+ HWRM_PREP(req, CFA_L2_SET_RX_MASK);
req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
/* FIXME add multicast flag, when multicast adding options is supported
* by ethtool.
*/
if (vnic->flags & BNXT_VNIC_INFO_BCAST)
- mask = HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_BCAST;
+ mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_BCAST;
if (vnic->flags & BNXT_VNIC_INFO_UNTAGGED)
mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_VLAN_NONVLAN;
if (vnic->flags & BNXT_VNIC_INFO_PROMISC)
@@ -269,16 +276,16 @@ int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp,
if (vlan_table) {
if (!(mask & HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_VLAN_NONVLAN))
mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_VLANONLY;
- req.vlan_tag_tbl_addr = rte_cpu_to_le_16(
- rte_mem_virt2phy(vlan_table));
+ req.vlan_tag_tbl_addr = rte_cpu_to_le_64(
+ rte_mem_virt2iova(vlan_table));
req.num_vlan_tags = rte_cpu_to_le_32((uint32_t)vlan_count);
}
- req.mask = rte_cpu_to_le_32(HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_BCAST |
- mask);
+ req.mask = rte_cpu_to_le_32(mask);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -307,21 +314,22 @@ int bnxt_hwrm_cfa_vlan_antispoof_cfg(struct bnxt *bp, uint16_t fid,
return 0;
}
}
- HWRM_PREP(req, CFA_VLAN_ANTISPOOF_CFG, -1, resp);
+ HWRM_PREP(req, CFA_VLAN_ANTISPOOF_CFG);
req.fid = rte_cpu_to_le_16(fid);
req.vlan_tag_mask_tbl_addr =
- rte_cpu_to_le_64(rte_mem_virt2phy(vlan_table));
+ rte_cpu_to_le_64(rte_mem_virt2iova(vlan_table));
req.num_vlan_entries = rte_cpu_to_le_32((uint32_t)vlan_count);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
-int bnxt_hwrm_clear_filter(struct bnxt *bp,
+int bnxt_hwrm_clear_l2_filter(struct bnxt *bp,
struct bnxt_filter_info *filter)
{
int rc = 0;
@@ -331,32 +339,50 @@ int bnxt_hwrm_clear_filter(struct bnxt *bp,
if (filter->fw_l2_filter_id == UINT64_MAX)
return 0;
- HWRM_PREP(req, CFA_L2_FILTER_FREE, -1, resp);
+ HWRM_PREP(req, CFA_L2_FILTER_FREE);
req.l2_filter_id = rte_cpu_to_le_64(filter->fw_l2_filter_id);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
filter->fw_l2_filter_id = -1;
return 0;
}
-int bnxt_hwrm_set_filter(struct bnxt *bp,
+int bnxt_hwrm_set_l2_filter(struct bnxt *bp,
uint16_t dst_id,
struct bnxt_filter_info *filter)
{
int rc = 0;
struct hwrm_cfa_l2_filter_alloc_input req = {.req_type = 0 };
struct hwrm_cfa_l2_filter_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+ struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf;
+ const struct rte_eth_vmdq_rx_conf *conf =
+ &dev_conf->rx_adv_conf.vmdq_rx_conf;
uint32_t enables = 0;
+ uint16_t j = dst_id - 1;
+
+ /* TODO: Is there a better way to add VLANs to each VNIC in case of VMDQ? */
+ if ((dev_conf->rxmode.mq_mode & ETH_MQ_RX_VMDQ_FLAG) &&
+ conf->pool_map[j].pools & (1UL << j)) {
+ RTE_LOG(DEBUG, PMD,
+ "Add vlan %u to vmdq pool %u\n",
+ conf->pool_map[j].vlan_id, j);
+
+ filter->l2_ivlan = conf->pool_map[j].vlan_id;
+ filter->enables |=
+ HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN |
+ HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN_MASK;
+ }
if (filter->fw_l2_filter_id != UINT64_MAX)
- bnxt_hwrm_clear_filter(bp, filter);
+ bnxt_hwrm_clear_l2_filter(bp, filter);
- HWRM_PREP(req, CFA_L2_FILTER_ALLOC, -1, resp);
+ HWRM_PREP(req, CFA_L2_FILTER_ALLOC);
req.flags = rte_cpu_to_le_32(filter->flags);
@@ -376,8 +402,14 @@ int bnxt_hwrm_set_filter(struct bnxt *bp,
HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_OVLAN)
req.l2_ovlan = filter->l2_ovlan;
if (enables &
+ HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN)
+ req.l2_ivlan = filter->l2_ivlan;
+ if (enables &
HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_OVLAN_MASK)
req.l2_ovlan_mask = filter->l2_ovlan_mask;
+ if (enables &
+ HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN_MASK)
+ req.l2_ivlan_mask = filter->l2_ivlan_mask;
if (enables & HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_SRC_ID)
req.src_id = rte_cpu_to_le_32(filter->src_id);
if (enables & HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_SRC_TYPE)
@@ -387,9 +419,10 @@ int bnxt_hwrm_set_filter(struct bnxt *bp,
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
filter->fw_l2_filter_id = rte_le_to_cpu_64(resp->l2_filter_id);
+ HWRM_UNLOCK();
return rc;
}
@@ -402,13 +435,13 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp)
uint16_t new_max_vfs;
int i;
- HWRM_PREP(req, FUNC_QCAPS, -1, resp);
+ HWRM_PREP(req, FUNC_QCAPS);
req.fid = rte_cpu_to_le_16(0xffff);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
bp->max_ring_grps = rte_le_to_cpu_32(resp->max_hw_ring_grps);
if (BNXT_PF(bp)) {
@@ -469,6 +502,7 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp)
bp->max_stat_ctx = rte_le_to_cpu_16(resp->max_stat_ctx);
if (BNXT_PF(bp))
bp->pf.total_vnics = rte_le_to_cpu_16(resp->max_vnics);
+ HWRM_UNLOCK();
return rc;
}
@@ -479,13 +513,14 @@ int bnxt_hwrm_func_reset(struct bnxt *bp)
struct hwrm_func_reset_input req = {.req_type = 0 };
struct hwrm_func_reset_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, FUNC_RESET, -1, resp);
+ HWRM_PREP(req, FUNC_RESET);
req.enables = rte_cpu_to_le_32(0);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -499,7 +534,7 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
if (bp->flags & BNXT_FLAG_REGISTERED)
return 0;
- HWRM_PREP(req, FUNC_DRV_RGTR, -1, resp);
+ HWRM_PREP(req, FUNC_DRV_RGTR);
req.enables = rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER |
HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_ASYNC_EVENT_FWD);
req.ver_maj = RTE_VER_YEAR;
@@ -519,7 +554,8 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
bp->flags |= BNXT_FLAG_REGISTERED;
@@ -538,19 +574,15 @@ int bnxt_hwrm_ver_get(struct bnxt *bp)
uint32_t dev_caps_cfg;
bp->max_req_len = HWRM_MAX_REQ_LEN;
- HWRM_PREP(req, VER_GET, -1, resp);
+ HWRM_PREP(req, VER_GET);
req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
req.hwrm_intf_min = HWRM_VERSION_MINOR;
req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
- /*
- * Hold the lock since we may be adjusting the response pointers.
- */
- rte_spinlock_lock(&bp->hwrm_lock);
- rc = bnxt_hwrm_send_message_locked(bp, &req, sizeof(req));
+ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
RTE_LOG(INFO, PMD, "%d.%d.%d:%d.%d.%d\n",
resp->hwrm_intf_maj, resp->hwrm_intf_min,
@@ -612,7 +644,7 @@ int bnxt_hwrm_ver_get(struct bnxt *bp)
}
rte_mem_lock_page(bp->hwrm_cmd_resp_addr);
bp->hwrm_cmd_resp_dma_addr =
- rte_mem_virt2phy(bp->hwrm_cmd_resp_addr);
+ rte_mem_virt2iova(bp->hwrm_cmd_resp_addr);
if (bp->hwrm_cmd_resp_dma_addr == 0) {
RTE_LOG(ERR, PMD,
"Unable to map response buffer to physical memory.\n");
@@ -638,7 +670,7 @@ int bnxt_hwrm_ver_get(struct bnxt *bp)
}
rte_mem_lock_page(bp->hwrm_short_cmd_req_addr);
bp->hwrm_short_cmd_req_dma_addr =
- rte_mem_virt2phy(bp->hwrm_short_cmd_req_addr);
+ rte_mem_virt2iova(bp->hwrm_short_cmd_req_addr);
if (bp->hwrm_short_cmd_req_dma_addr == 0) {
rte_free(bp->hwrm_short_cmd_req_addr);
RTE_LOG(ERR, PMD,
@@ -651,7 +683,7 @@ int bnxt_hwrm_ver_get(struct bnxt *bp)
}
error:
- rte_spinlock_unlock(&bp->hwrm_lock);
+ HWRM_UNLOCK();
return rc;
}
@@ -664,12 +696,13 @@ int bnxt_hwrm_func_driver_unregister(struct bnxt *bp, uint32_t flags)
if (!(bp->flags & BNXT_FLAG_REGISTERED))
return 0;
- HWRM_PREP(req, FUNC_DRV_UNRGTR, -1, resp);
+ HWRM_PREP(req, FUNC_DRV_UNRGTR);
req.flags = flags;
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
bp->flags &= ~BNXT_FLAG_REGISTERED;
@@ -685,7 +718,7 @@ static int bnxt_hwrm_port_phy_cfg(struct bnxt *bp, struct bnxt_link_info *conf)
uint32_t link_speed_mask =
HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_LINK_SPEED_MASK;
- HWRM_PREP(req, PORT_PHY_CFG, -1, resp);
+ HWRM_PREP(req, PORT_PHY_CFG);
if (conf->link_up) {
req.flags = rte_cpu_to_le_32(conf->phy_flags);
@@ -729,7 +762,8 @@ static int bnxt_hwrm_port_phy_cfg(struct bnxt *bp, struct bnxt_link_info *conf)
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -741,18 +775,18 @@ static int bnxt_hwrm_port_phy_qcfg(struct bnxt *bp,
struct hwrm_port_phy_qcfg_input req = {0};
struct hwrm_port_phy_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, PORT_PHY_QCFG, -1, resp);
+ HWRM_PREP(req, PORT_PHY_QCFG);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
link_info->phy_link_status = resp->link;
link_info->link_up =
(link_info->phy_link_status ==
HWRM_PORT_PHY_QCFG_OUTPUT_LINK_LINK) ? 1 : 0;
link_info->link_speed = rte_le_to_cpu_16(resp->link_speed);
- link_info->duplex = resp->duplex;
+ link_info->duplex = resp->duplex_cfg;
link_info->pause = resp->pause;
link_info->auto_pause = resp->auto_pause;
link_info->force_pause = resp->force_pause;
@@ -765,6 +799,8 @@ static int bnxt_hwrm_port_phy_qcfg(struct bnxt *bp,
link_info->phy_ver[1] = resp->phy_min;
link_info->phy_ver[2] = resp->phy_bld;
+ HWRM_UNLOCK();
+
return rc;
}
@@ -774,11 +810,11 @@ int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
struct hwrm_queue_qportcfg_input req = {.req_type = 0 };
struct hwrm_queue_qportcfg_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, QUEUE_QPORTCFG, -1, resp);
+ HWRM_PREP(req, QUEUE_QPORTCFG);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
#define GET_QUEUE_INFO(x) \
bp->cos_queue[x].id = resp->queue_id##x; \
@@ -793,6 +829,8 @@ int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
GET_QUEUE_INFO(6);
GET_QUEUE_INFO(7);
+ HWRM_UNLOCK();
+
return rc;
}
@@ -806,7 +844,7 @@ int bnxt_hwrm_ring_alloc(struct bnxt *bp,
struct hwrm_ring_alloc_input req = {.req_type = 0 };
struct hwrm_ring_alloc_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, RING_ALLOC, -1, resp);
+ HWRM_PREP(req, RING_ALLOC);
req.page_tbl_addr = rte_cpu_to_le_64(ring->bd_dma);
req.fbo = rte_cpu_to_le_32(0);
@@ -837,6 +875,7 @@ int bnxt_hwrm_ring_alloc(struct bnxt *bp,
default:
RTE_LOG(ERR, PMD, "hwrm alloc invalid ring type %d\n",
ring_type);
+ HWRM_UNLOCK();
return -1;
}
req.enables = rte_cpu_to_le_32(enables);
@@ -850,22 +889,27 @@ int bnxt_hwrm_ring_alloc(struct bnxt *bp,
case HWRM_RING_FREE_INPUT_RING_TYPE_L2_CMPL:
RTE_LOG(ERR, PMD,
"hwrm_ring_alloc cp failed. rc:%d\n", rc);
+ HWRM_UNLOCK();
return rc;
case HWRM_RING_FREE_INPUT_RING_TYPE_RX:
RTE_LOG(ERR, PMD,
"hwrm_ring_alloc rx failed. rc:%d\n", rc);
+ HWRM_UNLOCK();
return rc;
case HWRM_RING_FREE_INPUT_RING_TYPE_TX:
RTE_LOG(ERR, PMD,
"hwrm_ring_alloc tx failed. rc:%d\n", rc);
+ HWRM_UNLOCK();
return rc;
default:
RTE_LOG(ERR, PMD, "Invalid ring. rc:%d\n", rc);
+ HWRM_UNLOCK();
return rc;
}
}
ring->fw_ring_id = rte_le_to_cpu_16(resp->ring_id);
+ HWRM_UNLOCK();
return rc;
}
@@ -876,7 +920,7 @@ int bnxt_hwrm_ring_free(struct bnxt *bp,
struct hwrm_ring_free_input req = {.req_type = 0 };
struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, RING_FREE, -1, resp);
+ HWRM_PREP(req, RING_FREE);
req.ring_type = ring_type;
req.ring_id = rte_cpu_to_le_16(ring->fw_ring_id);
@@ -886,6 +930,7 @@ int bnxt_hwrm_ring_free(struct bnxt *bp,
if (rc || resp->error_code) {
if (rc == 0 && resp->error_code)
rc = rte_le_to_cpu_16(resp->error_code);
+ HWRM_UNLOCK();
switch (ring_type) {
case HWRM_RING_FREE_INPUT_RING_TYPE_L2_CMPL:
@@ -905,6 +950,7 @@ int bnxt_hwrm_ring_free(struct bnxt *bp,
return rc;
}
}
+ HWRM_UNLOCK();
return 0;
}
@@ -914,7 +960,7 @@ int bnxt_hwrm_ring_grp_alloc(struct bnxt *bp, unsigned int idx)
struct hwrm_ring_grp_alloc_input req = {.req_type = 0 };
struct hwrm_ring_grp_alloc_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, RING_GRP_ALLOC, -1, resp);
+ HWRM_PREP(req, RING_GRP_ALLOC);
req.cr = rte_cpu_to_le_16(bp->grp_info[idx].cp_fw_ring_id);
req.rr = rte_cpu_to_le_16(bp->grp_info[idx].rx_fw_ring_id);
@@ -923,11 +969,13 @@ int bnxt_hwrm_ring_grp_alloc(struct bnxt *bp, unsigned int idx)
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
bp->grp_info[idx].fw_grp_id =
rte_le_to_cpu_16(resp->ring_group_id);
+ HWRM_UNLOCK();
+
return rc;
}
@@ -937,13 +985,14 @@ int bnxt_hwrm_ring_grp_free(struct bnxt *bp, unsigned int idx)
struct hwrm_ring_grp_free_input req = {.req_type = 0 };
struct hwrm_ring_grp_free_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, RING_GRP_FREE, -1, resp);
+ HWRM_PREP(req, RING_GRP_FREE);
req.ring_group_id = rte_cpu_to_le_16(bp->grp_info[idx].fw_grp_id);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
bp->grp_info[idx].fw_grp_id = INVALID_HW_RING_ID;
return rc;
@@ -958,13 +1007,14 @@ int bnxt_hwrm_stat_clear(struct bnxt *bp, struct bnxt_cp_ring_info *cpr)
if (cpr->hw_stats_ctx_id == (uint32_t)HWRM_NA_SIGNATURE)
return rc;
- HWRM_PREP(req, STAT_CTX_CLR_STATS, -1, resp);
+ HWRM_PREP(req, STAT_CTX_CLR_STATS);
req.stat_ctx_id = rte_cpu_to_le_16(cpr->hw_stats_ctx_id);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -976,7 +1026,7 @@ int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
struct hwrm_stat_ctx_alloc_input req = {.req_type = 0 };
struct hwrm_stat_ctx_alloc_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, STAT_CTX_ALLOC, -1, resp);
+ HWRM_PREP(req, STAT_CTX_ALLOC);
req.update_period_ms = rte_cpu_to_le_32(0);
@@ -985,10 +1035,13 @@ int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
cpr->hw_stats_ctx_id = rte_le_to_cpu_16(resp->stat_ctx_id);
+ HWRM_UNLOCK();
+ bp->grp_info[idx].fw_stats_ctx = cpr->hw_stats_ctx_id;
+
return rc;
}
@@ -999,13 +1052,14 @@ int bnxt_hwrm_stat_ctx_free(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
struct hwrm_stat_ctx_free_input req = {.req_type = 0 };
struct hwrm_stat_ctx_free_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, STAT_CTX_FREE, -1, resp);
+ HWRM_PREP(req, STAT_CTX_FREE);
req.stat_ctx_id = rte_cpu_to_le_16(cpr->hw_stats_ctx_id);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -1027,15 +1081,16 @@ int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic)
vnic->lb_rule = (uint16_t)HWRM_NA_SIGNATURE;
vnic->mru = bp->eth_dev->data->mtu + ETHER_HDR_LEN +
ETHER_CRC_LEN + VLAN_TAG_SIZE;
- HWRM_PREP(req, VNIC_ALLOC, -1, resp);
+ HWRM_PREP(req, VNIC_ALLOC);
if (vnic->func_default)
req.flags = HWRM_VNIC_ALLOC_INPUT_FLAGS_DEFAULT;
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
vnic->fw_vnic_id = rte_le_to_cpu_16(resp->vnic_id);
+ HWRM_UNLOCK();
RTE_LOG(DEBUG, PMD, "VNIC ID %x\n", vnic->fw_vnic_id);
return rc;
}
@@ -1048,13 +1103,13 @@ static int bnxt_hwrm_vnic_plcmodes_qcfg(struct bnxt *bp,
struct hwrm_vnic_plcmodes_qcfg_input req = {.req_type = 0 };
struct hwrm_vnic_plcmodes_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, VNIC_PLCMODES_QCFG, -1, resp);
+ HWRM_PREP(req, VNIC_PLCMODES_QCFG);
req.vnic_id = rte_cpu_to_le_32(vnic->fw_vnic_id);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
pmode->flags = rte_le_to_cpu_32(resp->flags);
/* dflt_vnic bit doesn't exist in the _cfg command */
@@ -1063,6 +1118,8 @@ static int bnxt_hwrm_vnic_plcmodes_qcfg(struct bnxt *bp,
pmode->hds_offset = rte_le_to_cpu_16(resp->hds_offset);
pmode->hds_threshold = rte_le_to_cpu_16(resp->hds_threshold);
+ HWRM_UNLOCK();
+
return rc;
}
@@ -1074,7 +1131,7 @@ static int bnxt_hwrm_vnic_plcmodes_cfg(struct bnxt *bp,
struct hwrm_vnic_plcmodes_cfg_input req = {.req_type = 0 };
struct hwrm_vnic_plcmodes_cfg_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, VNIC_PLCMODES_CFG, -1, resp);
+ HWRM_PREP(req, VNIC_PLCMODES_CFG);
req.vnic_id = rte_cpu_to_le_32(vnic->fw_vnic_id);
req.flags = rte_cpu_to_le_32(pmode->flags);
@@ -1089,7 +1146,8 @@ static int bnxt_hwrm_vnic_plcmodes_cfg(struct bnxt *bp,
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -1099,7 +1157,7 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic)
int rc = 0;
struct hwrm_vnic_cfg_input req = {.req_type = 0 };
struct hwrm_vnic_cfg_output *resp = bp->hwrm_cmd_resp_addr;
- uint32_t ctx_enable_flag = HWRM_VNIC_CFG_INPUT_ENABLES_RSS_RULE;
+ uint32_t ctx_enable_flag = 0;
struct bnxt_plcmodes_cfg pmodes;
if (vnic->fw_vnic_id == INVALID_HW_RING_ID) {
@@ -1111,18 +1169,19 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic)
if (rc)
return rc;
- HWRM_PREP(req, VNIC_CFG, -1, resp);
+ HWRM_PREP(req, VNIC_CFG);
/* Only RSS support for now TBD: COS & LB */
req.enables =
- rte_cpu_to_le_32(HWRM_VNIC_CFG_INPUT_ENABLES_DFLT_RING_GRP |
- HWRM_VNIC_CFG_INPUT_ENABLES_MRU);
+ rte_cpu_to_le_32(HWRM_VNIC_CFG_INPUT_ENABLES_DFLT_RING_GRP);
if (vnic->lb_rule != 0xffff)
- ctx_enable_flag = HWRM_VNIC_CFG_INPUT_ENABLES_LB_RULE;
+ ctx_enable_flag |= HWRM_VNIC_CFG_INPUT_ENABLES_LB_RULE;
if (vnic->cos_rule != 0xffff)
- ctx_enable_flag = HWRM_VNIC_CFG_INPUT_ENABLES_COS_RULE;
- if (vnic->rss_rule != 0xffff)
- ctx_enable_flag = HWRM_VNIC_CFG_INPUT_ENABLES_RSS_RULE;
+ ctx_enable_flag |= HWRM_VNIC_CFG_INPUT_ENABLES_COS_RULE;
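+ /* MRU and the RSS rule are only enabled when this VNIC has a valid RSS context. */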
+ if (vnic->rss_rule != 0xffff) {
+ ctx_enable_flag |= HWRM_VNIC_CFG_INPUT_ENABLES_MRU;
+ ctx_enable_flag |= HWRM_VNIC_CFG_INPUT_ENABLES_RSS_RULE;
+ }
req.enables |= rte_cpu_to_le_32(ctx_enable_flag);
req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
req.dflt_ring_grp = rte_cpu_to_le_16(vnic->dflt_ring_grp);
@@ -1151,7 +1210,8 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic)
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
rc = bnxt_hwrm_vnic_plcmodes_cfg(bp, vnic, &pmodes);
@@ -1169,7 +1229,7 @@ int bnxt_hwrm_vnic_qcfg(struct bnxt *bp, struct bnxt_vnic_info *vnic,
RTE_LOG(DEBUG, PMD, "VNIC QCFG ID %d\n", vnic->fw_vnic_id);
return rc;
}
- HWRM_PREP(req, VNIC_QCFG, -1, resp);
+ HWRM_PREP(req, VNIC_QCFG);
req.enables =
rte_cpu_to_le_32(HWRM_VNIC_QCFG_INPUT_ENABLES_VF_ID_VALID);
@@ -1178,7 +1238,7 @@ int bnxt_hwrm_vnic_qcfg(struct bnxt *bp, struct bnxt_vnic_info *vnic,
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
vnic->dflt_ring_grp = rte_le_to_cpu_16(resp->dflt_ring_grp);
vnic->rss_rule = rte_le_to_cpu_16(resp->rss_rule);
@@ -1198,6 +1258,8 @@ int bnxt_hwrm_vnic_qcfg(struct bnxt *bp, struct bnxt_vnic_info *vnic,
vnic->rss_dflt_cr = rte_le_to_cpu_32(resp->flags) &
HWRM_VNIC_QCFG_OUTPUT_FLAGS_RSS_DFLT_CR_MODE;
+ HWRM_UNLOCK();
+
return rc;
}
@@ -1208,13 +1270,14 @@ int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic)
struct hwrm_vnic_rss_cos_lb_ctx_alloc_output *resp =
bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, VNIC_RSS_COS_LB_CTX_ALLOC, -1, resp);
+ HWRM_PREP(req, VNIC_RSS_COS_LB_CTX_ALLOC);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
vnic->rss_rule = rte_le_to_cpu_16(resp->rss_cos_lb_ctx_id);
+ HWRM_UNLOCK();
RTE_LOG(DEBUG, PMD, "VNIC RSS Rule %x\n", vnic->rss_rule);
return rc;
@@ -1231,13 +1294,14 @@ int bnxt_hwrm_vnic_ctx_free(struct bnxt *bp, struct bnxt_vnic_info *vnic)
RTE_LOG(DEBUG, PMD, "VNIC RSS Rule %x\n", vnic->rss_rule);
return rc;
}
- HWRM_PREP(req, VNIC_RSS_COS_LB_CTX_FREE, -1, resp);
+ HWRM_PREP(req, VNIC_RSS_COS_LB_CTX_FREE);
req.rss_cos_lb_ctx_id = rte_cpu_to_le_16(vnic->rss_rule);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
vnic->rss_rule = INVALID_HW_RING_ID;
@@ -1255,13 +1319,14 @@ int bnxt_hwrm_vnic_free(struct bnxt *bp, struct bnxt_vnic_info *vnic)
return rc;
}
- HWRM_PREP(req, VNIC_FREE, -1, resp);
+ HWRM_PREP(req, VNIC_FREE);
req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
vnic->fw_vnic_id = INVALID_HW_RING_ID;
return rc;
@@ -1274,7 +1339,7 @@ int bnxt_hwrm_vnic_rss_cfg(struct bnxt *bp,
struct hwrm_vnic_rss_cfg_input req = {.req_type = 0 };
struct hwrm_vnic_rss_cfg_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, VNIC_RSS_CFG, -1, resp);
+ HWRM_PREP(req, VNIC_RSS_CFG);
req.hash_type = rte_cpu_to_le_32(vnic->hash_type);
@@ -1286,7 +1351,8 @@ int bnxt_hwrm_vnic_rss_cfg(struct bnxt *bp,
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -1299,7 +1365,7 @@ int bnxt_hwrm_vnic_plcmode_cfg(struct bnxt *bp,
struct hwrm_vnic_plcmodes_cfg_output *resp = bp->hwrm_cmd_resp_addr;
uint16_t size;
- HWRM_PREP(req, VNIC_PLCMODES_CFG, -1, resp);
+ HWRM_PREP(req, VNIC_PLCMODES_CFG);
req.flags = rte_cpu_to_le_32(
HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_JUMBO_PLACEMENT);
@@ -1315,7 +1381,8 @@ int bnxt_hwrm_vnic_plcmode_cfg(struct bnxt *bp,
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -1327,7 +1394,7 @@ int bnxt_hwrm_vnic_tpa_cfg(struct bnxt *bp,
struct hwrm_vnic_tpa_cfg_input req = {.req_type = 0 };
struct hwrm_vnic_tpa_cfg_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, VNIC_TPA_CFG, -1, resp);
+ HWRM_PREP(req, VNIC_TPA_CFG);
if (enable) {
req.enables = rte_cpu_to_le_32(
@@ -1350,7 +1417,8 @@ int bnxt_hwrm_vnic_tpa_cfg(struct bnxt *bp,
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -1367,10 +1435,11 @@ int bnxt_hwrm_func_vf_mac(struct bnxt *bp, uint16_t vf, const uint8_t *mac_addr)
memcpy(req.dflt_mac_addr, mac_addr, sizeof(req.dflt_mac_addr));
req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
- HWRM_PREP(req, FUNC_CFG, -1, resp);
+ HWRM_PREP(req, FUNC_CFG);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
bp->pf.vf_info[vf].random_mac = false;
@@ -1384,17 +1453,19 @@ int bnxt_hwrm_func_qstats_tx_drop(struct bnxt *bp, uint16_t fid,
struct hwrm_func_qstats_input req = {.req_type = 0};
struct hwrm_func_qstats_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, FUNC_QSTATS, -1, resp);
+ HWRM_PREP(req, FUNC_QSTATS);
req.fid = rte_cpu_to_le_16(fid);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
if (dropped)
*dropped = rte_le_to_cpu_64(resp->tx_drop_pkts);
+ HWRM_UNLOCK();
+
return rc;
}
@@ -1405,13 +1476,13 @@ int bnxt_hwrm_func_qstats(struct bnxt *bp, uint16_t fid,
struct hwrm_func_qstats_input req = {.req_type = 0};
struct hwrm_func_qstats_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, FUNC_QSTATS, -1, resp);
+ HWRM_PREP(req, FUNC_QSTATS);
req.fid = rte_cpu_to_le_16(fid);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
stats->ipackets = rte_le_to_cpu_64(resp->rx_ucast_pkts);
stats->ipackets += rte_le_to_cpu_64(resp->rx_mcast_pkts);
@@ -1432,6 +1503,8 @@ int bnxt_hwrm_func_qstats(struct bnxt *bp, uint16_t fid,
stats->imissed = rte_le_to_cpu_64(resp->rx_drop_pkts);
+ HWRM_UNLOCK();
+
return rc;
}
@@ -1441,13 +1514,14 @@ int bnxt_hwrm_func_clr_stats(struct bnxt *bp, uint16_t fid)
struct hwrm_func_clr_stats_input req = {.req_type = 0};
struct hwrm_func_clr_stats_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, FUNC_CLR_STATS, -1, resp);
+ HWRM_PREP(req, FUNC_CLR_STATS);
req.fid = rte_cpu_to_le_16(fid);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -1542,12 +1616,8 @@ int bnxt_free_all_hwrm_ring_grps(struct bnxt *bp)
for (idx = 0; idx < bp->rx_cp_nr_rings; idx++) {
- if (bp->grp_info[idx].fw_grp_id == INVALID_HW_RING_ID) {
- RTE_LOG(ERR, PMD,
- "Attempt to free invalid ring group %d\n",
- idx);
+ if (bp->grp_info[idx].fw_grp_id == INVALID_HW_RING_ID)
continue;
- }
rc = bnxt_hwrm_ring_grp_free(bp, idx);
@@ -1683,7 +1753,7 @@ int bnxt_alloc_hwrm_resources(struct bnxt *bp)
if (bp->hwrm_cmd_resp_addr == NULL)
return -ENOMEM;
bp->hwrm_cmd_resp_dma_addr =
- rte_mem_virt2phy(bp->hwrm_cmd_resp_addr);
+ rte_mem_virt2iova(bp->hwrm_cmd_resp_addr);
if (bp->hwrm_cmd_resp_dma_addr == 0) {
RTE_LOG(ERR, PMD,
"unable to map response address to physical memory\n");
@@ -1700,9 +1770,39 @@ int bnxt_clear_hwrm_vnic_filters(struct bnxt *bp, struct bnxt_vnic_info *vnic)
int rc = 0;
STAILQ_FOREACH(filter, &vnic->filter, next) {
- rc = bnxt_hwrm_clear_filter(bp, filter);
- if (rc)
- break;
+ if (filter->filter_type == HWRM_CFA_EM_FILTER)
+ rc = bnxt_hwrm_clear_em_filter(bp, filter);
+ else if (filter->filter_type == HWRM_CFA_NTUPLE_FILTER)
+ rc = bnxt_hwrm_clear_ntuple_filter(bp, filter);
+ else
+ rc = bnxt_hwrm_clear_l2_filter(bp, filter);
+ /* Keep going and clear the remaining filters even if one fails. */
+ }
+ return rc;
+}
+
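+/*
+ * Walk the VNIC's flow list: clear each flow's hardware filter (EM,
+ * n-tuple or L2), unlink the rte_flow entry and free it.
+ */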
+static int
+bnxt_clear_hwrm_vnic_flows(struct bnxt *bp, struct bnxt_vnic_info *vnic)
+{
+ struct bnxt_filter_info *filter;
+ struct rte_flow *flow;
+ int rc = 0;
+
+ STAILQ_FOREACH(flow, &vnic->flow_list, next) {
+ filter = flow->filter;
+ RTE_LOG(ERR, PMD, "filter type %d\n", filter->filter_type);
+ if (filter->filter_type == HWRM_CFA_EM_FILTER)
+ rc = bnxt_hwrm_clear_em_filter(bp, filter);
+ else if (filter->filter_type == HWRM_CFA_NTUPLE_FILTER)
+ rc = bnxt_hwrm_clear_ntuple_filter(bp, filter);
+ else
+ rc = bnxt_hwrm_clear_l2_filter(bp, filter);
+
+ STAILQ_REMOVE(&vnic->flow_list, flow, rte_flow, next);
+ rte_free(flow);
+ /* Keep going and remove the remaining flows even if one fails to clear. */
}
return rc;
}
@@ -1713,7 +1813,15 @@ int bnxt_set_hwrm_vnic_filters(struct bnxt *bp, struct bnxt_vnic_info *vnic)
int rc = 0;
STAILQ_FOREACH(filter, &vnic->filter, next) {
- rc = bnxt_hwrm_set_filter(bp, vnic->fw_vnic_id, filter);
+ if (filter->filter_type == HWRM_CFA_EM_FILTER)
+ rc = bnxt_hwrm_set_em_filter(bp, filter->dst_id,
+ filter);
+ else if (filter->filter_type == HWRM_CFA_NTUPLE_FILTER)
+ rc = bnxt_hwrm_set_ntuple_filter(bp, filter->dst_id,
+ filter);
+ else
+ rc = bnxt_hwrm_set_l2_filter(bp, vnic->fw_vnic_id,
+ filter);
if (rc)
break;
}
@@ -1734,20 +1842,20 @@ void bnxt_free_tunnel_ports(struct bnxt *bp)
void bnxt_free_all_hwrm_resources(struct bnxt *bp)
{
- struct bnxt_vnic_info *vnic;
- unsigned int i;
+ int i;
if (bp->vnic_info == NULL)
return;
- vnic = &bp->vnic_info[0];
- if (BNXT_PF(bp))
- bnxt_hwrm_cfa_l2_clear_rx_mask(bp, vnic);
-
- /* VNIC resources */
- for (i = 0; i < bp->nr_vnics; i++) {
+ /*
+ * Cleanup VNICs in reverse order, to make sure the L2 filter
+ * from vnic0 is last to be cleaned up.
+ */
+ for (i = bp->nr_vnics - 1; i >= 0; i--) {
struct bnxt_vnic_info *vnic = &bp->vnic_info[i];
+ bnxt_clear_hwrm_vnic_flows(bp, vnic);
+
bnxt_clear_hwrm_vnic_filters(bp, vnic);
bnxt_hwrm_vnic_ctx_free(bp, vnic);
@@ -1833,7 +1941,7 @@ static uint16_t bnxt_parse_eth_link_speed(uint32_t conf_link_speed)
ETH_LINK_SPEED_10G | ETH_LINK_SPEED_20G | ETH_LINK_SPEED_25G | \
ETH_LINK_SPEED_40G | ETH_LINK_SPEED_50G)
-static int bnxt_valid_link_speed(uint32_t link_speed, uint8_t port_id)
+static int bnxt_valid_link_speed(uint32_t link_speed, uint16_t port_id)
{
uint32_t one_speed;
@@ -2038,12 +2146,12 @@ int bnxt_hwrm_func_qcfg(struct bnxt *bp)
struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
int rc = 0;
- HWRM_PREP(req, FUNC_QCFG, -1, resp);
+ HWRM_PREP(req, FUNC_QCFG);
req.fid = rte_cpu_to_le_16(0xffff);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
/* Hard Coded.. 0xfff VLAN ID mask */
bp->vlan = rte_le_to_cpu_16(resp->vlan) & 0xfff;
@@ -2059,6 +2167,8 @@ int bnxt_hwrm_func_qcfg(struct bnxt *bp)
break;
}
+ HWRM_UNLOCK();
+
return rc;
}
@@ -2118,10 +2228,12 @@ static int bnxt_hwrm_pf_func_cfg(struct bnxt *bp, int tx_rings)
req.num_hw_ring_grps = rte_cpu_to_le_16(bp->max_ring_grps);
req.fid = rte_cpu_to_le_16(0xffff);
- HWRM_PREP(req, FUNC_CFG, -1, resp);
+ HWRM_PREP(req, FUNC_CFG);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2187,7 +2299,7 @@ static void reserve_resources_from_vf(struct bnxt *bp,
int rc;
/* Get the actual allocated values now */
- HWRM_PREP(req, FUNC_QCAPS, -1, resp);
+ HWRM_PREP(req, FUNC_QCAPS);
req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
@@ -2212,6 +2324,8 @@ static void reserve_resources_from_vf(struct bnxt *bp,
*/
//bp->max_vnics -= rte_le_to_cpu_16(esp->max_vnics);
bp->max_ring_grps -= rte_le_to_cpu_16(resp->max_hw_ring_grps);
+
+ HWRM_UNLOCK();
}
int bnxt_hwrm_func_qcfg_current_vf_vlan(struct bnxt *bp, int vf)
@@ -2221,7 +2335,7 @@ int bnxt_hwrm_func_qcfg_current_vf_vlan(struct bnxt *bp, int vf)
int rc;
/* Check for zero MAC address */
- HWRM_PREP(req, FUNC_QCFG, -1, resp);
+ HWRM_PREP(req, FUNC_QCFG);
req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
if (rc) {
@@ -2232,7 +2346,11 @@ int bnxt_hwrm_func_qcfg_current_vf_vlan(struct bnxt *bp, int vf)
RTE_LOG(ERR, PMD, "hwrm_func_qcfg error %d\n", rc);
return -1;
}
- return rte_le_to_cpu_16(resp->vlan);
+ rc = rte_le_to_cpu_16(resp->vlan);
+
+ HWRM_UNLOCK();
+
+ return rc;
}
static int update_pf_resource_max(struct bnxt *bp)
@@ -2242,15 +2360,17 @@ static int update_pf_resource_max(struct bnxt *bp)
int rc;
/* And copy the allocated numbers into the pf struct */
- HWRM_PREP(req, FUNC_QCFG, -1, resp);
+ HWRM_PREP(req, FUNC_QCFG);
req.fid = rte_cpu_to_le_16(0xffff);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
/* Only TX ring value reflects actual allocation? TODO */
bp->max_tx_rings = rte_le_to_cpu_16(resp->alloc_tx_rings);
bp->pf.evb_mode = resp->evb_mode;
+ HWRM_UNLOCK();
+
return rc;
}
@@ -2342,7 +2462,7 @@ int bnxt_hwrm_allocate_vfs(struct bnxt *bp, int num_vfs)
for (i = 0; i < num_vfs; i++) {
add_random_mac_if_needed(bp, &req, i);
- HWRM_PREP(req, FUNC_CFG, -1, resp);
+ HWRM_PREP(req, FUNC_CFG);
req.flags = rte_cpu_to_le_32(bp->pf.vf_info[i].func_cfg_flags);
req.fid = rte_cpu_to_le_16(bp->pf.vf_info[i].fid);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
@@ -2357,9 +2477,12 @@ int bnxt_hwrm_allocate_vfs(struct bnxt *bp, int num_vfs)
RTE_LOG(ERR, PMD,
"Not all VFs available. (%d, %d)\n",
rc, resp->error_code);
+ HWRM_UNLOCK();
break;
}
+ HWRM_UNLOCK();
+
reserve_resources_from_vf(bp, &req, i);
bp->pf.active_vfs++;
bnxt_hwrm_func_clr_stats(bp, bp->pf.vf_info[i].fid);
@@ -2392,14 +2515,15 @@ int bnxt_hwrm_pf_evb_mode(struct bnxt *bp)
struct hwrm_func_cfg_output *resp = bp->hwrm_cmd_resp_addr;
int rc;
- HWRM_PREP(req, FUNC_CFG, -1, resp);
+ HWRM_PREP(req, FUNC_CFG);
req.fid = rte_cpu_to_le_16(0xffff);
req.enables = rte_cpu_to_le_32(HWRM_FUNC_CFG_INPUT_ENABLES_EVB_MODE);
req.evb_mode = bp->pf.evb_mode;
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2411,11 +2535,11 @@ int bnxt_hwrm_tunnel_dst_port_alloc(struct bnxt *bp, uint16_t port,
struct hwrm_tunnel_dst_port_alloc_output *resp = bp->hwrm_cmd_resp_addr;
int rc = 0;
- HWRM_PREP(req, TUNNEL_DST_PORT_ALLOC, -1, resp);
+ HWRM_PREP(req, TUNNEL_DST_PORT_ALLOC);
req.tunnel_type = tunnel_type;
req.tunnel_dst_port_val = port;
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
switch (tunnel_type) {
case HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_VXLAN:
@@ -2429,6 +2553,9 @@ int bnxt_hwrm_tunnel_dst_port_alloc(struct bnxt *bp, uint16_t port,
default:
break;
}
+
+ HWRM_UNLOCK();
+
return rc;
}
@@ -2439,11 +2566,14 @@ int bnxt_hwrm_tunnel_dst_port_free(struct bnxt *bp, uint16_t port,
struct hwrm_tunnel_dst_port_free_output *resp = bp->hwrm_cmd_resp_addr;
int rc = 0;
- HWRM_PREP(req, TUNNEL_DST_PORT_FREE, -1, resp);
+ HWRM_PREP(req, TUNNEL_DST_PORT_FREE);
+
req.tunnel_type = tunnel_type;
req.tunnel_dst_port_id = rte_cpu_to_be_16(port);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2455,11 +2585,14 @@ int bnxt_hwrm_func_cfg_vf_set_flags(struct bnxt *bp, uint16_t vf,
struct hwrm_func_cfg_input req = {0};
int rc;
- HWRM_PREP(req, FUNC_CFG, -1, resp);
+ HWRM_PREP(req, FUNC_CFG);
+
req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
req.flags = rte_cpu_to_le_32(flags);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2482,14 +2615,14 @@ int bnxt_hwrm_func_buf_rgtr(struct bnxt *bp)
struct hwrm_func_buf_rgtr_input req = {.req_type = 0 };
struct hwrm_func_buf_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, FUNC_BUF_RGTR, -1, resp);
+ HWRM_PREP(req, FUNC_BUF_RGTR);
req.req_buf_num_pages = rte_cpu_to_le_16(1);
req.req_buf_page_size = rte_cpu_to_le_16(
page_getenum(bp->pf.active_vfs * HWRM_MAX_REQ_LEN));
req.req_buf_len = rte_cpu_to_le_16(HWRM_MAX_REQ_LEN);
req.req_buf_page_addr[0] =
- rte_cpu_to_le_64(rte_mem_virt2phy(bp->pf.vf_req_buf));
+ rte_cpu_to_le_64(rte_mem_virt2iova(bp->pf.vf_req_buf));
if (req.req_buf_page_addr[0] == 0) {
RTE_LOG(ERR, PMD,
"unable to map buffer address to physical memory\n");
@@ -2498,7 +2631,8 @@ int bnxt_hwrm_func_buf_rgtr(struct bnxt *bp)
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2509,11 +2643,12 @@ int bnxt_hwrm_func_buf_unrgtr(struct bnxt *bp)
struct hwrm_func_buf_unrgtr_input req = {.req_type = 0 };
struct hwrm_func_buf_unrgtr_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, FUNC_BUF_UNRGTR, -1, resp);
+ HWRM_PREP(req, FUNC_BUF_UNRGTR);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2524,7 +2659,8 @@ int bnxt_hwrm_func_cfg_def_cp(struct bnxt *bp)
struct hwrm_func_cfg_input req = {0};
int rc;
- HWRM_PREP(req, FUNC_CFG, -1, resp);
+ HWRM_PREP(req, FUNC_CFG);
+
req.fid = rte_cpu_to_le_16(0xffff);
req.flags = rte_cpu_to_le_32(bp->pf.func_cfg_flags);
req.enables = rte_cpu_to_le_32(
@@ -2532,7 +2668,9 @@ int bnxt_hwrm_func_cfg_def_cp(struct bnxt *bp)
req.async_event_cr = rte_cpu_to_le_16(
bp->def_cp_ring->cp_ring_struct->fw_ring_id);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2543,13 +2681,16 @@ int bnxt_hwrm_vf_func_cfg_def_cp(struct bnxt *bp)
struct hwrm_func_vf_cfg_input req = {0};
int rc;
- HWRM_PREP(req, FUNC_VF_CFG, -1, resp);
+ HWRM_PREP(req, FUNC_VF_CFG);
+
req.enables = rte_cpu_to_le_32(
HWRM_FUNC_CFG_INPUT_ENABLES_ASYNC_EVENT_CR);
req.async_event_cr = rte_cpu_to_le_16(
bp->def_cp_ring->cp_ring_struct->fw_ring_id);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2562,7 +2703,7 @@ int bnxt_hwrm_set_default_vlan(struct bnxt *bp, int vf, uint8_t is_vf)
uint32_t func_cfg_flags;
int rc = 0;
- HWRM_PREP(req, FUNC_CFG, -1, resp);
+ HWRM_PREP(req, FUNC_CFG);
if (is_vf) {
dflt_vlan = bp->pf.vf_info[vf].dflt_vlan;
@@ -2580,7 +2721,9 @@ int bnxt_hwrm_set_default_vlan(struct bnxt *bp, int vf, uint8_t is_vf)
req.dflt_vlan = rte_cpu_to_le_16(dflt_vlan);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2592,13 +2735,16 @@ int bnxt_hwrm_func_bw_cfg(struct bnxt *bp, uint16_t vf,
struct hwrm_func_cfg_input req = {0};
int rc;
- HWRM_PREP(req, FUNC_CFG, -1, resp);
+ HWRM_PREP(req, FUNC_CFG);
+
req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
req.enables |= rte_cpu_to_le_32(enables);
req.flags = rte_cpu_to_le_32(bp->pf.vf_info[vf].func_cfg_flags);
req.max_bw = rte_cpu_to_le_32(max_bw);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2609,14 +2755,17 @@ int bnxt_hwrm_set_vf_vlan(struct bnxt *bp, int vf)
struct hwrm_func_cfg_output *resp = bp->hwrm_cmd_resp_addr;
int rc = 0;
- HWRM_PREP(req, FUNC_CFG, -1, resp);
+ HWRM_PREP(req, FUNC_CFG);
+
req.flags = rte_cpu_to_le_32(bp->pf.vf_info[vf].func_cfg_flags);
req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
req.enables |= rte_cpu_to_le_32(HWRM_FUNC_CFG_INPUT_ENABLES_DFLT_VLAN);
req.dflt_vlan = rte_cpu_to_le_16(bp->pf.vf_info[vf].dflt_vlan);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2631,14 +2780,15 @@ int bnxt_hwrm_reject_fwd_resp(struct bnxt *bp, uint16_t target_id,
if (ec_size > sizeof(req.encap_request))
return -1;
- HWRM_PREP(req, REJECT_FWD_RESP, -1, resp);
+ HWRM_PREP(req, REJECT_FWD_RESP);
req.encap_resp_target_id = rte_cpu_to_le_16(target_id);
memcpy(req.encap_request, encaped, ec_size);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -2650,13 +2800,17 @@ int bnxt_hwrm_func_qcfg_vf_default_mac(struct bnxt *bp, uint16_t vf,
struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
int rc;
- HWRM_PREP(req, FUNC_QCFG, -1, resp);
+ HWRM_PREP(req, FUNC_QCFG);
+
req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
memcpy(mac->addr_bytes, resp->mac_address, ETHER_ADDR_LEN);
+
+ HWRM_UNLOCK();
+
return rc;
}
@@ -2670,50 +2824,55 @@ int bnxt_hwrm_exec_fwd_resp(struct bnxt *bp, uint16_t target_id,
if (ec_size > sizeof(req.encap_request))
return -1;
- HWRM_PREP(req, EXEC_FWD_RESP, -1, resp);
+ HWRM_PREP(req, EXEC_FWD_RESP);
req.encap_resp_target_id = rte_cpu_to_le_16(target_id);
memcpy(req.encap_request, encaped, ec_size);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
int bnxt_hwrm_ctx_qstats(struct bnxt *bp, uint32_t cid, int idx,
- struct rte_eth_stats *stats)
+ struct rte_eth_stats *stats, uint8_t rx)
{
int rc = 0;
struct hwrm_stat_ctx_query_input req = {.req_type = 0};
struct hwrm_stat_ctx_query_output *resp = bp->hwrm_cmd_resp_addr;
- HWRM_PREP(req, STAT_CTX_QUERY, -1, resp);
+ HWRM_PREP(req, STAT_CTX_QUERY);
req.stat_ctx_id = rte_cpu_to_le_32(cid);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
-
- stats->q_ipackets[idx] = rte_le_to_cpu_64(resp->rx_ucast_pkts);
- stats->q_ipackets[idx] += rte_le_to_cpu_64(resp->rx_mcast_pkts);
- stats->q_ipackets[idx] += rte_le_to_cpu_64(resp->rx_bcast_pkts);
- stats->q_ibytes[idx] = rte_le_to_cpu_64(resp->rx_ucast_bytes);
- stats->q_ibytes[idx] += rte_le_to_cpu_64(resp->rx_mcast_bytes);
- stats->q_ibytes[idx] += rte_le_to_cpu_64(resp->rx_bcast_bytes);
+ HWRM_CHECK_RESULT();
+
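+ /*
+  * The same stat context query reports both directions; 'rx' selects
+  * whether the RX or the TX counters are recorded for this queue index.
+  */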
+ if (rx) {
+ stats->q_ipackets[idx] = rte_le_to_cpu_64(resp->rx_ucast_pkts);
+ stats->q_ipackets[idx] += rte_le_to_cpu_64(resp->rx_mcast_pkts);
+ stats->q_ipackets[idx] += rte_le_to_cpu_64(resp->rx_bcast_pkts);
+ stats->q_ibytes[idx] = rte_le_to_cpu_64(resp->rx_ucast_bytes);
+ stats->q_ibytes[idx] += rte_le_to_cpu_64(resp->rx_mcast_bytes);
+ stats->q_ibytes[idx] += rte_le_to_cpu_64(resp->rx_bcast_bytes);
+ stats->q_errors[idx] = rte_le_to_cpu_64(resp->rx_err_pkts);
+ stats->q_errors[idx] += rte_le_to_cpu_64(resp->rx_drop_pkts);
+ } else {
+ stats->q_opackets[idx] = rte_le_to_cpu_64(resp->tx_ucast_pkts);
+ stats->q_opackets[idx] += rte_le_to_cpu_64(resp->tx_mcast_pkts);
+ stats->q_opackets[idx] += rte_le_to_cpu_64(resp->tx_bcast_pkts);
+ stats->q_obytes[idx] = rte_le_to_cpu_64(resp->tx_ucast_bytes);
+ stats->q_obytes[idx] += rte_le_to_cpu_64(resp->tx_mcast_bytes);
+ stats->q_obytes[idx] += rte_le_to_cpu_64(resp->tx_bcast_bytes);
+ stats->q_errors[idx] += rte_le_to_cpu_64(resp->tx_err_pkts);
+ }
- stats->q_opackets[idx] = rte_le_to_cpu_64(resp->tx_ucast_pkts);
- stats->q_opackets[idx] += rte_le_to_cpu_64(resp->tx_mcast_pkts);
- stats->q_opackets[idx] += rte_le_to_cpu_64(resp->tx_bcast_pkts);
- stats->q_obytes[idx] = rte_le_to_cpu_64(resp->tx_ucast_bytes);
- stats->q_obytes[idx] += rte_le_to_cpu_64(resp->tx_mcast_bytes);
- stats->q_obytes[idx] += rte_le_to_cpu_64(resp->tx_bcast_bytes);
- stats->q_errors[idx] = rte_le_to_cpu_64(resp->rx_err_pkts);
- stats->q_errors[idx] += rte_le_to_cpu_64(resp->tx_err_pkts);
- stats->q_errors[idx] += rte_le_to_cpu_64(resp->rx_drop_pkts);
+ HWRM_UNLOCK();
return rc;
}
@@ -2728,12 +2887,16 @@ int bnxt_hwrm_port_qstats(struct bnxt *bp)
if (!(bp->flags & BNXT_FLAG_PORT_STATS))
return 0;
- HWRM_PREP(req, PORT_QSTATS, -1, resp);
+ HWRM_PREP(req, PORT_QSTATS);
+
req.port_id = rte_cpu_to_le_16(pf->port_id);
req.tx_stat_host_addr = rte_cpu_to_le_64(bp->hw_tx_port_stats_map);
req.rx_stat_host_addr = rte_cpu_to_le_64(bp->hw_rx_port_stats_map);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
+
return rc;
}
@@ -2747,10 +2910,14 @@ int bnxt_hwrm_port_clr_stats(struct bnxt *bp)
if (!(bp->flags & BNXT_FLAG_PORT_STATS))
return 0;
- HWRM_PREP(req, PORT_CLR_STATS, -1, resp);
+ HWRM_PREP(req, PORT_CLR_STATS);
+
req.port_id = rte_cpu_to_le_16(pf->port_id);
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
+
return rc;
}
@@ -2763,10 +2930,11 @@ int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
if (BNXT_VF(bp))
return 0;
- HWRM_PREP(req, PORT_LED_QCAPS, -1, resp);
+ HWRM_PREP(req, PORT_LED_QCAPS);
req.port_id = bp->pf.port_id;
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
if (resp->num_leds > 0 && resp->num_leds < BNXT_MAX_LED) {
unsigned int i;
@@ -2786,6 +2954,9 @@ int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
}
}
}
+
+ HWRM_UNLOCK();
+
return rc;
}
@@ -2801,7 +2972,8 @@ int bnxt_hwrm_port_led_cfg(struct bnxt *bp, bool led_on)
if (!bp->num_leds || BNXT_VF(bp))
return -EOPNOTSUPP;
- HWRM_PREP(req, PORT_LED_CFG, -1, resp);
+ HWRM_PREP(req, PORT_LED_CFG);
+
if (led_on) {
led_state = HWRM_PORT_LED_CFG_INPUT_LED0_STATE_BLINKALT;
duration = rte_cpu_to_le_16(500);
@@ -2819,8 +2991,171 @@ int bnxt_hwrm_port_led_cfg(struct bnxt *bp, bool led_on)
}
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
+
+ return rc;
+}
+
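+/* Query the NVM directory: number of entries and size of each entry. */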
+int bnxt_hwrm_nvm_get_dir_info(struct bnxt *bp, uint32_t *entries,
+ uint32_t *length)
+{
+ int rc;
+ struct hwrm_nvm_get_dir_info_input req = {0};
+ struct hwrm_nvm_get_dir_info_output *resp = bp->hwrm_cmd_resp_addr;
+
+ HWRM_PREP(req, NVM_GET_DIR_INFO);
+
+ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
+
+ if (!rc) {
+ *entries = rte_le_to_cpu_32(resp->entries);
+ *length = rte_le_to_cpu_32(resp->entry_length);
+ }
+ return rc;
+}
+
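+/*
+ * Copy the NVM directory into 'data': the first two bytes hold the entry
+ * count and entry length (each truncated to one byte), followed by the raw
+ * directory entries read from firmware through a DMA buffer.
+ */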
+int bnxt_get_nvram_directory(struct bnxt *bp, uint32_t len, uint8_t *data)
+{
+ int rc;
+ uint32_t dir_entries;
+ uint32_t entry_length;
+ uint8_t *buf;
+ size_t buflen;
+ rte_iova_t dma_handle;
+ struct hwrm_nvm_get_dir_entries_input req = {0};
+ struct hwrm_nvm_get_dir_entries_output *resp = bp->hwrm_cmd_resp_addr;
+
+ rc = bnxt_hwrm_nvm_get_dir_info(bp, &dir_entries, &entry_length);
+ if (rc != 0)
+ return rc;
+
+ *data++ = dir_entries;
+ *data++ = entry_length;
+ len -= 2;
+ memset(data, 0xff, len);
+
+ buflen = dir_entries * entry_length;
+ buf = rte_malloc("nvm_dir", buflen, 0);
+ rte_mem_lock_page(buf);
+ if (buf == NULL)
+ return -ENOMEM;
+ dma_handle = rte_mem_virt2iova(buf);
+ if (dma_handle == 0) {
+ RTE_LOG(ERR, PMD,
+ "unable to map response address to physical memory\n");
+ return -ENOMEM;
+ }
+ HWRM_PREP(req, NVM_GET_DIR_ENTRIES);
+ req.host_dest_addr = rte_cpu_to_le_64(dma_handle);
+ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
+
+ if (rc == 0)
+ memcpy(data, buf, len > buflen ? buflen : len);
+
+ rte_free(buf);
+
+ return rc;
+}
+
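+/*
+ * Read 'length' bytes at 'offset' within NVM directory entry 'index' into
+ * a DMA-able bounce buffer and copy the result to 'data'.
+ */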
+int bnxt_hwrm_get_nvram_item(struct bnxt *bp, uint32_t index,
+ uint32_t offset, uint32_t length,
+ uint8_t *data)
+{
+ int rc;
+ uint8_t *buf;
+ rte_iova_t dma_handle;
+ struct hwrm_nvm_read_input req = {0};
+ struct hwrm_nvm_read_output *resp = bp->hwrm_cmd_resp_addr;
+
+ buf = rte_malloc("nvm_item", length, 0);
+ rte_mem_lock_page(buf);
+ if (!buf)
+ return -ENOMEM;
+
+ dma_handle = rte_mem_virt2iova(buf);
+ if (dma_handle == 0) {
+ RTE_LOG(ERR, PMD,
+ "unable to map response address to physical memory\n");
+ return -ENOMEM;
+ }
+ HWRM_PREP(req, NVM_READ);
+ req.host_dest_addr = rte_cpu_to_le_64(dma_handle);
+ req.dir_idx = rte_cpu_to_le_16(index);
+ req.offset = rte_cpu_to_le_32(offset);
+ req.len = rte_cpu_to_le_32(length);
+ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
+ if (rc == 0)
+ memcpy(data, buf, length);
+
+ rte_free(buf);
+ return rc;
+}
+
+int bnxt_hwrm_erase_nvram_directory(struct bnxt *bp, uint8_t index)
+{
+ int rc;
+ struct hwrm_nvm_erase_dir_entry_input req = {0};
+ struct hwrm_nvm_erase_dir_entry_output *resp = bp->hwrm_cmd_resp_addr;
+
+ HWRM_PREP(req, NVM_ERASE_DIR_ENTRY);
+ req.dir_idx = rte_cpu_to_le_16(index);
+ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
+
+ return rc;
+}
+
+
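+/*
+ * Write an NVM item: the payload is staged in a DMA-able buffer and passed
+ * to firmware via NVM_WRITE along with the directory type, ordinal,
+ * extension and attributes.
+ */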
+int bnxt_hwrm_flash_nvram(struct bnxt *bp, uint16_t dir_type,
+ uint16_t dir_ordinal, uint16_t dir_ext,
+ uint16_t dir_attr, const uint8_t *data,
+ size_t data_len)
+{
+ int rc;
+ struct hwrm_nvm_write_input req = {0};
+ struct hwrm_nvm_write_output *resp = bp->hwrm_cmd_resp_addr;
+ rte_iova_t dma_handle;
+ uint8_t *buf;
+
+ HWRM_PREP(req, NVM_WRITE);
+
+ req.dir_type = rte_cpu_to_le_16(dir_type);
+ req.dir_ordinal = rte_cpu_to_le_16(dir_ordinal);
+ req.dir_ext = rte_cpu_to_le_16(dir_ext);
+ req.dir_attr = rte_cpu_to_le_16(dir_attr);
+ req.dir_data_length = rte_cpu_to_le_32(data_len);
+
+ buf = rte_malloc("nvm_write", data_len, 0);
+ rte_mem_lock_page(buf);
+ if (!buf)
+ return -ENOMEM;
+
+ dma_handle = rte_mem_virt2iova(buf);
+ if (dma_handle == 0) {
+ RTE_LOG(ERR, PMD,
+ "unable to map response address to physical memory\n");
+ return -ENOMEM;
+ }
+ memcpy(buf, data, data_len);
+ req.host_src_addr = rte_cpu_to_le_64(dma_handle);
+
+ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
+
+ rte_free(buf);
return rc;
}
@@ -2857,28 +3192,34 @@ static int bnxt_hwrm_func_vf_vnic_query(struct bnxt *bp, uint16_t vf,
int rc;
/* First query all VNIC ids */
- HWRM_PREP(req, FUNC_VF_VNIC_IDS_QUERY, -1, resp_vf_vnic_ids);
+ HWRM_PREP(req, FUNC_VF_VNIC_IDS_QUERY);
req.vf_id = rte_cpu_to_le_16(bp->pf.first_vf_id + vf);
req.max_vnic_id_cnt = rte_cpu_to_le_32(bp->pf.total_vnics);
- req.vnic_id_tbl_addr = rte_cpu_to_le_64(rte_mem_virt2phy(vnic_ids));
+ req.vnic_id_tbl_addr = rte_cpu_to_le_64(rte_mem_virt2iova(vnic_ids));
if (req.vnic_id_tbl_addr == 0) {
+ HWRM_UNLOCK();
RTE_LOG(ERR, PMD,
"unable to map VNIC ID table address to physical memory\n");
return -ENOMEM;
}
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
if (rc) {
+ HWRM_UNLOCK();
RTE_LOG(ERR, PMD, "hwrm_func_vf_vnic_query failed rc:%d\n", rc);
return -1;
} else if (resp->error_code) {
rc = rte_le_to_cpu_16(resp->error_code);
+ HWRM_UNLOCK();
RTE_LOG(ERR, PMD, "hwrm_func_vf_vnic_query error %d\n", rc);
return -1;
}
+ rc = rte_le_to_cpu_32(resp->vnic_id_cnt);
+
+ HWRM_UNLOCK();
- return rte_le_to_cpu_32(resp->vnic_id_cnt);
+ return rc;
}
/*
@@ -2943,7 +3284,8 @@ int bnxt_hwrm_func_cfg_vf_set_vlan_anti_spoof(struct bnxt *bp, uint16_t vf,
struct hwrm_func_cfg_input req = {0};
int rc;
- HWRM_PREP(req, FUNC_CFG, -1, resp);
+ HWRM_PREP(req, FUNC_CFG);
+
req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
req.enables |= rte_cpu_to_le_32(
HWRM_FUNC_CFG_INPUT_ENABLES_VLAN_ANTISPOOF_MODE);
@@ -2951,7 +3293,9 @@ int bnxt_hwrm_func_cfg_vf_set_vlan_anti_spoof(struct bnxt *bp, uint16_t vf,
HWRM_FUNC_CFG_INPUT_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN :
HWRM_FUNC_CFG_INPUT_VLAN_ANTISPOOF_MODE_NOCHECK;
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
- HWRM_CHECK_RESULT;
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
return rc;
}
@@ -3004,3 +3348,215 @@ exit:
rte_free(vnic_ids);
return -1;
}
+
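+/*
+ * Install an exact-match (EM) CFA flow directed at 'dst_id'. Any EM filter
+ * previously programmed for this entry is cleared first, and only the
+ * fields flagged in 'enables' are sent to firmware.
+ */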
+int bnxt_hwrm_set_em_filter(struct bnxt *bp,
+ uint16_t dst_id,
+ struct bnxt_filter_info *filter)
+{
+ int rc = 0;
+ struct hwrm_cfa_em_flow_alloc_input req = {.req_type = 0 };
+ struct hwrm_cfa_em_flow_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+ uint32_t enables = 0;
+
+ if (filter->fw_em_filter_id != UINT64_MAX)
+ bnxt_hwrm_clear_em_filter(bp, filter);
+
+ HWRM_PREP(req, CFA_EM_FLOW_ALLOC);
+
+ req.flags = rte_cpu_to_le_32(filter->flags);
+
+ enables = filter->enables |
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_ID;
+ req.dst_id = rte_cpu_to_le_16(dst_id);
+
+ if (filter->ip_addr_type) {
+ req.ip_addr_type = filter->ip_addr_type;
+ enables |= HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IPADDR_TYPE;
+ }
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_L2_FILTER_ID)
+ req.l2_filter_id = rte_cpu_to_le_64(filter->fw_l2_filter_id);
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_MACADDR)
+ memcpy(req.src_macaddr, filter->src_macaddr,
+ ETHER_ADDR_LEN);
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_MACADDR)
+ memcpy(req.dst_macaddr, filter->dst_macaddr,
+ ETHER_ADDR_LEN);
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_OVLAN_VID)
+ req.ovlan_vid = filter->l2_ovlan;
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IVLAN_VID)
+ req.ivlan_vid = filter->l2_ivlan;
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_ETHERTYPE)
+ req.ethertype = rte_cpu_to_be_16(filter->ethertype);
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IP_PROTOCOL)
+ req.ip_protocol = filter->ip_protocol;
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_IPADDR)
+ req.src_ipaddr[0] = rte_cpu_to_be_32(filter->src_ipaddr[0]);
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_IPADDR)
+ req.dst_ipaddr[0] = rte_cpu_to_be_32(filter->dst_ipaddr[0]);
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_PORT)
+ req.src_port = rte_cpu_to_be_16(filter->src_port);
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_PORT)
+ req.dst_port = rte_cpu_to_be_16(filter->dst_port);
+ if (enables &
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID)
+ req.mirror_vnic_id = filter->mirror_vnic_id;
+
+ req.enables = rte_cpu_to_le_32(enables);
+
+ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+
+ HWRM_CHECK_RESULT();
+
+ filter->fw_em_filter_id = rte_le_to_cpu_64(resp->em_filter_id);
+ HWRM_UNLOCK();
+
+ return rc;
+}
+
+int bnxt_hwrm_clear_em_filter(struct bnxt *bp, struct bnxt_filter_info *filter)
+{
+ int rc = 0;
+ struct hwrm_cfa_em_flow_free_input req = {.req_type = 0 };
+ struct hwrm_cfa_em_flow_free_output *resp = bp->hwrm_cmd_resp_addr;
+
+ if (filter->fw_em_filter_id == UINT64_MAX)
+ return 0;
+
+ RTE_LOG(ERR, PMD, "Clear EM filter\n");
+ HWRM_PREP(req, CFA_EM_FLOW_FREE);
+
+ req.em_filter_id = rte_cpu_to_le_64(filter->fw_em_filter_id);
+
+ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
+
+ filter->fw_em_filter_id = -1;
+ filter->fw_l2_filter_id = -1;
+
+ return 0;
+}
+
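+/*
+ * Install an n-tuple CFA filter directed at 'dst_id'. An existing filter
+ * is cleared first; only the fields flagged in 'enables' are programmed.
+ */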
+int bnxt_hwrm_set_ntuple_filter(struct bnxt *bp,
+ uint16_t dst_id,
+ struct bnxt_filter_info *filter)
+{
+ int rc = 0;
+ struct hwrm_cfa_ntuple_filter_alloc_input req = {.req_type = 0 };
+ struct hwrm_cfa_ntuple_filter_alloc_output *resp =
+ bp->hwrm_cmd_resp_addr;
+ uint32_t enables = 0;
+
+ if (filter->fw_ntuple_filter_id != UINT64_MAX)
+ bnxt_hwrm_clear_ntuple_filter(bp, filter);
+
+ HWRM_PREP(req, CFA_NTUPLE_FILTER_ALLOC);
+
+ req.flags = rte_cpu_to_le_32(filter->flags);
+
+ enables = filter->enables |
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_ID;
+ req.dst_id = rte_cpu_to_le_16(dst_id);
+
+
+ if (filter->ip_addr_type) {
+ req.ip_addr_type = filter->ip_addr_type;
+ enables |=
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IPADDR_TYPE;
+ }
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID)
+ req.l2_filter_id = rte_cpu_to_le_64(filter->fw_l2_filter_id);
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_MACADDR)
+ memcpy(req.src_macaddr, filter->src_macaddr,
+ ETHER_ADDR_LEN);
+ //if (enables &
+ //HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_MACADDR)
+ //memcpy(req.dst_macaddr, filter->dst_macaddr,
+ //ETHER_ADDR_LEN);
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_ETHERTYPE)
+ req.ethertype = rte_cpu_to_be_16(filter->ethertype);
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IP_PROTOCOL)
+ req.ip_protocol = filter->ip_protocol;
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR)
+ req.src_ipaddr[0] = rte_cpu_to_le_32(filter->src_ipaddr[0]);
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR_MASK)
+ req.src_ipaddr_mask[0] =
+ rte_cpu_to_le_32(filter->src_ipaddr_mask[0]);
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR)
+ req.dst_ipaddr[0] = rte_cpu_to_le_32(filter->dst_ipaddr[0]);
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR_MASK)
+ req.dst_ipaddr_mask[0] =
+ rte_cpu_to_be_32(filter->dst_ipaddr_mask[0]);
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT)
+ req.src_port = rte_cpu_to_le_16(filter->src_port);
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT_MASK)
+ req.src_port_mask = rte_cpu_to_le_16(filter->src_port_mask);
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT)
+ req.dst_port = rte_cpu_to_le_16(filter->dst_port);
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT_MASK)
+ req.dst_port_mask = rte_cpu_to_le_16(filter->dst_port_mask);
+ if (enables &
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID)
+ req.mirror_vnic_id = filter->mirror_vnic_id;
+
+ req.enables = rte_cpu_to_le_32(enables);
+
+ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+
+ HWRM_CHECK_RESULT();
+
+ filter->fw_ntuple_filter_id = rte_le_to_cpu_64(resp->ntuple_filter_id);
+ HWRM_UNLOCK();
+
+ return rc;
+}
+
+int bnxt_hwrm_clear_ntuple_filter(struct bnxt *bp,
+ struct bnxt_filter_info *filter)
+{
+ int rc = 0;
+ struct hwrm_cfa_ntuple_filter_free_input req = {.req_type = 0 };
+ struct hwrm_cfa_ntuple_filter_free_output *resp =
+ bp->hwrm_cmd_resp_addr;
+
+ if (filter->fw_ntuple_filter_id == UINT64_MAX)
+ return 0;
+
+ HWRM_PREP(req, CFA_NTUPLE_FILTER_FREE);
+
+ req.ntuple_filter_id = rte_cpu_to_le_64(filter->fw_ntuple_filter_id);
+
+ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+
+ HWRM_CHECK_RESULT();
+ HWRM_UNLOCK();
+
+ filter->fw_ntuple_filter_id = -1;
+ filter->fw_l2_filter_id = -1;
+
+ return 0;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index 51cd0dd4..85083e61 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -51,9 +51,9 @@ int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic,
int bnxt_hwrm_cfa_vlan_antispoof_cfg(struct bnxt *bp, uint16_t fid,
uint16_t vlan_count,
struct bnxt_vlan_antispoof_table_entry *vlan_table);
-int bnxt_hwrm_clear_filter(struct bnxt *bp,
+int bnxt_hwrm_clear_l2_filter(struct bnxt *bp,
struct bnxt_filter_info *filter);
-int bnxt_hwrm_set_filter(struct bnxt *bp,
+int bnxt_hwrm_set_l2_filter(struct bnxt *bp,
uint16_t dst_id,
struct bnxt_filter_info *filter);
int bnxt_hwrm_exec_fwd_resp(struct bnxt *bp, uint16_t target_id,
@@ -92,7 +92,7 @@ int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp,
int bnxt_hwrm_stat_ctx_free(struct bnxt *bp,
struct bnxt_cp_ring_info *cpr, unsigned int idx);
int bnxt_hwrm_ctx_qstats(struct bnxt *bp, uint32_t cid, int idx,
- struct rte_eth_stats *stats);
+ struct rte_eth_stats *stats, uint8_t rx);
int bnxt_hwrm_ver_get(struct bnxt *bp);
@@ -156,4 +156,23 @@ int bnxt_hwrm_func_vf_vnic_query_and_config(struct bnxt *bp, uint16_t vf,
int bnxt_hwrm_func_cfg_vf_set_vlan_anti_spoof(struct bnxt *bp, uint16_t vf,
bool on);
int bnxt_hwrm_func_qcfg_vf_dflt_vnic_id(struct bnxt *bp, int vf);
+int bnxt_hwrm_set_em_filter(struct bnxt *bp, uint16_t dst_id,
+ struct bnxt_filter_info *filter);
+int bnxt_hwrm_clear_em_filter(struct bnxt *bp, struct bnxt_filter_info *filter);
+
+int bnxt_hwrm_set_ntuple_filter(struct bnxt *bp, uint16_t dst_id,
+ struct bnxt_filter_info *filter);
+int bnxt_hwrm_clear_ntuple_filter(struct bnxt *bp,
+ struct bnxt_filter_info *filter);
+int bnxt_get_nvram_directory(struct bnxt *bp, uint32_t len, uint8_t *data);
+int bnxt_hwrm_nvm_get_dir_info(struct bnxt *bp, uint32_t *entries,
+ uint32_t *length);
+int bnxt_hwrm_get_nvram_item(struct bnxt *bp, uint32_t index,
+ uint32_t offset, uint32_t length,
+ uint8_t *data);
+int bnxt_hwrm_erase_nvram_directory(struct bnxt *bp, uint8_t index);
+int bnxt_hwrm_flash_nvram(struct bnxt *bp, uint16_t dir_type,
+ uint16_t dir_ordinal, uint16_t dir_ext,
+ uint16_t dir_attr, const uint8_t *data,
+ size_t data_len);
#endif
diff --git a/drivers/net/bnxt/bnxt_irq.c b/drivers/net/bnxt/bnxt_irq.c
index 47cda7e5..49436cfd 100644
--- a/drivers/net/bnxt/bnxt_irq.c
+++ b/drivers/net/bnxt/bnxt_irq.c
@@ -50,11 +50,18 @@ static void bnxt_int_handler(void *param)
struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)param;
struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
struct bnxt_cp_ring_info *cpr = bp->def_cp_ring;
- uint32_t raw_cons = cpr->cp_raw_cons;
- uint32_t cons;
struct cmpl_base *cmp;
+ uint32_t raw_cons;
+ uint32_t cons;
+ if (cpr == NULL)
+ return;
+
+ raw_cons = cpr->cp_raw_cons;
while (1) {
+ if (!cpr || !cpr->cp_ring_struct)
+ return;
+
cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
cmp = &cpr->cp_desc_ring[cons];
diff --git a/drivers/net/bnxt/bnxt_irq.h b/drivers/net/bnxt/bnxt_irq.h
index e21bec56..4d2f7af9 100644
--- a/drivers/net/bnxt/bnxt_irq.h
+++ b/drivers/net/bnxt/bnxt_irq.h
@@ -34,6 +34,9 @@
#ifndef _BNXT_IRQ_H_
#define _BNXT_IRQ_H_
+#define BNXT_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET
+#define BNXT_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET
+
struct bnxt_irq {
rte_intr_callback_fn handler;
unsigned int vector;
diff --git a/drivers/net/bnxt/bnxt_nvm_defs.h b/drivers/net/bnxt/bnxt_nvm_defs.h
new file mode 100644
index 00000000..c5ccc9bc
--- /dev/null
+++ b/drivers/net/bnxt/bnxt_nvm_defs.h
@@ -0,0 +1,75 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2014-2016 Broadcom Corporation
+ * Copyright (c) 2016-2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef _BNXT_NVM_DEFS_H_
+#define _BNXT_NVM_DEFS_H_
+
+enum bnxt_nvm_directory_type {
+ BNX_DIR_TYPE_UNUSED = 0,
+ BNX_DIR_TYPE_PKG_LOG = 1,
+ BNX_DIR_TYPE_UPDATE = 2,
+ BNX_DIR_TYPE_CHIMP_PATCH = 3,
+ BNX_DIR_TYPE_BOOTCODE = 4,
+ BNX_DIR_TYPE_VPD = 5,
+ BNX_DIR_TYPE_EXP_ROM_MBA = 6,
+ BNX_DIR_TYPE_AVS = 7,
+ BNX_DIR_TYPE_PCIE = 8,
+ BNX_DIR_TYPE_PORT_MACRO = 9,
+ BNX_DIR_TYPE_APE_FW = 10,
+ BNX_DIR_TYPE_APE_PATCH = 11,
+ BNX_DIR_TYPE_KONG_FW = 12,
+ BNX_DIR_TYPE_KONG_PATCH = 13,
+ BNX_DIR_TYPE_BONO_FW = 14,
+ BNX_DIR_TYPE_BONO_PATCH = 15,
+ BNX_DIR_TYPE_TANG_FW = 16,
+ BNX_DIR_TYPE_TANG_PATCH = 17,
+ BNX_DIR_TYPE_BOOTCODE_2 = 18,
+ BNX_DIR_TYPE_CCM = 19,
+ BNX_DIR_TYPE_PCI_CFG = 20,
+ BNX_DIR_TYPE_TSCF_UCODE = 21,
+ BNX_DIR_TYPE_ISCSI_BOOT = 22,
+ BNX_DIR_TYPE_ISCSI_BOOT_IPV6 = 24,
+ BNX_DIR_TYPE_ISCSI_BOOT_IPV4N6 = 25,
+ BNX_DIR_TYPE_ISCSI_BOOT_CFG6 = 26,
+ BNX_DIR_TYPE_EXT_PHY = 27,
+ BNX_DIR_TYPE_SHARED_CFG = 40,
+ BNX_DIR_TYPE_PORT_CFG = 41,
+ BNX_DIR_TYPE_FUNC_CFG = 42,
+ BNX_DIR_TYPE_MGMT_CFG = 48,
+ BNX_DIR_TYPE_MGMT_DATA = 49,
+ BNX_DIR_TYPE_MGMT_WEB_DATA = 50,
+ BNX_DIR_TYPE_MGMT_WEB_META = 51,
+ BNX_DIR_TYPE_MGMT_EVENT_LOG = 52,
+ BNX_DIR_TYPE_MGMT_AUDIT_LOG = 53
+};
+
+#define BNX_DIR_ORDINAL_FIRST 0
+
+#define BNX_DIR_EXT_NONE 0
+#define BNX_DIR_EXT_INACTIVE (1 << 0)
+#define BNX_DIR_EXT_UPDATE (1 << 1)
+
+#define BNX_DIR_ATTR_NONE 0
+#define BNX_DIR_ATTR_NO_CHKSUM (1 << 0)
+#define BNX_DIR_ATTR_PROP_STREAM (1 << 1)
+
+#define BNX_PKG_LOG_MAX_LENGTH 4096
+
+enum bnxnvm_pkglog_field_index {
+ BNX_PKG_LOG_FIELD_IDX_INSTALLED_TIMESTAMP = 0,
+ BNX_PKG_LOG_FIELD_IDX_PKG_DESCRIPTION = 1,
+ BNX_PKG_LOG_FIELD_IDX_PKG_VERSION = 2,
+ BNX_PKG_LOG_FIELD_IDX_PKG_TIMESTAMP = 3,
+ BNX_PKG_LOG_FIELD_IDX_PKG_CHECKSUM = 4,
+ BNX_PKG_LOG_FIELD_IDX_INSTALLED_ITEMS = 5,
+ BNX_PKG_LOG_FIELD_IDX_INSTALLED_MASK = 6
+};
+
+#endif /* Don't add anything after this line */
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index 9d0ae277..0fa2f0c0 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -98,7 +98,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
struct rte_pci_device *pdev = bp->pdev;
const struct rte_memzone *mz = NULL;
char mz_name[RTE_MEMZONE_NAMESIZE];
- phys_addr_t mz_phys_addr;
+ rte_iova_t mz_phys_addr;
int sz;
int stats_len = (tx_ring_info || rx_ring_info) ?
@@ -172,15 +172,15 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
return -ENOMEM;
}
memset(mz->addr, 0, mz->len);
- mz_phys_addr = mz->phys_addr;
+ mz_phys_addr = mz->iova;
if ((unsigned long)mz->addr == mz_phys_addr) {
RTE_LOG(WARNING, PMD,
"Memzone physical address same as virtual.\n");
RTE_LOG(WARNING, PMD,
- "Using rte_mem_virt2phy()\n");
+ "Using rte_mem_virt2iova()\n");
for (sz = 0; sz < total_alloc_len; sz += getpagesize())
rte_mem_lock_page(((char *)mz->addr) + sz);
- mz_phys_addr = rte_mem_virt2phy(mz->addr);
+ mz_phys_addr = rte_mem_virt2iova(mz->addr);
if (mz_phys_addr == 0) {
RTE_LOG(ERR, PMD,
"unable to map ring address to physical memory\n");
@@ -231,7 +231,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
rx_ring->bd = ((char *)mz->addr + ag_ring_start);
rx_ring_info->ag_desc_ring =
(struct rx_prod_pkt_bd *)rx_ring->bd;
- rx_ring->bd_dma = mz->phys_addr + ag_ring_start;
+ rx_ring->bd_dma = mz->iova + ag_ring_start;
rx_ring_info->ag_desc_mapping = rx_ring->bd_dma;
rx_ring->mem_zone = (const void *)mz;
@@ -323,8 +323,10 @@ int bnxt_alloc_hwrm_rings(struct bnxt *bp)
ring = rxr->ag_ring_struct;
/* Agg ring */
- if (ring == NULL)
+ if (ring == NULL) {
RTE_LOG(ERR, PMD, "Alloc AGG Ring is NULL!\n");
+ goto err_out;
+ }
rc = bnxt_hwrm_ring_alloc(bp, ring,
HWRM_RING_ALLOC_INPUT_RING_TYPE_RX,
diff --git a/drivers/net/bnxt/bnxt_ring.h b/drivers/net/bnxt/bnxt_ring.h
index 6d1eb588..164f482e 100644
--- a/drivers/net/bnxt/bnxt_ring.h
+++ b/drivers/net/bnxt/bnxt_ring.h
@@ -41,7 +41,7 @@
#define RING_NEXT(ring, idx) (((idx) + 1) & (ring)->ring_mask)
#define RTE_MBUF_DATA_DMA_ADDR(mb) \
- ((uint64_t)((mb)->buf_physaddr + (mb)->data_off))
+ ((uint64_t)((mb)->buf_iova + (mb)->data_off))
#define DB_IDX_MASK 0xffffff
#define DB_IDX_VALID (0x1 << 26)
@@ -70,7 +70,7 @@
struct bnxt_ring {
void *bd;
- phys_addr_t bd_dma;
+ rte_iova_t bd_dma;
uint32_t ring_size;
uint32_t ring_mask;
diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
index 0793820b..c4da474e 100644
--- a/drivers/net/bnxt/bnxt_rxq.c
+++ b/drivers/net/bnxt/bnxt_rxq.c
@@ -60,10 +60,13 @@ void bnxt_free_rxq_stats(struct bnxt_rx_queue *rxq)
int bnxt_mq_rx_configure(struct bnxt *bp)
{
struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf;
- unsigned int i, j, nb_q_per_grp, ring_idx;
- int start_grp_id, end_grp_id, rc = 0;
+ const struct rte_eth_vmdq_rx_conf *conf =
+ &dev_conf->rx_adv_conf.vmdq_rx_conf;
+ unsigned int i, j, nb_q_per_grp = 1, ring_idx = 0;
+ int start_grp_id, end_grp_id = 1, rc = 0;
struct bnxt_vnic_info *vnic;
struct bnxt_filter_info *filter;
+ enum rte_eth_nb_pools pools = bp->rx_cp_nr_rings, max_pools = 0;
struct bnxt_rx_queue *rxq;
bp->nr_vnics = 0;
@@ -98,117 +101,125 @@ int bnxt_mq_rx_configure(struct bnxt *bp)
}
/* Multi-queue mode */
- if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_VMDQ_FLAG) {
+ if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_VMDQ_DCB_RSS) {
/* VMDq ONLY, VMDq+RSS, VMDq+DCB, VMDq+DCB+RSS */
- enum rte_eth_nb_pools pools;
switch (dev_conf->rxmode.mq_mode) {
case ETH_MQ_RX_VMDQ_RSS:
case ETH_MQ_RX_VMDQ_ONLY:
- {
- const struct rte_eth_vmdq_rx_conf *conf =
- &dev_conf->rx_adv_conf.vmdq_rx_conf;
-
- /* ETH_8/64_POOLs */
- pools = conf->nb_queue_pools;
- break;
- }
+ /* ETH_8/64_POOLs */
+ pools = conf->nb_queue_pools;
+ /* For each pool, allocate MACVLAN CFA rule & VNIC */
+ max_pools = RTE_MIN(bp->max_vnics,
+ RTE_MIN(bp->max_l2_ctx,
+ RTE_MIN(bp->max_rsscos_ctx,
+ ETH_64_POOLS)));
+ if (pools > max_pools)
+ pools = max_pools;
+ break;
+ case ETH_MQ_RX_RSS:
+ pools = bp->rx_cp_nr_rings;
+ break;
default:
RTE_LOG(ERR, PMD, "Unsupported mq_mod %d\n",
dev_conf->rxmode.mq_mode);
rc = -EINVAL;
goto err_out;
}
- /* For each pool, allocate MACVLAN CFA rule & VNIC */
- if (!pools) {
- pools = RTE_MIN(bp->max_vnics,
- RTE_MIN(bp->max_l2_ctx,
- RTE_MIN(bp->max_rsscos_ctx, ETH_64_POOLS)));
- RTE_LOG(ERR, PMD,
- "VMDq pool not set, defaulted to 64\n");
- pools = ETH_64_POOLS;
+ }
+
+ nb_q_per_grp = bp->rx_cp_nr_rings / pools;
+ start_grp_id = 0;
+ end_grp_id = nb_q_per_grp;
+
+ for (i = 0; i < pools; i++) {
+ vnic = bnxt_alloc_vnic(bp);
+ if (!vnic) {
+ RTE_LOG(ERR, PMD, "VNIC alloc failed\n");
+ rc = -ENOMEM;
+ goto err_out;
}
- nb_q_per_grp = bp->rx_cp_nr_rings / pools;
- start_grp_id = 0;
- end_grp_id = nb_q_per_grp;
-
- ring_idx = 0;
- for (i = 0; i < pools; i++) {
- vnic = bnxt_alloc_vnic(bp);
- if (!vnic) {
- RTE_LOG(ERR, PMD,
- "VNIC alloc failed\n");
- rc = -ENOMEM;
- goto err_out;
- }
- vnic->flags |= BNXT_VNIC_INFO_BCAST;
- STAILQ_INSERT_TAIL(&bp->ff_pool[i], vnic, next);
- bp->nr_vnics++;
+ vnic->flags |= BNXT_VNIC_INFO_BCAST;
+ STAILQ_INSERT_TAIL(&bp->ff_pool[i], vnic, next);
+ bp->nr_vnics++;
- for (j = 0; j < nb_q_per_grp; j++, ring_idx++) {
- rxq = bp->eth_dev->data->rx_queues[ring_idx];
- rxq->vnic = vnic;
- }
- if (i == 0)
- vnic->func_default = true;
- vnic->ff_pool_idx = i;
- vnic->start_grp_id = start_grp_id;
- vnic->end_grp_id = end_grp_id;
-
- filter = bnxt_alloc_filter(bp);
- if (!filter) {
- RTE_LOG(ERR, PMD,
- "L2 filter alloc failed\n");
- rc = -ENOMEM;
- goto err_out;
+ for (j = 0; j < nb_q_per_grp; j++, ring_idx++) {
+ rxq = bp->eth_dev->data->rx_queues[ring_idx];
+ rxq->vnic = vnic;
+ }
+ if (i == 0) {
+ if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_VMDQ_DCB) {
+ bp->eth_dev->data->promiscuous = 1;
+ vnic->flags |= BNXT_VNIC_INFO_PROMISC;
}
- /*
- * TODO: Configure & associate CFA rule for
- * each VNIC for each VMDq with MACVLAN, MACVLAN+TC
- */
- STAILQ_INSERT_TAIL(&vnic->filter, filter, next);
-
- start_grp_id = end_grp_id + 1;
- end_grp_id += nb_q_per_grp;
+ vnic->func_default = true;
}
- goto out;
- }
+ vnic->ff_pool_idx = i;
+ vnic->start_grp_id = start_grp_id;
+ vnic->end_grp_id = end_grp_id;
+
+ if (i) {
+ if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_VMDQ_DCB ||
+ !(dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS))
+ vnic->rss_dflt_cr = true;
+ goto skip_filter_allocation;
+ }
+ filter = bnxt_alloc_filter(bp);
+ if (!filter) {
+ RTE_LOG(ERR, PMD, "L2 filter alloc failed\n");
+ rc = -ENOMEM;
+ goto err_out;
+ }
+ /*
+ * TODO: Configure & associate CFA rule for
+ * each VNIC for each VMDq with MACVLAN, MACVLAN+TC
+ */
+ STAILQ_INSERT_TAIL(&vnic->filter, filter, next);
- /* Non-VMDq mode - RSS, DCB, RSS+DCB */
- /* Init default VNIC for RSS or DCB only */
- vnic = bnxt_alloc_vnic(bp);
- if (!vnic) {
- RTE_LOG(ERR, PMD, "VNIC alloc failed\n");
- rc = -ENOMEM;
- goto err_out;
- }
- vnic->flags |= BNXT_VNIC_INFO_BCAST;
- /* Partition the rx queues for the single pool */
- for (i = 0; i < bp->rx_cp_nr_rings; i++) {
- rxq = bp->eth_dev->data->rx_queues[i];
- rxq->vnic = vnic;
- }
- STAILQ_INSERT_TAIL(&bp->ff_pool[0], vnic, next);
- bp->nr_vnics++;
-
- vnic->func_default = true;
- vnic->ff_pool_idx = 0;
- vnic->start_grp_id = 0;
- vnic->end_grp_id = bp->rx_cp_nr_rings;
- filter = bnxt_alloc_filter(bp);
- if (!filter) {
- RTE_LOG(ERR, PMD, "L2 filter alloc failed\n");
- rc = -ENOMEM;
- goto err_out;
+skip_filter_allocation:
+ start_grp_id = end_grp_id;
+ end_grp_id += nb_q_per_grp;
}
- STAILQ_INSERT_TAIL(&vnic->filter, filter, next);
-
- if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)
- vnic->hash_type =
- HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV4 |
- HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV6;
out:
+ if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
+ struct rte_eth_rss_conf *rss = &dev_conf->rx_adv_conf.rss_conf;
+ uint16_t hash_type = 0;
+
+ if (bp->flags & BNXT_FLAG_UPDATE_HASH) {
+ rss = &bp->rss_conf;
+ bp->flags &= ~BNXT_FLAG_UPDATE_HASH;
+ }
+
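+ /* Translate the requested rss_hf bits into HWRM hash-type flags. */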
+ if (rss->rss_hf & ETH_RSS_IPV4)
+ hash_type |= HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV4;
+ if (rss->rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
+ hash_type |= HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV4;
+ if (rss->rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
+ hash_type |= HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV4;
+ if (rss->rss_hf & ETH_RSS_IPV6)
+ hash_type |= HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV6;
+ if (rss->rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
+ hash_type |= HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV6;
+ if (rss->rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
+ hash_type |= HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV6;
+
+ for (i = 0; i < bp->nr_vnics; i++) {
+ STAILQ_FOREACH(vnic, &bp->ff_pool[i], next) {
+ vnic->hash_type = hash_type;
+
+ /*
+ * Use the supplied key if the key length is
+ * acceptable and the rss_key is not NULL
+ */
+ if (rss->rss_key &&
+ rss->rss_key_len <= HW_HASH_KEY_SIZE)
+ memcpy(vnic->rss_hash_key,
+ rss->rss_key, rss->rss_key_len);
+ }
+ }
+ }
+
return rc;
err_out:
@@ -349,3 +360,41 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
out:
return rc;
}
+
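+/*
+ * Rx queue interrupt control: arm (enable) or disarm (disable) the
+ * completion ring doorbell for the given queue.
+ */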
+int
+bnxt_rx_queue_intr_enable_op(struct rte_eth_dev *eth_dev, uint16_t queue_id)
+{
+ struct bnxt_rx_queue *rxq;
+ struct bnxt_cp_ring_info *cpr;
+ int rc = 0;
+
+ if (eth_dev->data->rx_queues) {
+ rxq = eth_dev->data->rx_queues[queue_id];
+ if (!rxq) {
+ rc = -EINVAL;
+ return rc;
+ }
+ cpr = rxq->cp_ring;
+ B_CP_DB_ARM(cpr);
+ }
+ return rc;
+}
+
+int
+bnxt_rx_queue_intr_disable_op(struct rte_eth_dev *eth_dev, uint16_t queue_id)
+{
+ struct bnxt_rx_queue *rxq;
+ struct bnxt_cp_ring_info *cpr;
+ int rc = 0;
+
+ if (eth_dev->data->rx_queues) {
+ rxq = eth_dev->data->rx_queues[queue_id];
+ if (!rxq) {
+ rc = -EINVAL;
+ return rc;
+ }
+ cpr = rxq->cp_ring;
+ B_CP_DB_DISARM(cpr);
+ }
+ return rc;
+}
diff --git a/drivers/net/bnxt/bnxt_rxq.h b/drivers/net/bnxt/bnxt_rxq.h
index 01aaa007..508731ee 100644
--- a/drivers/net/bnxt/bnxt_rxq.h
+++ b/drivers/net/bnxt/bnxt_rxq.h
@@ -48,7 +48,7 @@ struct bnxt_rx_queue {
uint16_t rx_free_thresh; /* max free RX desc to hold */
uint16_t queue_id; /* RX queue index */
uint16_t reg_idx; /* RX queue register index */
- uint8_t port_id; /* Device port identifier */
+ uint16_t port_id; /* Device port identifier */
uint8_t crc_len; /* 0 if CRC stripped, 4 otherwise */
struct bnxt *bp;
@@ -73,5 +73,9 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp);
void bnxt_free_rx_mbufs(struct bnxt *bp);
+int bnxt_rx_queue_intr_enable_op(struct rte_eth_dev *eth_dev,
+ uint16_t queue_id);
+int bnxt_rx_queue_intr_disable_op(struct rte_eth_dev *eth_dev,
+ uint16_t queue_id);
#endif
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index bee67d33..30891b74 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -199,7 +199,7 @@ static void bnxt_tpa_start(struct bnxt_rx_queue *rxq,
if (tpa_start1->flags2 &
rte_cpu_to_le_32(RX_TPA_START_CMPL_FLAGS2_META_FORMAT_VLAN)) {
mbuf->vlan_tci = rte_le_to_cpu_32(tpa_start1->metadata);
- mbuf->ol_flags |= PKT_RX_VLAN_PKT;
+ mbuf->ol_flags |= PKT_RX_VLAN;
}
if (likely(tpa_start1->flags2 &
rte_cpu_to_le_32(RX_TPA_START_CMPL_FLAGS2_L4_CS_CALC)))
@@ -219,6 +219,9 @@ static int bnxt_agg_bufs_valid(struct bnxt_cp_ring_info *cpr,
raw_cp_cons = ADV_RAW_CMP(raw_cp_cons, agg_bufs);
last_cp_cons = RING_CMP(cpr->cp_ring_struct, raw_cp_cons);
agg_cmpl = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[last_cp_cons];
+ cpr->valid = FLIP_VALID(raw_cp_cons,
+ cpr->cp_ring_struct->ring_mask,
+ cpr->valid);
return CMP_VALID(agg_cmpl, raw_cp_cons, cpr->cp_ring_struct);
}
@@ -332,6 +335,48 @@ static inline struct rte_mbuf *bnxt_tpa_end(
return mbuf;
}
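+/*
+ * Build the mbuf packet_type from the RX completion flags: the ITYPE bits
+ * identify IP/TCP/UDP/ICMP, IP_TYPE selects IPv6 and T_IP_CS_CALC is used
+ * to distinguish inner (tunnelled) from outer headers.
+ */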
+static uint32_t
+bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
+{
+ uint32_t pkt_type = 0;
+ uint32_t t_ipcs = 0, ip = 0, ip6 = 0;
+ uint32_t tcp = 0, udp = 0, icmp = 0;
+ uint32_t vlan = 0;
+
+ vlan = !!(rxcmp1->flags2 &
+ rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN));
+ t_ipcs = !!(rxcmp1->flags2 &
+ rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC));
+ ip6 = !!(rxcmp1->flags2 &
+ rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_IP_TYPE));
+ icmp = !!(rxcmp->flags_type &
+ rte_cpu_to_le_16(RX_PKT_CMPL_FLAGS_ITYPE_ICMP));
+ tcp = !!(rxcmp->flags_type &
+ rte_cpu_to_le_16(RX_PKT_CMPL_FLAGS_ITYPE_TCP));
+ udp = !!(rxcmp->flags_type &
+ rte_cpu_to_le_16(RX_PKT_CMPL_FLAGS_ITYPE_UDP));
+ ip = !!(rxcmp->flags_type &
+ rte_cpu_to_le_16(RX_PKT_CMPL_FLAGS_ITYPE_IP));
+
+ pkt_type |= ((ip || tcp || udp || icmp) && !t_ipcs && !ip6) ?
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN : 0;
+ pkt_type |= ((ip || tcp || udp || icmp) && !t_ipcs && ip6) ?
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN : 0;
+ pkt_type |= (!t_ipcs && icmp) ? RTE_PTYPE_L4_ICMP : 0;
+ pkt_type |= (!t_ipcs && udp) ? RTE_PTYPE_L4_UDP : 0;
+ pkt_type |= (!t_ipcs && tcp) ? RTE_PTYPE_L4_TCP : 0;
+ pkt_type |= ((ip || tcp || udp || icmp) && t_ipcs && !ip6) ?
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN : 0;
+ pkt_type |= ((ip || tcp || udp || icmp) && t_ipcs && ip6) ?
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN : 0;
+ pkt_type |= (t_ipcs && icmp) ? RTE_PTYPE_INNER_L4_ICMP : 0;
+ pkt_type |= (t_ipcs && udp) ? RTE_PTYPE_INNER_L4_UDP : 0;
+ pkt_type |= (t_ipcs && tcp) ? RTE_PTYPE_INNER_L4_TCP : 0;
+ pkt_type |= vlan ? RTE_PTYPE_L2_ETHER_VLAN : 0;
+
+ return pkt_type;
+}
+
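
An illustrative consumer (not part of this patch): bnxt_parse_pkt_type() above fills mbuf->packet_type with RTE_PTYPE_* bits, so applications can branch on the reported L3/L4 type instead of re-parsing headers. A minimal IPv4/TCP check:

#include <rte_mbuf.h>

static int
is_ipv4_tcp(const struct rte_mbuf *m)
{
	uint32_t ptype = m->packet_type;	/* set by bnxt_parse_pkt_type() */

	return (ptype & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4_EXT_UNKNOWN &&
	       (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP;
}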
static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
struct bnxt_rx_queue *rxq, uint32_t *raw_cons)
{
@@ -360,13 +405,17 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
if (!CMP_VALID(rxcmp1, tmp_raw_cons, cpr->cp_ring_struct))
return -EBUSY;
+ cpr->valid = FLIP_VALID(cp_cons,
+ cpr->cp_ring_struct->ring_mask,
+ cpr->valid);
+
cmp_type = CMP_TYPE(rxcmp);
- if (cmp_type == RX_PKT_CMPL_TYPE_RX_L2_TPA_START) {
+ if (cmp_type == RX_TPA_START_CMPL_TYPE_RX_TPA_START) {
bnxt_tpa_start(rxq, (struct rx_tpa_start_cmpl *)rxcmp,
(struct rx_tpa_start_cmpl_hi *)rxcmp1);
rc = -EINVAL; /* Continue w/o new mbuf */
goto next_rx;
- } else if (cmp_type == RX_PKT_CMPL_TYPE_RX_L2_TPA_END) {
+ } else if (cmp_type == RX_TPA_END_CMPL_TYPE_RX_TPA_END) {
mbuf = bnxt_tpa_end(rxq, &tmp_raw_cons,
(struct rx_tpa_end_cmpl *)rxcmp,
(struct rx_tpa_end_cmpl_hi *)rxcmp1);
@@ -388,10 +437,10 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
cons = rxcmp->opaque;
mbuf = bnxt_consume_rx_buf(rxr, cons);
- rte_prefetch0(mbuf);
-
if (mbuf == NULL)
- return -ENOMEM;
+ return -EBUSY;
+
+ rte_prefetch0(mbuf);
mbuf->nb_segs = 1;
mbuf->next = NULL;
@@ -415,9 +464,21 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
(RX_PKT_CMPL_METADATA_VID_MASK |
RX_PKT_CMPL_METADATA_DE |
RX_PKT_CMPL_METADATA_PRI_MASK);
- mbuf->ol_flags |= PKT_RX_VLAN_PKT;
+ mbuf->ol_flags |= PKT_RX_VLAN;
}
+ if (likely(RX_CMP_IP_CS_OK(rxcmp1)))
+ mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+ else
+ mbuf->ol_flags |= PKT_RX_IP_CKSUM_NONE;
+
+ if (likely(RX_CMP_L4_CS_OK(rxcmp1)))
+ mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+ else
+ mbuf->ol_flags |= PKT_RX_L4_CKSUM_NONE;
+
+ mbuf->packet_type = bnxt_parse_pkt_type(rxcmp, rxcmp1);
+
#ifdef BNXT_DEBUG
if (rxcmp1->errors_v2 & RX_CMP_L2_ERRORS) {
/* Re-install the mbuf back to the rx ring */
@@ -448,13 +509,14 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
if (bnxt_alloc_rx_data(rxq, rxr, prod)) {
RTE_LOG(ERR, PMD, "mbuf alloc failed with prod=0x%x\n", prod);
rc = -ENOMEM;
+ goto rx;
}
rxr->rx_prod = prod;
/*
* All MBUFs are allocated with the same size under DPDK,
* no optimization for rx_copy_thresh
*/
-
+rx:
*rx_pkt = mbuf;
next_rx:
@@ -476,22 +538,24 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
struct rx_pkt_cmpl *rxcmp;
uint16_t prod = rxr->rx_prod;
uint16_t ag_prod = rxr->ag_prod;
+ int rc = 0;
/* Handle RX burst request */
while (1) {
- int rc;
-
cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
rte_prefetch0(&cpr->cp_desc_ring[cons]);
rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct))
break;
+ cpr->valid = FLIP_VALID(cons,
+ cpr->cp_ring_struct->ring_mask,
+ cpr->valid);
/* TODO: Avoid magic numbers... */
if ((CMP_TYPE(rxcmp) & 0x30) == 0x10) {
rc = bnxt_rx_pkt(&rx_pkts[nb_rx_pkts], rxq, &raw_cons);
- if (likely(!rc))
+ if (likely(!rc) || rc == -ENOMEM)
nb_rx_pkts++;
if (rc == -EBUSY) /* partial completion */
break;
@@ -514,6 +578,30 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
B_RX_DB(rxr->rx_doorbell, rxr->rx_prod);
/* Ring the AGG ring DB */
B_RX_DB(rxr->ag_doorbell, rxr->ag_prod);
+
+ /* Attempt to alloc Rx buf in case of a previous allocation failure. */
+ if (rc == -ENOMEM) {
+ int i;
+
+ for (i = prod; i <= nb_rx_pkts;
+ i = RING_NEXT(rxr->rx_ring_struct, i)) {
+ struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i];
+
+ /* Buffer already allocated for this index. */
+ if (rx_buf->mbuf != NULL)
+ continue;
+
+ /* This slot is empty. Alloc buffer for Rx */
+ if (!bnxt_alloc_rx_data(rxq, rxr, i)) {
+ rxr->rx_prod = i;
+ B_RX_DB(rxr->rx_doorbell, rxr->rx_prod);
+ } else {
+ RTE_LOG(ERR, PMD, "Alloc mbuf failed\n");
+ break;
+ }
+ }
+ }
+
return nb_rx_pkts;
}
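
A usage note with a sketch (not part of this patch): with the -ENOMEM handling above, bnxt_recv_pkts() still delivers the packets it already received and retries ring replenishment, while the allocation failure is counted in rx_mbuf_alloc_fail and surfaced as rx_nombuf through the ethdev stats API. An application can watch that counter to detect mempool exhaustion:

#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>

/* Sketch: report mbuf allocation failures seen by the PMD on one port. */
static void
check_rx_nombuf(uint16_t port)
{
	struct rte_eth_stats stats;

	if (rte_eth_stats_get(port, &stats) == 0 && stats.rx_nombuf != 0)
		printf("port %u: %" PRIu64 " Rx mbuf allocation failures\n",
		       port, stats.rx_nombuf);
}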
diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h
index f8d6dc80..a94373d1 100644
--- a/drivers/net/bnxt/bnxt_rxr.h
+++ b/drivers/net/bnxt/bnxt_rxr.h
@@ -52,6 +52,22 @@
#define BNXT_TPA_OUTER_L3_OFF(hdr_info) \
((hdr_info) & 0x1ff)
+#define RX_CMP_L4_CS_BITS rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_L4_CS_CALC)
+
+#define RX_CMP_L4_CS_ERR_BITS rte_cpu_to_le_32(RX_PKT_CMPL_ERRORS_L4_CS_ERROR)
+
+#define RX_CMP_L4_CS_OK(rxcmp1) \
+ (((rxcmp1)->flags2 & RX_CMP_L4_CS_BITS) && \
+ !((rxcmp1)->errors_v2 & RX_CMP_L4_CS_ERR_BITS))
+
+#define RX_CMP_IP_CS_ERR_BITS rte_cpu_to_le_32(RX_PKT_CMPL_ERRORS_IP_CS_ERROR)
+
+#define RX_CMP_IP_CS_BITS rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_IP_CS_CALC)
+
+#define RX_CMP_IP_CS_OK(rxcmp1) \
+ (((rxcmp1)->flags2 & RX_CMP_IP_CS_BITS) && \
+ !((rxcmp1)->errors_v2 & RX_CMP_IP_CS_ERR_BITS))
+
enum pkt_hash_types {
PKT_HASH_TYPE_NONE, /* Undefined type */
PKT_HASH_TYPE_L2, /* Input: src_MAC, dest_MAC */
@@ -85,8 +101,8 @@ struct bnxt_rx_ring_info {
struct bnxt_sw_rx_bd *rx_buf_ring; /* sw ring */
struct bnxt_sw_rx_bd *ag_buf_ring; /* sw ring */
- phys_addr_t rx_desc_mapping;
- phys_addr_t ag_desc_mapping;
+ rte_iova_t rx_desc_mapping;
+ rte_iova_t ag_desc_mapping;
struct bnxt_ring *rx_ring_struct;
struct bnxt_ring *ag_ring_struct;
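
An illustrative sketch (not part of this patch): the RX_CMP_*_CS_OK macros added above drive the PKT_RX_IP_CKSUM_GOOD/NONE and PKT_RX_L4_CKSUM_GOOD/NONE flags set in bnxt_rx_pkt(), so a receiver can trust or drop packets based on ol_flags alone.

#include <rte_mbuf.h>

/* Returns non-zero when the hardware validated both the IP and L4 checksums
 * (CKSUM_NONE means the checksum was not checked, not that it is bad).
 */
static int
hw_csum_good(const struct rte_mbuf *m)
{
	return (m->ol_flags & PKT_RX_IP_CKSUM_GOOD) &&
	       (m->ol_flags & PKT_RX_L4_CKSUM_GOOD);
}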
diff --git a/drivers/net/bnxt/bnxt_stats.c b/drivers/net/bnxt/bnxt_stats.c
index d7d0e35c..fe83d370 100644
--- a/drivers/net/bnxt/bnxt_stats.c
+++ b/drivers/net/bnxt/bnxt_stats.c
@@ -228,9 +228,10 @@ void bnxt_free_stats(struct bnxt *bp)
}
}
-void bnxt_stats_get_op(struct rte_eth_dev *eth_dev,
+int bnxt_stats_get_op(struct rte_eth_dev *eth_dev,
struct rte_eth_stats *bnxt_stats)
{
+ int rc = 0;
unsigned int i;
struct bnxt *bp = eth_dev->data->dev_private;
@@ -240,17 +241,26 @@ void bnxt_stats_get_op(struct rte_eth_dev *eth_dev,
struct bnxt_rx_queue *rxq = bp->rx_queues[i];
struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
- bnxt_hwrm_ctx_qstats(bp, cpr->hw_stats_ctx_id, i, bnxt_stats);
+ rc = bnxt_hwrm_ctx_qstats(bp, cpr->hw_stats_ctx_id, i,
+ bnxt_stats, 1);
+ if (unlikely(rc))
+ return rc;
}
for (i = 0; i < bp->tx_cp_nr_rings; i++) {
struct bnxt_tx_queue *txq = bp->tx_queues[i];
struct bnxt_cp_ring_info *cpr = txq->cp_ring;
- bnxt_hwrm_ctx_qstats(bp, cpr->hw_stats_ctx_id, i, bnxt_stats);
+ rc = bnxt_hwrm_ctx_qstats(bp, cpr->hw_stats_ctx_id, i,
+ bnxt_stats, 0);
+ if (unlikely(rc))
+ return rc;
}
- bnxt_hwrm_func_qstats(bp, 0xffff, bnxt_stats);
+ rc = bnxt_hwrm_func_qstats(bp, 0xffff, bnxt_stats);
+ if (unlikely(rc))
+ return rc;
bnxt_stats->rx_nombuf = rte_atomic64_read(&bp->rx_mbuf_alloc_fail);
+ return rc;
}
void bnxt_stats_reset_op(struct rte_eth_dev *eth_dev)
@@ -358,3 +368,54 @@ void bnxt_dev_xstats_reset_op(struct rte_eth_dev *eth_dev)
if (!(bp->flags & BNXT_FLAG_PORT_STATS))
RTE_LOG(ERR, PMD, "Operation not supported\n");
}
+
+int bnxt_dev_xstats_get_by_id_op(struct rte_eth_dev *dev, const uint64_t *ids,
+ uint64_t *values, unsigned int limit)
+{
+ /* Account for the Tx drop pkts aka the Anti spoof counter */
+ const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
+ RTE_DIM(bnxt_tx_stats_strings) + 1;
+ struct rte_eth_xstat xstats[stat_cnt];
+ uint64_t values_copy[stat_cnt];
+ uint16_t i;
+
+ if (!ids)
+ return bnxt_dev_xstats_get_op(dev, xstats, stat_cnt);
+
+ bnxt_dev_xstats_get_by_id_op(dev, NULL, values_copy, stat_cnt);
+ for (i = 0; i < limit; i++) {
+ if (ids[i] >= stat_cnt) {
+ RTE_LOG(ERR, PMD, "id value isn't valid");
+ return -1;
+ }
+ values[i] = values_copy[ids[i]];
+ }
+ return stat_cnt;
+}
+
+int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
+ struct rte_eth_xstat_name *xstats_names,
+ const uint64_t *ids, unsigned int limit)
+{
+ /* Account for the Tx drop pkts aka the Anti spoof counter */
+ const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
+ RTE_DIM(bnxt_tx_stats_strings) + 1;
+ struct rte_eth_xstat_name xstats_names_copy[stat_cnt];
+ uint16_t i;
+
+ if (!ids)
+ return bnxt_dev_xstats_get_names_op(dev, xstats_names,
+ stat_cnt);
+ bnxt_dev_xstats_get_names_by_id_op(dev, xstats_names_copy, NULL,
+ stat_cnt);
+
+ for (i = 0; i < limit; i++) {
+ if (ids[i] >= stat_cnt) {
+ RTE_LOG(ERR, PMD, "id value isn't valid");
+ return -1;
+ }
+ strcpy(xstats_names[i].name,
+ xstats_names_copy[ids[i]].name);
+ }
+ return stat_cnt;
+}
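
A minimal caller sketch (not part of this patch): the two new ops back rte_eth_xstats_get_names_by_id() and rte_eth_xstats_get_by_id(), so selected counters can be fetched by index without pulling the whole xstats table. Parameter order follows the ethdev API of this release; port validity checks are omitted.

#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>

static void
dump_two_xstats(uint16_t port)
{
	uint64_t ids[2] = { 0, 1 };		/* first two extended stats */
	uint64_t values[2];
	struct rte_eth_xstat_name names[2];

	if (rte_eth_xstats_get_names_by_id(port, names, 2, ids) < 0 ||
	    rte_eth_xstats_get_by_id(port, ids, values, 2) < 0)
		return;

	printf("%s=%" PRIu64 " %s=%" PRIu64 "\n",
	       names[0].name, values[0], names[1].name, values[1]);
}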
diff --git a/drivers/net/bnxt/bnxt_stats.h b/drivers/net/bnxt/bnxt_stats.h
index b6d133ef..51d16f5d 100644
--- a/drivers/net/bnxt/bnxt_stats.h
+++ b/drivers/net/bnxt/bnxt_stats.h
@@ -37,7 +37,7 @@
#include <rte_ethdev.h>
void bnxt_free_stats(struct bnxt *bp);
-void bnxt_stats_get_op(struct rte_eth_dev *eth_dev,
+int bnxt_stats_get_op(struct rte_eth_dev *eth_dev,
struct rte_eth_stats *bnxt_stats);
void bnxt_stats_reset_op(struct rte_eth_dev *eth_dev);
int bnxt_dev_xstats_get_names_op(__rte_unused struct rte_eth_dev *eth_dev,
@@ -46,6 +46,11 @@ int bnxt_dev_xstats_get_names_op(__rte_unused struct rte_eth_dev *eth_dev,
int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
struct rte_eth_xstat *xstats, unsigned int n);
void bnxt_dev_xstats_reset_op(struct rte_eth_dev *eth_dev);
+int bnxt_dev_xstats_get_by_id_op(struct rte_eth_dev *dev, const uint64_t *ids,
+ uint64_t *values, unsigned int limit);
+int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
+ struct rte_eth_xstat_name *xstats_names,
+ const uint64_t *ids, unsigned int limit);
struct bnxt_xstats_name_off {
char name[RTE_ETH_XSTATS_NAME_SIZE];
diff --git a/drivers/net/bnxt/bnxt_txq.h b/drivers/net/bnxt/bnxt_txq.h
index 16f3a0bd..f753c10f 100644
--- a/drivers/net/bnxt/bnxt_txq.h
+++ b/drivers/net/bnxt/bnxt_txq.h
@@ -46,7 +46,7 @@ struct bnxt_tx_queue {
uint16_t tx_next_rs; /* next desc to set RS bit */
uint16_t queue_id; /* TX queue index */
uint16_t reg_idx; /* TX queue register index */
- uint8_t port_id; /* Device port identifier */
+ uint16_t port_id; /* Device port identifier */
uint8_t pthresh; /* Prefetch threshold register */
uint8_t hthresh; /* Host threshold register */
uint8_t wthresh; /* Write-back threshold reg */
diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
index 6870b16d..8ca4bbd8 100644
--- a/drivers/net/bnxt/bnxt_txr.c
+++ b/drivers/net/bnxt/bnxt_txr.c
@@ -161,7 +161,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
- PKT_TX_VLAN_PKT))
+ PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM))
long_bd = true;
tx_buf = &txr->tx_buf_ring[txr->tx_prod];
@@ -211,21 +211,39 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
if (tx_pkt->ol_flags & PKT_TX_TCP_SEG) {
/* TSO */
- txbd1->lflags = TX_BD_LONG_LFLAGS_LSO;
+ txbd1->lflags |= TX_BD_LONG_LFLAGS_LSO;
txbd1->hdr_size = tx_pkt->l2_len + tx_pkt->l3_len +
tx_pkt->l4_len + tx_pkt->outer_l2_len +
tx_pkt->outer_l3_len;
txbd1->mss = tx_pkt->tso_segsz;
- } else if (tx_pkt->ol_flags & (PKT_TX_TCP_CKSUM |
- PKT_TX_UDP_CKSUM)) {
+ } else if (tx_pkt->ol_flags & PKT_TX_OIP_IIP_TCP_UDP_CKSUM) {
+ /* Outer IP, Inner IP, Inner TCP/UDP CSO */
+ txbd1->lflags |= TX_BD_FLG_TIP_IP_TCP_UDP_CHKSUM;
+ txbd1->mss = 0;
+ } else if (tx_pkt->ol_flags & PKT_TX_IIP_TCP_UDP_CKSUM) {
+ /* (Inner) IP, (Inner) TCP/UDP CSO */
+ txbd1->lflags |= TX_BD_FLG_IP_TCP_UDP_CHKSUM;
+ txbd1->mss = 0;
+ } else if (tx_pkt->ol_flags & PKT_TX_OIP_TCP_UDP_CKSUM) {
+ /* Outer IP, (Inner) TCP/UDP CSO */
+ txbd1->lflags |= TX_BD_FLG_TIP_TCP_UDP_CHKSUM;
+ txbd1->mss = 0;
+ } else if (tx_pkt->ol_flags & PKT_TX_OIP_IIP_CKSUM) {
+ /* Outer IP, Inner IP CSO */
+ txbd1->lflags |= TX_BD_FLG_TIP_IP_CHKSUM;
+ txbd1->mss = 0;
+ } else if (tx_pkt->ol_flags & PKT_TX_TCP_UDP_CKSUM) {
/* TCP/UDP CSO */
- txbd1->lflags = TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM;
+ txbd1->lflags |= TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM;
txbd1->mss = 0;
-
} else if (tx_pkt->ol_flags & PKT_TX_IP_CKSUM) {
/* IP CSO */
- txbd1->lflags = TX_BD_LONG_LFLAGS_IP_CHKSUM;
+ txbd1->lflags |= TX_BD_LONG_LFLAGS_IP_CHKSUM;
+ txbd1->mss = 0;
+ } else if (tx_pkt->ol_flags & PKT_TX_OUTER_IP_CKSUM) {
+ /* IP CSO */
+ txbd1->lflags |= TX_BD_LONG_LFLAGS_T_IP_CHKSUM;
txbd1->mss = 0;
}
} else {
@@ -295,6 +313,9 @@ static int bnxt_handle_tx_cp(struct bnxt_tx_queue *txq)
if (!CMP_VALID(txcmp, raw_cons, cpr->cp_ring_struct))
break;
+ cpr->valid = FLIP_VALID(cons,
+ cpr->cp_ring_struct->ring_mask,
+ cpr->valid);
if (CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2)
nb_tx_pkts++;
diff --git a/drivers/net/bnxt/bnxt_txr.h b/drivers/net/bnxt/bnxt_txr.h
index 5b097114..2feac51d 100644
--- a/drivers/net/bnxt/bnxt_txr.h
+++ b/drivers/net/bnxt/bnxt_txr.h
@@ -49,7 +49,7 @@ struct bnxt_tx_ring_info {
struct tx_bd_long *tx_desc_ring;
struct bnxt_sw_tx_bd *tx_buf_ring;
- phys_addr_t tx_desc_mapping;
+ rte_iova_t tx_desc_mapping;
#define BNXT_DEV_STATE_CLOSING 0x1
uint32_t dev_state;
@@ -69,4 +69,25 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id);
uint16_t bnxt_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+#define PKT_TX_OIP_IIP_TCP_UDP_CKSUM (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | \
+ PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM)
+#define PKT_TX_IIP_TCP_UDP_CKSUM (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | \
+ PKT_TX_IP_CKSUM)
+#define PKT_TX_OIP_TCP_UDP_CKSUM (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | \
+ PKT_TX_OUTER_IP_CKSUM)
+#define PKT_TX_OIP_IIP_CKSUM (PKT_TX_IP_CKSUM | \
+ PKT_TX_OUTER_IP_CKSUM)
+#define PKT_TX_TCP_UDP_CKSUM (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)
+
+
+#define TX_BD_FLG_TIP_IP_TCP_UDP_CHKSUM (TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM | \
+ TX_BD_LONG_LFLAGS_T_IP_CHKSUM | \
+ TX_BD_LONG_LFLAGS_IP_CHKSUM)
+#define TX_BD_FLG_IP_TCP_UDP_CHKSUM (TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM | \
+ TX_BD_LONG_LFLAGS_IP_CHKSUM)
+#define TX_BD_FLG_TIP_IP_CHKSUM (TX_BD_LONG_LFLAGS_T_IP_CHKSUM | \
+ TX_BD_LONG_LFLAGS_IP_CHKSUM)
+#define TX_BD_FLG_TIP_TCP_UDP_CHKSUM (TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM | \
+ TX_BD_LONG_LFLAGS_T_IP_CHKSUM)
+
#endif
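
An illustrative sketch (not part of this patch): bnxt_start_xmit() matches the PKT_TX_* combinations above to pick the right lflags, and an application requests, for example, outer-IP plus inner-IP/TCP checksum offload for a VXLAN frame by setting the flags and header lengths on the mbuf before rte_eth_tx_burst(). Header-length conventions follow the standard DPDK tunnel offload rules; the sizes here assume IPv4 outer and inner headers without options.

#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

static void
request_vxlan_csum_offload(struct rte_mbuf *m)
{
	/* Hits the PKT_TX_OIP_IIP_TCP_UDP_CKSUM branch in bnxt_start_xmit(). */
	m->ol_flags |= PKT_TX_OUTER_IP_CKSUM | PKT_TX_OUTER_IPV4 |
		       PKT_TX_IP_CKSUM | PKT_TX_IPV4 | PKT_TX_TCP_CKSUM;

	m->outer_l2_len = sizeof(struct ether_hdr);
	m->outer_l3_len = sizeof(struct ipv4_hdr);
	/* l2_len spans outer UDP + VXLAN + inner Ethernet for tunnel offloads. */
	m->l2_len = sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr) +
		    sizeof(struct ether_hdr);
	m->l3_len = sizeof(struct ipv4_hdr);
	m->l4_len = sizeof(struct tcp_hdr);
}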
diff --git a/drivers/net/bnxt/bnxt_vnic.c b/drivers/net/bnxt/bnxt_vnic.c
index db9fb079..5bac2605 100644
--- a/drivers/net/bnxt/bnxt_vnic.c
+++ b/drivers/net/bnxt/bnxt_vnic.c
@@ -83,6 +83,7 @@ void bnxt_init_vnics(struct bnxt *bp)
prandom_bytes(vnic->rss_hash_key, HW_HASH_KEY_SIZE);
STAILQ_INIT(&vnic->filter);
+ STAILQ_INIT(&vnic->flow_list);
STAILQ_INSERT_TAIL(&bp->free_vnic_list, vnic, next);
}
for (i = 0; i < MAX_FF_POOLS; i++)
@@ -174,7 +175,7 @@ int bnxt_alloc_vnic_attributes(struct bnxt *bp)
BNXT_MAX_MC_ADDRS * ETHER_ADDR_LEN);
uint16_t max_vnics;
int i;
- phys_addr_t mz_phys_addr;
+ rte_iova_t mz_phys_addr;
max_vnics = bp->max_vnics;
snprintf(mz_name, RTE_MEMZONE_NAMESIZE,
@@ -191,13 +192,13 @@ int bnxt_alloc_vnic_attributes(struct bnxt *bp)
if (!mz)
return -ENOMEM;
}
- mz_phys_addr = mz->phys_addr;
+ mz_phys_addr = mz->iova;
if ((unsigned long)mz->addr == mz_phys_addr) {
RTE_LOG(WARNING, PMD,
"Memzone physical address same as virtual.\n");
RTE_LOG(WARNING, PMD,
- "Using rte_mem_virt2phy()\n");
- mz_phys_addr = rte_mem_virt2phy(mz->addr);
+ "Using rte_mem_virt2iova()\n");
+ mz_phys_addr = rte_mem_virt2iova(mz->addr);
if (mz_phys_addr == 0) {
RTE_LOG(ERR, PMD,
"unable to map vnic address to physical memory\n");
diff --git a/drivers/net/bnxt/bnxt_vnic.h b/drivers/net/bnxt/bnxt_vnic.h
index 993f2212..875dc3c1 100644
--- a/drivers/net/bnxt/bnxt_vnic.h
+++ b/drivers/net/bnxt/bnxt_vnic.h
@@ -53,11 +53,11 @@ struct bnxt_vnic_info {
uint16_t dflt_ring_grp;
uint16_t mru;
uint16_t hash_type;
- phys_addr_t rss_table_dma_addr;
+ rte_iova_t rss_table_dma_addr;
uint16_t *rss_table;
- phys_addr_t rss_hash_key_dma_addr;
+ rte_iova_t rss_hash_key_dma_addr;
void *rss_hash_key;
- phys_addr_t mc_list_dma_addr;
+ rte_iova_t mc_list_dma_addr;
char *mc_list;
uint32_t mc_addr_cnt;
#define BNXT_MAX_MC_ADDRS 16
@@ -80,6 +80,7 @@ struct bnxt_vnic_info {
bool rss_dflt_cr;
STAILQ_HEAD(, bnxt_filter_info) filter;
+ STAILQ_HEAD(, rte_flow) flow_list;
};
struct bnxt;
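
A sketch of the migrated pattern (not part of this patch): the phys_addr_t fields above become rte_iova_t and the memzone's bus address is now read from mz->iova, with rte_mem_virt2iova() as the fallback the driver uses when the reported IOVA equals the virtual address.

#include <stdint.h>
#include <rte_memory.h>
#include <rte_memzone.h>

/* Reserve DMA-able memory and return its IOVA, mirroring
 * bnxt_alloc_vnic_attributes() above. Returns 0 on failure.
 */
static rte_iova_t
reserve_dma(const char *name, size_t len, void **vaddr)
{
	const struct rte_memzone *mz;
	rte_iova_t iova;

	mz = rte_memzone_reserve(name, len, SOCKET_ID_ANY, 0);
	if (mz == NULL)
		return 0;

	*vaddr = mz->addr;
	iova = mz->iova;
	if ((uintptr_t)mz->addr == iova)	/* 1:1 mapping reported */
		iova = rte_mem_virt2iova(mz->addr);
	return iova;
}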
diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index cb8660af..c16edbad 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -33,25 +33,27 @@
#ifndef _HSI_STRUCT_DEF_DPDK_
#define _HSI_STRUCT_DEF_DPDK_
-/* HSI and HWRM Specification 1.7.7 */
+/* HSI and HWRM Specification 1.8.2 */
#define HWRM_VERSION_MAJOR 1
-#define HWRM_VERSION_MINOR 7
-#define HWRM_VERSION_UPDATE 7
+#define HWRM_VERSION_MINOR 8
+#define HWRM_VERSION_UPDATE 2
-#define HWRM_VERSION_STR "1.7.7"
+#define HWRM_VERSION_RSVD 0 /* non-zero means beta version */
+
+#define HWRM_VERSION_STR "1.8.2.0"
/*
* Following is the signature for HWRM message field that indicates not
* applicable (All F's). Need to cast it the size of the field if needed.
*/
#define HWRM_NA_SIGNATURE ((uint32_t)(-1))
#define HWRM_MAX_REQ_LEN (128) /* hwrm_func_buf_rgtr */
-#define HWRM_MAX_RESP_LEN (248) /* hwrm_selftest_qlist */
+#define HWRM_MAX_RESP_LEN (280) /* hwrm_selftest_qlist */
#define HW_HASH_INDEX_SIZE 0x80 /* 7 bit indirection table index. */
#define HW_HASH_KEY_SIZE 40
#define HWRM_RESP_VALID_KEY 1 /* valid key for HWRM response */
#define HWRM_ROCE_SP_HSI_VERSION_MAJOR 1
-#define HWRM_ROCE_SP_HSI_VERSION_MINOR 7
-#define HWRM_ROCE_SP_HSI_VERSION_UPDATE 4
+#define HWRM_ROCE_SP_HSI_VERSION_MINOR 8
+#define HWRM_ROCE_SP_HSI_VERSION_UPDATE 2
/*
* Request types
@@ -129,6 +131,9 @@
#define HWRM_CFA_NTUPLE_FILTER_ALLOC (UINT32_C(0x99))
#define HWRM_CFA_NTUPLE_FILTER_FREE (UINT32_C(0x9a))
#define HWRM_CFA_NTUPLE_FILTER_CFG (UINT32_C(0x9b))
+#define HWRM_CFA_EM_FLOW_ALLOC (UINT32_C(0x9c))
+#define HWRM_CFA_EM_FLOW_FREE (UINT32_C(0x9d))
+#define HWRM_CFA_EM_FLOW_CFG (UINT32_C(0x9e))
#define HWRM_TUNNEL_DST_PORT_QUERY (UINT32_C(0xa0))
#define HWRM_TUNNEL_DST_PORT_ALLOC (UINT32_C(0xa1))
#define HWRM_TUNNEL_DST_PORT_FREE (UINT32_C(0xa2))
@@ -815,8 +820,6 @@ struct rx_pkt_cmpl {
* packet. Length = 32B
*/
#define RX_PKT_CMPL_TYPE_RX_L2 UINT32_C(0x11)
- #define RX_PKT_CMPL_TYPE_RX_L2_TPA_START UINT32_C(0x13)
- #define RX_PKT_CMPL_TYPE_RX_L2_TPA_END UINT32_C(0x15)
/*
* When this bit is '1', it indicates a packet that has an error
* of some type. Type of error is indicated in error_flags.
@@ -1800,6 +1803,8 @@ struct hwrm_async_event_cmpl {
UINT32_C(0x32)
/* VF Configuration Change */
#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE UINT32_C(0x33)
+ /* LLFC/PFC Configuration Change */
+ #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LLFC_PFC_CHANGE UINT32_C(0x34)
/* HWRM Error */
#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR UINT32_C(0xff)
uint32_t event_data2;
@@ -2117,9 +2122,18 @@ struct hwrm_ver_get_output {
* This field returns the default request timeout value in
* milliseconds.
*/
+ uint8_t init_pending;
+ /*
+ * This field will indicate if any subsystem is not fully
+ * initialized.
+ */
+ /*
+ * If set to 1, device is not ready. If set to 0, device is
+ * ready to accept all HWRM commands.
+ */
+ #define HWRM_VER_GET_OUTPUT_INIT_PENDING_DEV_NOT_RDY UINT32_C(0x1)
uint8_t unused_0;
uint8_t unused_1;
- uint8_t unused_2;
uint8_t valid;
/*
* This field is used in Output records to indicate that the
@@ -2246,6 +2260,122 @@ struct hwrm_func_reset_output {
*/
} __attribute__((packed));
+/* hwrm_func_vf_cfg */
+/*
+ * Description: This command allows configuration of a VF by its driver. If this
+ * function is called by a PF driver, then the HWRM shall fail this command. If
+ * guest VLAN and/or MAC address are provided in this command, then the HWRM
+ * shall set up appropriate MAC/VLAN filters for the VF that is being
+ * configured. A VF driver should set VF MTU/MRU using this command prior to
+ * allocating RX VNICs or TX rings for the corresponding VF.
+ */
+/* Input (32 bytes) */
+struct hwrm_func_vf_cfg_input {
+ uint16_t req_type;
+ /*
+ * This value indicates what type of request this is. The format for the
+ * rest of the command is determined by this field.
+ */
+ uint16_t cmpl_ring;
+ /*
+ * This value indicates the what completion ring the request will be
+ * optionally completed on. If the value is -1, then no CR completion
+ * will be generated. Any other value must be a valid CR ring_id value
+ * for this function.
+ */
+ uint16_t seq_id;
+ /* This value indicates the command sequence number. */
+ uint16_t target_id;
+ /*
+ * Target ID of this command. 0x0 - 0xFFF8 - Used for function ids
+ * 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF - HWRM
+ */
+ uint64_t resp_addr;
+ /*
+ * This is the host address where the response will be written when the
+ * request is complete. This area must be 16B aligned and must be
+ * cleared to zero before the request is made.
+ */
+ uint32_t enables;
+ /* This bit must be '1' for the mtu field to be configured. */
+ #define HWRM_FUNC_VF_CFG_INPUT_ENABLES_MTU UINT32_C(0x1)
+ /* This bit must be '1' for the guest_vlan field to be configured. */
+ #define HWRM_FUNC_VF_CFG_INPUT_ENABLES_GUEST_VLAN UINT32_C(0x2)
+ /*
+ * This bit must be '1' for the async_event_cr field to be configured.
+ */
+ #define HWRM_FUNC_VF_CFG_INPUT_ENABLES_ASYNC_EVENT_CR UINT32_C(0x4)
+ /* This bit must be '1' for the dflt_mac_addr field to be configured. */
+ #define HWRM_FUNC_VF_CFG_INPUT_ENABLES_DFLT_MAC_ADDR UINT32_C(0x8)
+ uint16_t mtu;
+ /*
+ * The maximum transmission unit requested on the function. The HWRM
+ * should make sure that the mtu of the function does not exceed the mtu
+ * of the physical port that this function is associated with. In
+ * addition to requesting mtu per function, it is possible to configure
+ * mtu per transmit ring. By default, the mtu of each transmit ring
+ * associated with a function is equal to the mtu of the function. The
+ * HWRM should make sure that the mtu of each transmit ring that is
+ * assigned to a function has a valid mtu.
+ */
+ uint16_t guest_vlan;
+ /*
+ * The guest VLAN for the function being configured. This field's format
+ * is same as 802.1Q Tag's Tag Control Information (TCI) format that
+ * includes both Priority Code Point (PCP) and VLAN Identifier (VID).
+ */
+ uint16_t async_event_cr;
+ /*
+ * ID of the target completion ring for receiving asynchronous event
+ * completions. If this field is not valid, then the HWRM shall use the
+ * default completion ring of the function that is being configured as
+ * the target completion ring for providing any asynchronous event
+ * completions for that function. If this field is valid, then the HWRM
+ * shall use the completion ring identified by this ID as the target
+ * completion ring for providing any asynchronous event completions for
+ * the function that is being configured.
+ */
+ uint8_t dflt_mac_addr[6];
+ /*
+ * This value is the current MAC address requested by the VF driver to
+ * be configured on this VF. A value of 00-00-00-00-00-00 indicates no
+ * MAC address configuration is requested by the VF driver. The parent
+ * PF driver may reject or overwrite this MAC address.
+ */
+} __attribute__((packed));
+
+/* Output (16 bytes) */
+
+struct hwrm_func_vf_cfg_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type Note: receiver to verify the in parameters,
+ * and fail the call with an error when appropriate
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint32_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the output is
+ * completely written to RAM. This field should be read as '1' to
+ * indicate that the output has been completely written. When writing a
+ * command completion or response to an internal processor, the order of
+ * writes has to be such that this field is written last.
+ */
+} __attribute__((packed));
+
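
An illustrative sketch (not part of this patch) of filling the relocated hwrm_func_vf_cfg_input: only fields whose bits are set in 'enables' are applied, and multi-byte fields go out in little-endian, so a VF driver requesting an MTU and a default MAC could populate the request roughly as below. Sending the message through the driver's HWRM path is omitted.

#include <string.h>
#include <rte_byteorder.h>
#include "hsi_struct_def_dpdk.h"

static void
fill_vf_cfg(struct hwrm_func_vf_cfg_input *req, uint16_t mtu,
	    const uint8_t mac[6])
{
	memset(req, 0, sizeof(*req));
	req->enables = rte_cpu_to_le_32(
		HWRM_FUNC_VF_CFG_INPUT_ENABLES_MTU |
		HWRM_FUNC_VF_CFG_INPUT_ENABLES_DFLT_MAC_ADDR);
	req->mtu = rte_cpu_to_le_16(mtu);
	memcpy(req->dflt_mac_addr, mac, 6);
}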
/* hwrm_func_qcaps */
/*
* Description: This command returns capabilities of a function. The input FID
@@ -2727,8 +2857,16 @@ struct hwrm_func_qcfg_output {
#define HWRM_FUNC_QCFG_OUTPUT_PORT_PF_CNT_UNAVAIL UINT32_C(0x0)
uint16_t dflt_vnic_id;
/* The default VNIC ID assigned to a function that is being queried. */
- uint8_t unused_0;
- uint8_t unused_1;
+ uint16_t max_mtu_configured;
+ /*
+ * This value specifies the MAX MTU that can be configured by
+ * host drivers. This 'max_mtu_configured' can be HW max MTU or
+ * OEM applications specified value. Host drivers can't
+ * configure the MTU greater than this value. Host drivers
+ * should read this value prior to configuring the MTU. FW will
+ * fail the host request with MTU greater than
+ * 'max_mtu_configured'.
+ */
uint32_t min_bw;
/*
* Minimum BW allocated for this function. The HWRM will
@@ -2826,7 +2964,7 @@ struct hwrm_func_qcfg_output {
#define HWRM_FUNC_QCFG_OUTPUT_EVB_MODE_VEB UINT32_C(0x1)
/* Virtual Ethernet Port Aggregator (VEPA) */
#define HWRM_FUNC_QCFG_OUTPUT_EVB_MODE_VEPA UINT32_C(0x2)
- uint8_t unused_2;
+ uint8_t unused_0;
uint16_t alloc_vfs;
/*
* The number of VFs that are allocated to the function. This is
@@ -2846,7 +2984,7 @@ struct hwrm_func_qcfg_output {
* The number of strict priority transmit rings out of currently
* allocated TX rings to the function (alloc_tx_rings).
*/
- uint8_t unused_3;
+ uint8_t unused_1;
uint8_t valid;
/*
* This field is used in Output records to indicate that the
@@ -3199,6 +3337,14 @@ struct hwrm_func_cfg_input {
*/
#define HWRM_FUNC_CFG_INPUT_FLAGS_NO_AUTOCLEAR_STATISTIC \
UINT32_C(0x1000)
+ /*
+ * This bit requests that the firmware test to see if all the
+ * assets requested in this command (i.e. number of TX rings)
+ * are available. The firmware will return an error if the
+ * requested assets are not available. The firmware will NOT
+ * reserve the assets if they are available.
+ */
+ #define HWRM_FUNC_CFG_INPUT_FLAGS_TX_ASSETS_TEST UINT32_C(0x2000)
uint32_t enables;
/* This bit must be '1' for the mtu field to be configured. */
#define HWRM_FUNC_CFG_INPUT_ENABLES_MTU UINT32_C(0x1)
@@ -4236,123 +4382,6 @@ struct hwrm_func_buf_unrgtr_output {
*/
} __attribute__((packed));
-/* hwrm_func_vf_cfg */
-/*
- * Description: This command allows configuration of a VF by its driver. If this
- * function is called by a PF driver, then the HWRM shall fail this command. If
- * guest VLAN and/or MAC address are provided in this command, then the HWRM
- * shall set up appropriate MAC/VLAN filters for the VF that is being
- * configured. A VF driver should set VF MTU/MRU using this command prior to
- * allocating RX VNICs or TX rings for the corresponding VF.
- */
-/* Input (32 bytes) */
-
-struct hwrm_func_vf_cfg_input {
- uint16_t req_type;
- /*
- * This value indicates what type of request this is. The format for the
- * rest of the command is determined by this field.
- */
- uint16_t cmpl_ring;
- /*
- * This value indicates the what completion ring the request will be
- * optionally completed on. If the value is -1, then no CR completion
- * will be generated. Any other value must be a valid CR ring_id value
- * for this function.
- */
- uint16_t seq_id;
- /* This value indicates the command sequence number. */
- uint16_t target_id;
- /*
- * Target ID of this command. 0x0 - 0xFFF8 - Used for function ids
- * 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF - HWRM
- */
- uint64_t resp_addr;
- /*
- * This is the host address where the response will be written when the
- * request is complete. This area must be 16B aligned and must be
- * cleared to zero before the request is made.
- */
- uint32_t enables;
- /* This bit must be '1' for the mtu field to be configured. */
- #define HWRM_FUNC_VF_CFG_INPUT_ENABLES_MTU UINT32_C(0x1)
- /* This bit must be '1' for the guest_vlan field to be configured. */
- #define HWRM_FUNC_VF_CFG_INPUT_ENABLES_GUEST_VLAN UINT32_C(0x2)
- /*
- * This bit must be '1' for the async_event_cr field to be configured.
- */
- #define HWRM_FUNC_VF_CFG_INPUT_ENABLES_ASYNC_EVENT_CR UINT32_C(0x4)
- /* This bit must be '1' for the dflt_mac_addr field to be configured. */
- #define HWRM_FUNC_VF_CFG_INPUT_ENABLES_DFLT_MAC_ADDR UINT32_C(0x8)
- uint16_t mtu;
- /*
- * The maximum transmission unit requested on the function. The HWRM
- * should make sure that the mtu of the function does not exceed the mtu
- * of the physical port that this function is associated with. In
- * addition to requesting mtu per function, it is possible to configure
- * mtu per transmit ring. By default, the mtu of each transmit ring
- * associated with a function is equal to the mtu of the function. The
- * HWRM should make sure that the mtu of each transmit ring that is
- * assigned to a function has a valid mtu.
- */
- uint16_t guest_vlan;
- /*
- * The guest VLAN for the function being configured. This field's format
- * is same as 802.1Q Tag's Tag Control Information (TCI) format that
- * includes both Priority Code Point (PCP) and VLAN Identifier (VID).
- */
- uint16_t async_event_cr;
- /*
- * ID of the target completion ring for receiving asynchronous event
- * completions. If this field is not valid, then the HWRM shall use the
- * default completion ring of the function that is being configured as
- * the target completion ring for providing any asynchronous event
- * completions for that function. If this field is valid, then the HWRM
- * shall use the completion ring identified by this ID as the target
- * completion ring for providing any asynchronous event completions for
- * the function that is being configured.
- */
- uint8_t dflt_mac_addr[6];
- /*
- * This value is the current MAC address requested by the VF driver to
- * be configured on this VF. A value of 00-00-00-00-00-00 indicates no
- * MAC address configuration is requested by the VF driver. The parent
- * PF driver may reject or overwrite this MAC address.
- */
-} __attribute__((packed));
-
-/* Output (16 bytes) */
-
-struct hwrm_func_vf_cfg_output {
- uint16_t error_code;
- /*
- * Pass/Fail or error type Note: receiver to verify the in parameters,
- * and fail the call with an error when appropriate
- */
- uint16_t req_type;
- /* This field returns the type of original request. */
- uint16_t seq_id;
- /* This field provides original sequence number of the command. */
- uint16_t resp_len;
- /*
- * This field is the length of the response in bytes. The last
- * byte of the response is a valid flag that will read as '1'
- * when the command has been completely written to memory.
- */
- uint32_t unused_0;
- uint8_t unused_1;
- uint8_t unused_2;
- uint8_t unused_3;
- uint8_t valid;
- /*
- * This field is used in Output records to indicate that the output is
- * completely written to RAM. This field should be read as '1' to
- * indicate that the output has been completely written. When writing a
- * command completion or response to an internal processor, the order of
- * writes has to be such that this field is written last.
- */
-} __attribute__((packed));
-
/* hwrm_port_phy_cfg */
/*
* Description: This command configures the PHY device for the port. It allows
@@ -4917,12 +4946,12 @@ struct hwrm_port_phy_qcfg_output {
#define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100GB UINT32_C(0x3e8)
/* 10Mb link speed */
#define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10MB UINT32_C(0xffff)
- uint8_t duplex;
+ uint8_t duplex_cfg;
/* This value is indicates the duplex of the current connection. */
/* Half Duplex connection. */
- #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_HALF UINT32_C(0x0)
+ #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_HALF UINT32_C(0x0)
/* Full duplex connection. */
- #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_FULL UINT32_C(0x1)
+ #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_FULL UINT32_C(0x1)
uint8_t pause;
/*
* This value is used to indicate the current pause
@@ -5250,6 +5279,11 @@ struct hwrm_port_phy_qcfg_output {
/* 40G_ACTIVE_CABLE */
#define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_ACTIVE_CABLE \
UINT32_C(0x18)
+ #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASET UINT32_C(0x19)
+ /* 1G_baseSX */
+ #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASESX UINT32_C(0x1a)
+ /* 1G_baseCX */
+ #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASECX UINT32_C(0x1b)
uint8_t media_type;
/* This value represents a media type. */
/* Unknown */
@@ -5576,8 +5610,16 @@ struct hwrm_port_phy_qcfg_output {
*/
#define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_CLAUSE91_ENABLED \
UINT32_C(0x40)
+ uint8_t duplex_state;
+ /*
+ * This value indicates the duplex of the current connection
+ * state.
+ */
+ /* Half Duplex connection. */
+ #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_STATE_HALF UINT32_C(0x0)
+ /* Full duplex connection. */
+ #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_STATE_FULL UINT32_C(0x1)
uint8_t unused_1;
- uint8_t unused_2;
char phy_vendor_name[16];
/*
* Up to 16 bytes of null padded ASCII string representing PHY
@@ -5591,10 +5633,10 @@ struct hwrm_port_phy_qcfg_output {
* to null, then the vendor specific part number is not
* available.
*/
- uint32_t unused_3;
+ uint32_t unused_2;
+ uint8_t unused_3;
uint8_t unused_4;
uint8_t unused_5;
- uint8_t unused_6;
uint8_t valid;
/*
* This field is used in Output records to indicate that the
@@ -7314,6 +7356,14 @@ struct hwrm_vnic_cfg_input {
* that is used for computing RSS hash only.
*/
#define HWRM_VNIC_CFG_INPUT_FLAGS_RSS_DFLT_CR_MODE UINT32_C(0x20)
+ /*
+ * When this bit is '1', the VNIC is being configured to receive
+ * both RoCE and non-RoCE traffic, but forward only the RoCE
+ * traffic further. Also, RoCE traffic can be mirrored to L2
+ * driver.
+ */
+ #define HWRM_VNIC_CFG_INPUT_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_MODE \
+ UINT32_C(0x40)
uint32_t enables;
/*
* This bit must be '1' for the dflt_ring_grp field to be
@@ -7523,6 +7573,13 @@ struct hwrm_vnic_qcfg_output {
* is not configured.
*/
#define HWRM_VNIC_QCFG_OUTPUT_FLAGS_RSS_DFLT_CR_MODE UINT32_C(0x20)
+ /*
+ * When this bit is '1', the VNIC is configured to receive both
+ * RoCE and non-RoCE traffic, but forward only RoCE traffic
+ * further. Also RoCE traffic can be mirrored to L2 driver.
+ */
+ #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_MODE \
+ UINT32_C(0x40)
uint32_t unused_2;
uint8_t unused_3;
uint8_t unused_4;
@@ -7538,6 +7595,183 @@ struct hwrm_vnic_qcfg_output {
*/
} __attribute__((packed));
+
+/* hwrm_vnic_tpa_cfg */
+/* Description: This function is used to enable/configure TPA on the VNIC. */
+/* Input (40 bytes) */
+struct hwrm_vnic_tpa_cfg_input {
+ uint16_t req_type;
+ /*
+ * This value indicates what type of request this is. The format
+ * for the rest of the command is determined by this field.
+ */
+ uint16_t cmpl_ring;
+ /*
+ * This value indicates the what completion ring the request
+ * will be optionally completed on. If the value is -1, then no
+ * CR completion will be generated. Any other value must be a
+ * valid CR ring_id value for this function.
+ */
+ uint16_t seq_id;
+ /* This value indicates the command sequence number. */
+ uint16_t target_id;
+ /*
+ * Target ID of this command. 0x0 - 0xFFF8 - Used for function
+ * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
+ * - HWRM
+ */
+ uint64_t resp_addr;
+ /*
+ * This is the host address where the response will be written
+ * when the request is complete. This area must be 16B aligned
+ * and must be cleared to zero before the request is made.
+ */
+ uint32_t flags;
+ /*
+ * When this bit is '1', the VNIC shall be configured to perform
+ * transparent packet aggregation (TPA) of non-tunneled TCP
+ * packets.
+ */
+ #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA UINT32_C(0x1)
+ /*
+ * When this bit is '1', the VNIC shall be configured to perform
+ * transparent packet aggregation (TPA) of tunneled TCP packets.
+ */
+ #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_ENCAP_TPA UINT32_C(0x2)
+ /*
+ * When this bit is '1', the VNIC shall be configured to perform
+ * transparent packet aggregation (TPA) according to Windows
+ * Receive Segment Coalescing (RSC) rules.
+ */
+ #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_RSC_WND_UPDATE UINT32_C(0x4)
+ /*
+ * When this bit is '1', the VNIC shall be configured to perform
+ * transparent packet aggregation (TPA) according to Linux
+ * Generic Receive Offload (GRO) rules.
+ */
+ #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO UINT32_C(0x8)
+ /*
+ * When this bit is '1', the VNIC shall be configured to perform
+ * transparent packet aggregation (TPA) for TCP packets with IP
+ * ECN set to non-zero.
+ */
+ #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_ECN UINT32_C(0x10)
+ /*
+ * When this bit is '1', the VNIC shall be configured to perform
+ * transparent packet aggregation (TPA) for GRE tunneled TCP
+ * packets only if all packets have the same GRE sequence.
+ */
+ #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_SAME_GRE_SEQ \
+ UINT32_C(0x20)
+ /*
+ * When this bit is '1' and the GRO mode is enabled, the VNIC
+ * shall be configured to perform transparent packet aggregation
+ * (TPA) for TCP/IPv4 packets with consecutively increasing
+ * IPIDs. In other words, the last packet that is being
+ * aggregated to an already existing aggregation context shall
+ * have IPID 1 more than the IPID of the last packet that was
+ * aggregated in that aggregation context.
+ */
+ #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO_IPID_CHECK UINT32_C(0x40)
+ /*
+ * When this bit is '1' and the GRO mode is enabled, the VNIC
+ * shall be configured to perform transparent packet aggregation
+ * (TPA) for TCP packets with the same TTL (IPv4) or Hop limit
+ * (IPv6) value.
+ */
+ #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO_TTL_CHECK UINT32_C(0x80)
+ uint32_t enables;
+ /* This bit must be '1' for the max_agg_segs field to be configured. */
+ #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_SEGS UINT32_C(0x1)
+ /* This bit must be '1' for the max_aggs field to be configured. */
+ #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGGS UINT32_C(0x2)
+ /*
+ * This bit must be '1' for the max_agg_timer field to be
+ * configured.
+ */
+ #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_TIMER UINT32_C(0x4)
+ /* This bit must be '1' for the min_agg_len field to be configured. */
+ #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MIN_AGG_LEN UINT32_C(0x8)
+ uint16_t vnic_id;
+ /* Logical vnic ID */
+ uint16_t max_agg_segs;
+ /*
+ * This is the maximum number of TCP segments that can be
+ * aggregated (unit is Log2). Max value is 31.
+ */
+ /* 1 segment */
+ #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_1 UINT32_C(0x0)
+ /* 2 segments */
+ #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_2 UINT32_C(0x1)
+ /* 4 segments */
+ #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_4 UINT32_C(0x2)
+ /* 8 segments */
+ #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_8 UINT32_C(0x3)
+ /* Any segment size larger than this is not valid */
+ #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_MAX UINT32_C(0x1f)
+ uint16_t max_aggs;
+ /*
+ * This is the maximum number of aggregations this VNIC is
+ * allowed (unit is Log2). Max value is 7
+ */
+ /* 1 aggregation */
+ #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_1 UINT32_C(0x0)
+ /* 2 aggregations */
+ #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_2 UINT32_C(0x1)
+ /* 4 aggregations */
+ #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_4 UINT32_C(0x2)
+ /* 8 aggregations */
+ #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_8 UINT32_C(0x3)
+ /* 16 aggregations */
+ #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_16 UINT32_C(0x4)
+ /* Any aggregation size larger than this is not valid */
+ #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_MAX UINT32_C(0x7)
+ uint8_t unused_0;
+ uint8_t unused_1;
+ uint32_t max_agg_timer;
+ /*
+ * This is the maximum amount of time allowed for an aggregation
+ * context to complete after it was initiated.
+ */
+ uint32_t min_agg_len;
+ /*
+ * This is the minimum amount of payload length required to
+ * start an aggregation context.
+ */
+} __attribute__((packed));
+
+/* Output (16 bytes) */
+struct hwrm_vnic_tpa_cfg_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type Note: receiver to verify the in
+ * parameters, and fail the call with an error when appropriate
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint32_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the
+ * output is completely written to RAM. This field should be
+ * read as '1' to indicate that the output has been completely
+ * written. When writing a command completion or response to an
+ * internal processor, the order of writes has to be such that
+ * this field is written last.
+ */
+} __attribute__((packed));
+
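
An illustrative sketch (not part of this patch): enabling GRO-style TPA on a VNIC with the flags and limits defined above, capping aggregation at 8 segments. Endianness conversions follow the usual HWRM convention; issuing the message is left to the driver's HWRM path.

#include <string.h>
#include <rte_byteorder.h>
#include "hsi_struct_def_dpdk.h"

static void
fill_tpa_cfg(struct hwrm_vnic_tpa_cfg_input *req, uint16_t vnic_id)
{
	memset(req, 0, sizeof(*req));
	req->flags = rte_cpu_to_le_32(HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA |
				      HWRM_VNIC_TPA_CFG_INPUT_FLAGS_ENCAP_TPA |
				      HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO);
	req->enables =
		rte_cpu_to_le_32(HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_SEGS);
	req->vnic_id = rte_cpu_to_le_16(vnic_id);
	req->max_agg_segs =
		rte_cpu_to_le_16(HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_8);
}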
/* hwrm_vnic_rss_cfg */
/* Description: This function is used to enable RSS configuration. */
/* Input (48 bytes) */
@@ -7651,7 +7885,6 @@ struct hwrm_vnic_rss_cfg_output {
* the VNIC.
*/
/* Input (40 bytes) */
-
struct hwrm_vnic_plcmodes_cfg_input {
uint16_t req_type;
/*
@@ -7770,7 +8003,6 @@ struct hwrm_vnic_plcmodes_cfg_input {
} __attribute__((packed));
/* Output (16 bytes) */
-
struct hwrm_vnic_plcmodes_cfg_output {
uint16_t error_code;
/*
@@ -7807,7 +8039,6 @@ struct hwrm_vnic_plcmodes_cfg_output {
* of the VNIC.
*/
/* Input (24 bytes) */
-
struct hwrm_vnic_plcmodes_qcfg_input {
uint16_t req_type;
/*
@@ -7840,7 +8071,6 @@ struct hwrm_vnic_plcmodes_qcfg_input {
} __attribute__((packed));
/* Output (24 bytes) */
-
struct hwrm_vnic_plcmodes_qcfg_output {
uint16_t error_code;
/*
@@ -8065,182 +8295,6 @@ struct hwrm_vnic_rss_cos_lb_ctx_free_output {
*/
} __attribute__((packed));
-/* hwrm_vnic_tpa_cfg */
-/* Description: This function is used to enable/configure TPA on the VNIC. */
-/* Input (40 bytes) */
-struct hwrm_vnic_tpa_cfg_input {
- uint16_t req_type;
- /*
- * This value indicates what type of request this is. The format
- * for the rest of the command is determined by this field.
- */
- uint16_t cmpl_ring;
- /*
- * This value indicates the what completion ring the request
- * will be optionally completed on. If the value is -1, then no
- * CR completion will be generated. Any other value must be a
- * valid CR ring_id value for this function.
- */
- uint16_t seq_id;
- /* This value indicates the command sequence number. */
- uint16_t target_id;
- /*
- * Target ID of this command. 0x0 - 0xFFF8 - Used for function
- * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
- * - HWRM
- */
- uint64_t resp_addr;
- /*
- * This is the host address where the response will be written
- * when the request is complete. This area must be 16B aligned
- * and must be cleared to zero before the request is made.
- */
- uint32_t flags;
- /*
- * When this bit is '1', the VNIC shall be configured to perform
- * transparent packet aggregation (TPA) of non-tunneled TCP
- * packets.
- */
- #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA UINT32_C(0x1)
- /*
- * When this bit is '1', the VNIC shall be configured to perform
- * transparent packet aggregation (TPA) of tunneled TCP packets.
- */
- #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_ENCAP_TPA UINT32_C(0x2)
- /*
- * When this bit is '1', the VNIC shall be configured to perform
- * transparent packet aggregation (TPA) according to Windows
- * Receive Segment Coalescing (RSC) rules.
- */
- #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_RSC_WND_UPDATE UINT32_C(0x4)
- /*
- * When this bit is '1', the VNIC shall be configured to perform
- * transparent packet aggregation (TPA) according to Linux
- * Generic Receive Offload (GRO) rules.
- */
- #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO UINT32_C(0x8)
- /*
- * When this bit is '1', the VNIC shall be configured to perform
- * transparent packet aggregation (TPA) for TCP packets with IP
- * ECN set to non-zero.
- */
- #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_ECN UINT32_C(0x10)
- /*
- * When this bit is '1', the VNIC shall be configured to perform
- * transparent packet aggregation (TPA) for GRE tunneled TCP
- * packets only if all packets have the same GRE sequence.
- */
- #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_SAME_GRE_SEQ \
- UINT32_C(0x20)
- /*
- * When this bit is '1' and the GRO mode is enabled, the VNIC
- * shall be configured to perform transparent packet aggregation
- * (TPA) for TCP/IPv4 packets with consecutively increasing
- * IPIDs. In other words, the last packet that is being
- * aggregated to an already existing aggregation context shall
- * have IPID 1 more than the IPID of the last packet that was
- * aggregated in that aggregation context.
- */
- #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO_IPID_CHECK UINT32_C(0x40)
- /*
- * When this bit is '1' and the GRO mode is enabled, the VNIC
- * shall be configured to perform transparent packet aggregation
- * (TPA) for TCP packets with the same TTL (IPv4) or Hop limit
- * (IPv6) value.
- */
- #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO_TTL_CHECK UINT32_C(0x80)
- uint32_t enables;
- /* This bit must be '1' for the max_agg_segs field to be configured. */
- #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_SEGS UINT32_C(0x1)
- /* This bit must be '1' for the max_aggs field to be configured. */
- #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGGS UINT32_C(0x2)
- /*
- * This bit must be '1' for the max_agg_timer field to be
- * configured.
- */
- #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_TIMER UINT32_C(0x4)
- /* This bit must be '1' for the min_agg_len field to be configured. */
- #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MIN_AGG_LEN UINT32_C(0x8)
- uint16_t vnic_id;
- /* Logical vnic ID */
- uint16_t max_agg_segs;
- /*
- * This is the maximum number of TCP segments that can be
- * aggregated (unit is Log2). Max value is 31.
- */
- /* 1 segment */
- #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_1 UINT32_C(0x0)
- /* 2 segments */
- #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_2 UINT32_C(0x1)
- /* 4 segments */
- #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_4 UINT32_C(0x2)
- /* 8 segments */
- #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_8 UINT32_C(0x3)
- /* Any segment size larger than this is not valid */
- #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_MAX UINT32_C(0x1f)
- uint16_t max_aggs;
- /*
- * This is the maximum number of aggregations this VNIC is
- * allowed (unit is Log2). Max value is 7
- */
- /* 1 aggregation */
- #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_1 UINT32_C(0x0)
- /* 2 aggregations */
- #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_2 UINT32_C(0x1)
- /* 4 aggregations */
- #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_4 UINT32_C(0x2)
- /* 8 aggregations */
- #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_8 UINT32_C(0x3)
- /* 16 aggregations */
- #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_16 UINT32_C(0x4)
- /* Any aggregation size larger than this is not valid */
- #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_MAX UINT32_C(0x7)
- uint8_t unused_0;
- uint8_t unused_1;
- uint32_t max_agg_timer;
- /*
- * This is the maximum amount of time allowed for an aggregation
- * context to complete after it was initiated.
- */
- uint32_t min_agg_len;
- /*
- * This is the minimum amount of payload length required to
- * start an aggregation context.
- */
-} __attribute__((packed));
-
-/* Output (16 bytes) */
-struct hwrm_vnic_tpa_cfg_output {
- uint16_t error_code;
- /*
- * Pass/Fail or error type Note: receiver to verify the in
- * parameters, and fail the call with an error when appropriate
- */
- uint16_t req_type;
- /* This field returns the type of original request. */
- uint16_t seq_id;
- /* This field provides original sequence number of the command. */
- uint16_t resp_len;
- /*
- * This field is the length of the response in bytes. The last
- * byte of the response is a valid flag that will read as '1'
- * when the command has been completely written to memory.
- */
- uint32_t unused_0;
- uint8_t unused_1;
- uint8_t unused_2;
- uint8_t unused_3;
- uint8_t valid;
- /*
- * This field is used in Output records to indicate that the
- * output is completely written to RAM. This field should be
- * read as '1' to indicate that the output has been completely
- * written. When writing a command completion or response to an
- * internal processor, the order of writes has to be such that
- * this field is written last.
- */
-} __attribute__((packed));
-
/* hwrm_ring_alloc */
/*
* Description: This command allocates and does basic preparation for a ring.
@@ -9046,6 +9100,12 @@ struct hwrm_cfa_l2_filter_alloc_input {
* datagram payload
*/
#define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPGRE UINT32_C(0x8)
+ /*
+ * IPV4 over virtual eXtensible Local Area
+ * Network (IPV4oVXLAN)
+ */
+ #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+ UINT32_C(0x9)
/* Any tunneled traffic */
#define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
UINT32_C(0xff)
@@ -9471,6 +9531,25 @@ struct hwrm_cfa_l2_set_rx_mask_output {
*/
} __attribute__((packed));
+/* Command specific Error Codes (8 bytes) */
+struct hwrm_cfa_l2_set_rx_mask_cmd_err {
+ uint8_t code;
+ /*
+ * command specific error codes that go to the cmd_err field
+ * in Common HWRM Error Response.
+ */
+ /* Unknown error */
+ #define HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN UINT32_C(0x0)
+ /*
+ * Unable to complete operation due to conflict
+ * with Ntuple Filter
+ */
+ #define \
+ HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR \
+ UINT32_C(0x1)
+ uint8_t unused_0[7];
+} __attribute__((packed));
+
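
A small decoding sketch (not part of this patch) for the new command-specific error block: when HWRM_CFA_L2_SET_RX_MASK fails because the requested mask conflicts with an installed ntuple filter, the cmd_err code distinguishes that case from a generic failure.

static const char *
rx_mask_err_str(const struct hwrm_cfa_l2_set_rx_mask_cmd_err *err)
{
	if (err->code ==
	    HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR)
		return "rx mask conflicts with an existing ntuple filter";
	return "unknown rx mask error";
}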
/* hwrm_cfa_vlan_antispoof_cfg */
/* Description: Configures vlan anti-spoof filters for VF. */
/* Input (32 bytes) */
@@ -9550,6 +9629,1010 @@ struct hwrm_cfa_vlan_antispoof_cfg_output {
*/
};
+/* hwrm_cfa_ntuple_filter_alloc */
+/*
+ * Description: This is a ntuple filter that uses fields from L4/L3 header and
+ * optionally fields from L2. The ntuple filters apply to receive traffic only.
+ * All L2/L3/L4 header fields are specified in network byte order. These filters
+ * can be used for Receive Flow Steering (RFS). # For ethertype value, only
+ * 0x0800 (IPv4) and 0x86dd (IPv6) shall be supported for ntuple filters. # If a
+ * field specified in this command is not enabled as a valid field, then that
+ * field shall not be used in matching packet header fields against this filter.
+ */
+/* Input (128 bytes) */
+struct hwrm_cfa_ntuple_filter_alloc_input {
+ uint16_t req_type;
+ /*
+ * This value indicates what type of request this is. The format
+ * for the rest of the command is determined by this field.
+ */
+ uint16_t cmpl_ring;
+ /*
+ * This value indicates the what completion ring the request
+ * will be optionally completed on. If the value is -1, then no
+ * CR completion will be generated. Any other value must be a
+ * valid CR ring_id value for this function.
+ */
+ uint16_t seq_id;
+ /* This value indicates the command sequence number. */
+ uint16_t target_id;
+ /*
+ * Target ID of this command. 0x0 - 0xFFF8 - Used for function
+ * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
+ * - HWRM
+ */
+ uint64_t resp_addr;
+ /*
+ * This is the host address where the response will be written
+ * when the request is complete. This area must be 16B aligned
+ * and must be cleared to zero before the request is made.
+ */
+ uint32_t flags;
+ /*
+ * Setting of this flag indicates the applicability to the
+ * loopback path.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_LOOPBACK \
+ UINT32_C(0x1)
+ /*
+ * Setting of this flag indicates drop action. If this flag is
+ * not set, then it should be considered accept action.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_DROP UINT32_C(0x2)
+ /*
+ * Setting of this flag indicates that a meter is expected to be
+ * attached to this flow. This hint can be used when choosing
+ * the action record format required for the flow.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_METER UINT32_C(0x4)
+ uint32_t enables;
+ /* This bit must be '1' for the l2_filter_id field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID \
+ UINT32_C(0x1)
+ /* This bit must be '1' for the ethertype field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_ETHERTYPE \
+ UINT32_C(0x2)
+ /* This bit must be '1' for the tunnel_type field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE \
+ UINT32_C(0x4)
+ /* This bit must be '1' for the src_macaddr field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_MACADDR \
+ UINT32_C(0x8)
+ /* This bit must be '1' for the ipaddr_type field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IPADDR_TYPE \
+ UINT32_C(0x10)
+ /* This bit must be '1' for the src_ipaddr field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR \
+ UINT32_C(0x20)
+ /*
+ * This bit must be '1' for the src_ipaddr_mask field to be
+ * configured.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR_MASK \
+ UINT32_C(0x40)
+ /* This bit must be '1' for the dst_ipaddr field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR \
+ UINT32_C(0x80)
+ /*
+ * This bit must be '1' for the dst_ipaddr_mask field to be
+ * configured.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR_MASK \
+ UINT32_C(0x100)
+ /* This bit must be '1' for the ip_protocol field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IP_PROTOCOL \
+ UINT32_C(0x200)
+ /* This bit must be '1' for the src_port field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT \
+ UINT32_C(0x400)
+ /*
+ * This bit must be '1' for the src_port_mask field to be
+ * configured.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT_MASK \
+ UINT32_C(0x800)
+ /* This bit must be '1' for the dst_port field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT \
+ UINT32_C(0x1000)
+ /*
+ * This bit must be '1' for the dst_port_mask field to be
+ * configured.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT_MASK \
+ UINT32_C(0x2000)
+ /* This bit must be '1' for the pri_hint field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_PRI_HINT \
+ UINT32_C(0x4000)
+ /*
+ * This bit must be '1' for the ntuple_filter_id field to be
+ * configured.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_NTUPLE_FILTER_ID \
+ UINT32_C(0x8000)
+ /* This bit must be '1' for the dst_id field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_ID \
+ UINT32_C(0x10000)
+ /*
+ * This bit must be '1' for the mirror_vnic_id field to be
+ * configured.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
+ UINT32_C(0x20000)
+ /* This bit must be '1' for the dst_macaddr field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_MACADDR \
+ UINT32_C(0x40000)
+ uint64_t l2_filter_id;
+ /*
+ * This value identifies a set of CFA data structures used for
+ * an L2 context.
+ */
+ uint8_t src_macaddr[6];
+ /*
+ * This value indicates the source MAC address in the Ethernet
+ * header.
+ */
+ uint16_t ethertype;
+ /* This value indicates the ethertype in the Ethernet header. */
+ uint8_t ip_addr_type;
+ /*
+ * This value indicates the type of IP address. 4 - IPv4 6 -
+ * IPv6 All others are invalid.
+ */
+ /* invalid */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_UNKNOWN \
+ UINT32_C(0x0)
+ /* IPv4 */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV4 \
+ UINT32_C(0x4)
+ /* IPv6 */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV6 \
+ UINT32_C(0x6)
+ uint8_t ip_protocol;
+ /*
+ * The value of the protocol field in the IP header. Applies to UDP and
+ * TCP traffic. 6 - TCP 17 - UDP
+ */
+ /* invalid */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_UNKNOWN \
+ UINT32_C(0x0)
+ /* TCP */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_TCP \
+ UINT32_C(0x6)
+ /* UDP */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_UDP \
+ UINT32_C(0x11)
+ uint16_t dst_id;
+ /*
+ * If set, this value shall represent the Logical VNIC ID of the
+ * destination VNIC for the RX path and network port id of the
+ * destination port for the TX path.
+ */
+ uint16_t mirror_vnic_id;
+ /* Logical VNIC ID of the VNIC where traffic is mirrored. */
+ uint8_t tunnel_type;
+ /*
+ * This value indicates the tunnel type for this filter. If this
+ * field is not specified, then the filter shall apply to both
+ * non-tunneled and tunneled packets. If this field conflicts
+ * with the tunnel_type specified in the l2_filter_id, then the
+ * HWRM shall return an error for this command.
+ */
+ /* Non-tunnel */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
+ UINT32_C(0x0)
+ /* Virtual eXtensible Local Area Network (VXLAN) */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
+ UINT32_C(0x1)
+ /*
+ * Network Virtualization Generic Routing
+ * Encapsulation (NVGRE)
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
+ UINT32_C(0x2)
+ /*
+ * Generic Routing Encapsulation (GRE) inside
+ * Ethernet payload
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
+ UINT32_C(0x3)
+ /* IP in IP */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
+ UINT32_C(0x4)
+ /* Generic Network Virtualization Encapsulation (Geneve) */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
+ UINT32_C(0x5)
+ /* Multi-Protocol Label Switching (MPLS) */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
+ UINT32_C(0x6)
+ /* Stateless Transport Tunnel (STT) */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_STT UINT32_C(0x7)
+ /*
+ * Generic Routing Encapsulation (GRE) inside IP
+ * datagram payload
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
+ UINT32_C(0x8)
+ /* Any tunneled traffic */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
+ UINT32_C(0xff)
+ uint8_t pri_hint;
+ /*
+ * This hint is provided to help in placing the filter in the
+ * filter table.
+ */
+ /* No preference */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_NO_PREFER \
+ UINT32_C(0x0)
+ /* Above the given filter */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_ABOVE UINT32_C(0x1)
+ /* Below the given filter */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_BELOW UINT32_C(0x2)
+ /* As high as possible */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_HIGHEST \
+ UINT32_C(0x3)
+ /* As low as possible */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_LOWEST UINT32_C(0x4)
+ uint32_t src_ipaddr[4];
+ /*
+ * The value of source IP address to be used in filtering. For
+ * IPv4, first four bytes represent the IP address.
+ */
+ uint32_t src_ipaddr_mask[4];
+ /*
+ * The value of source IP address mask to be used in filtering.
+ * For IPv4, first four bytes represent the IP address mask.
+ */
+ uint32_t dst_ipaddr[4];
+ /*
+ * The value of destination IP address to be used in filtering.
+ * For IPv4, first four bytes represent the IP address.
+ */
+ uint32_t dst_ipaddr_mask[4];
+ /*
+ * The value of destination IP address mask to be used in
+ * filtering. For IPv4, first four bytes represent the IP
+ * address mask.
+ */
+ uint16_t src_port;
+ /*
+ * The value of source port to be used in filtering. Applies to
+ * UDP and TCP traffic.
+ */
+ uint16_t src_port_mask;
+ /*
+ * The value of source port mask to be used in filtering.
+ * Applies to UDP and TCP traffic.
+ */
+ uint16_t dst_port;
+ /*
+ * The value of destination port to be used in filtering.
+ * Applies to UDP and TCP traffic.
+ */
+ uint16_t dst_port_mask;
+ /*
+ * The value of destination port mask to be used in filtering.
+ * Applies to UDP and TCP traffic.
+ */
+ uint64_t ntuple_filter_id_hint;
+ /* This is the ID of the filter that goes along with the pri_hint. */
+} __attribute__((packed));
+
+/* Output (24 bytes) */
+struct hwrm_cfa_ntuple_filter_alloc_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type. Note: the receiver should verify the
+ * input parameters and fail the call with an error when appropriate.
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint64_t ntuple_filter_id;
+ /* This value is an opaque id into CFA data structures. */
+ uint32_t flow_id;
+ /*
+ * This is the ID of the flow associated with this filter. This
+ * value shall be used to match and associate the flow
+ * identifier returned in completion records. A value of
+ * 0xFFFFFFFF shall indicate no flow id.
+ */
+ uint8_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the
+ * output is completely written to RAM. This field should be
+ * read as '1' to indicate that the output has been completely
+ * written. When writing a command completion or response to an
+ * internal processor, the order of writes has to be such that
+ * this field is written last.
+ */
+} __attribute__((packed));
+
+/* Command specific Error Codes (8 bytes) */
+struct hwrm_cfa_ntuple_filter_alloc_cmd_err {
+ uint8_t code;
+ /*
+ * command specific error codes that go to the cmd_err field
+ * in Common HWRM Error Response.
+ */
+ /* Unknown error */
+ #define HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN UINT32_C(0x0)
+ /*
+ * Unable to complete operation due to conflict
+ * with Rx Mask VLAN
+ */
+ #define \
+ HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR \
+ UINT32_C(0x1)
+ uint8_t unused_0[7];
+} __attribute__((packed));
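
The alloc request above is driven entirely by the enables bitmap: the HWRM only looks at fields whose ENABLES_* bit is set. Below is a minimal sketch (not the driver's actual code) of filling the structure for an IPv4/TCP destination-port filter. The helper name is hypothetical, the req_type/seq_id/resp_addr bookkeeping is assumed to be handled by the driver's common HWRM prep/send path, and submission to firmware is omitted.

#include <string.h>
#include <rte_byteorder.h>
#include "hsi_struct_def_dpdk.h"

static void
build_ntuple_tcp_dport(struct hwrm_cfa_ntuple_filter_alloc_input *req,
		       uint64_t l2_filter_id, uint32_t dst_ip_be,
		       uint16_t dst_port_be, uint16_t dst_id)
{
	memset(req, 0, sizeof(*req));
	req->l2_filter_id = rte_cpu_to_le_64(l2_filter_id);
	req->ip_addr_type =
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV4;
	req->ip_protocol =
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_TCP;
	req->dst_ipaddr[0] = dst_ip_be;	/* L3/L4 values stay big endian */
	req->dst_port = dst_port_be;
	req->dst_id = rte_cpu_to_le_16(dst_id);
	/*
	 * Every field filled above needs its matching enables bit; the
	 * bits for l2_filter_id and ip_addr_type sit earlier in the
	 * enables list than the excerpt shown here.
	 */
	req->enables = rte_cpu_to_le_32(
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR |
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IP_PROTOCOL |
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT |
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_ID);
}

On success, the ntuple_filter_id in the response is the handle later passed to hwrm_cfa_ntuple_filter_free or hwrm_cfa_ntuple_filter_cfg.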
+
+/* hwrm_cfa_ntuple_filter_free */
+/* Description: Free an ntuple filter */
+/* Input (24 bytes) */
+struct hwrm_cfa_ntuple_filter_free_input {
+ uint16_t req_type;
+ /*
+ * This value indicates what type of request this is. The format
+ * for the rest of the command is determined by this field.
+ */
+ uint16_t cmpl_ring;
+ /*
+ * This value indicates what completion ring the request
+ * will be optionally completed on. If the value is -1, then no
+ * CR completion will be generated. Any other value must be a
+ * valid CR ring_id value for this function.
+ */
+ uint16_t seq_id;
+ /* This value indicates the command sequence number. */
+ uint16_t target_id;
+ /*
+ * Target ID of this command. 0x0 - 0xFFF8 - Used for function
+ * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
+ * - HWRM
+ */
+ uint64_t resp_addr;
+ /*
+ * This is the host address where the response will be written
+ * when the request is complete. This area must be 16B aligned
+ * and must be cleared to zero before the request is made.
+ */
+ uint64_t ntuple_filter_id;
+ /* This value is an opaque id into CFA data structures. */
+} __attribute__((packed));
+
+/* Output (16 bytes) */
+struct hwrm_cfa_ntuple_filter_free_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type. Note: the receiver should verify the
+ * input parameters and fail the call with an error when appropriate.
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint32_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the
+ * output is completely written to RAM. This field should be
+ * read as '1' to indicate that the output has been completely
+ * written. When writing a command completion or response to an
+ * internal processor, the order of writes has to be such that
+ * this field is written last.
+ */
+} __attribute__((packed));
+
+/* hwrm_cfa_ntuple_filter_cfg */
+/*
+ * Description: Configure an ntuple filter with a new destination VNIC and/or
+ * meter.
+ */
+/* Input (48 bytes) */
+struct hwrm_cfa_ntuple_filter_cfg_input {
+ uint16_t req_type;
+ /*
+ * This value indicates what type of request this is. The format
+ * for the rest of the command is determined by this field.
+ */
+ uint16_t cmpl_ring;
+ /*
+ * This value indicates what completion ring the request
+ * will be optionally completed on. If the value is -1, then no
+ * CR completion will be generated. Any other value must be a
+ * valid CR ring_id value for this function.
+ */
+ uint16_t seq_id;
+ /* This value indicates the command sequence number. */
+ uint16_t target_id;
+ /*
+ * Target ID of this command. 0x0 - 0xFFF8 - Used for function
+ * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
+ * - HWRM
+ */
+ uint64_t resp_addr;
+ /*
+ * This is the host address where the response will be written
+ * when the request is complete. This area must be 16B aligned
+ * and must be cleared to zero before the request is made.
+ */
+ uint32_t enables;
+ /* This bit must be '1' for the new_dst_id field to be configured. */
+ #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_DST_ID \
+ UINT32_C(0x1)
+ /*
+ * This bit must be '1' for the new_mirror_vnic_id field to be
+ * configured.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_MIRROR_VNIC_ID \
+ UINT32_C(0x2)
+ /*
+ * This bit must be '1' for the new_meter_instance_id field to
+ * be configured.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_METER_INSTANCE_ID \
+ UINT32_C(0x4)
+ uint32_t unused_0;
+ uint64_t ntuple_filter_id;
+ /* This value is an opaque id into CFA data structures. */
+ uint32_t new_dst_id;
+ /*
+ * If set, this value shall represent the new Logical VNIC ID of
+ * the destination VNIC for the RX path and new network port id
+ * of the destination port for the TX path.
+ */
+ uint32_t new_mirror_vnic_id;
+ /* New Logical VNIC ID of the VNIC where traffic is mirrored. */
+ uint16_t new_meter_instance_id;
+ /*
+ * New meter to attach to the flow. Specifying the invalid
+ * instance ID is used to remove any existing meter from the
+ * flow.
+ */
+ /*
+ * A value of 0xffff is considered invalid and
+ * implies the instance is not configured.
+ */
+ #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_NEW_METER_INSTANCE_ID_INVALID \
+ UINT32_C(0xffff)
+ uint16_t unused_1[3];
+} __attribute__((packed));
+
+/* Output (16 bytes) */
+struct hwrm_cfa_ntuple_filter_cfg_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type. Note: the receiver should verify the
+ * input parameters and fail the call with an error when appropriate.
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint32_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the
+ * output is completely written to RAM. This field should be
+ * read as '1' to indicate that the output has been completely
+ * written. When writing a command completion or response to an
+ * internal processor, the order of writes has to be such that
+ * this field is written last.
+ */
+} __attribute__((packed));
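
A matching sketch for the cfg flavor: re-point an existing filter at a new destination VNIC and detach any meter by writing the documented invalid instance ID. ntuple_filter_id is the handle from the alloc response; the helper name is illustrative, the includes from the previous sketch are reused, and dispatch is again left to the driver.

static void
build_ntuple_redirect(struct hwrm_cfa_ntuple_filter_cfg_input *req,
		      uint64_t ntuple_filter_id, uint32_t new_vnic_id)
{
	memset(req, 0, sizeof(*req));
	req->ntuple_filter_id = rte_cpu_to_le_64(ntuple_filter_id);
	req->new_dst_id = rte_cpu_to_le_32(new_vnic_id);
	/* The invalid meter instance ID removes any attached meter. */
	req->new_meter_instance_id = rte_cpu_to_le_16(
	    HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_NEW_METER_INSTANCE_ID_INVALID);
	req->enables = rte_cpu_to_le_32(
	    HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_DST_ID |
	    HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_METER_INSTANCE_ID);
}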
+
+/* hwrm_cfa_em_flow_alloc */
+/*
+ * Description: This is a generic Exact Match (EM) flow that uses fields from
+ * L4/L3/L2 headers. The EM flows apply to transmit and receive traffic. All
+ * L2/L3/L4 header fields are specified in network byte order. For each EM flow,
+ * there is an associated set of actions specified. For tunneled packets, all
+ * L2/L3/L4 fields specified are fields of inner headers unless otherwise
+ * specified. # If a field specified in this command is not enabled as a valid
+ * field, then that field shall not be used in matching packet header fields
+ * against this EM flow entry.
+ */
+/* Input (112 bytes) */
+struct hwrm_cfa_em_flow_alloc_input {
+ uint16_t req_type;
+ /*
+ * This value indicates what type of request this is. The format
+ * for the rest of the command is determined by this field.
+ */
+ uint16_t cmpl_ring;
+ /*
+ * This value indicates what completion ring the request
+ * will be optionally completed on. If the value is -1, then no
+ * CR completion will be generated. Any other value must be a
+ * valid CR ring_id value for this function.
+ */
+ uint16_t seq_id;
+ /* This value indicates the command sequence number. */
+ uint16_t target_id;
+ /*
+ * Target ID of this command. 0x0 - 0xFFF8 - Used for function
+ * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
+ * - HWRM
+ */
+ uint64_t resp_addr;
+ /*
+ * This is the host address where the response will be written
+ * when the request is complete. This area must be 16B aligned
+ * and must be cleared to zero before the request is made.
+ */
+ uint32_t flags;
+ /*
+ * Enumeration denoting the RX, TX type of the resource. This
+ * enumeration is used for resources that are similar for both
+ * TX and RX paths of the chip.
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH UINT32_C(0x1)
+ /* tx path */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_TX \
+ (UINT32_C(0x0) << 0)
+ /* rx path */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_RX \
+ (UINT32_C(0x1) << 0)
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_LAST \
+ HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_RX
+ /*
+ * Setting of this flag indicates enabling of a byte counter for
+ * a given flow.
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_BYTE_CTR UINT32_C(0x2)
+ /*
+ * Setting of this flag indicates enabling of a packet counter
+ * for a given flow.
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PKT_CTR UINT32_C(0x4)
+ /*
+ * Setting of this flag indicates de-capsulation action for the
+ * given flow.
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_DECAP UINT32_C(0x8)
+ /*
+ * Setting of this flag indicates encapsulation action for the
+ * given flow.
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_ENCAP UINT32_C(0x10)
+ /*
+ * Setting of this flag indicates drop action. If this flag is
+ * not set, then it should be considered accept action.
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_DROP UINT32_C(0x20)
+ /*
+ * Setting of this flag indicates that a meter is expected to be
+ * attached to this flow. This hint can be used when choosing
+ * the action record format required for the flow.
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_METER UINT32_C(0x40)
+ uint32_t enables;
+ /* This bit must be '1' for the l2_filter_id field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_L2_FILTER_ID UINT32_C(0x1)
+ /* This bit must be '1' for the tunnel_type field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_TUNNEL_TYPE UINT32_C(0x2)
+ /* This bit must be '1' for the tunnel_id field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_TUNNEL_ID UINT32_C(0x4)
+ /* This bit must be '1' for the src_macaddr field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_MACADDR UINT32_C(0x8)
+ /* This bit must be '1' for the dst_macaddr field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_MACADDR UINT32_C(0x10)
+ /* This bit must be '1' for the ovlan_vid field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_OVLAN_VID UINT32_C(0x20)
+ /* This bit must be '1' for the ivlan_vid field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IVLAN_VID UINT32_C(0x40)
+ /* This bit must be '1' for the ethertype field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_ETHERTYPE UINT32_C(0x80)
+ /* This bit must be '1' for the src_ipaddr field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_IPADDR UINT32_C(0x100)
+ /* This bit must be '1' for the dst_ipaddr field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_IPADDR UINT32_C(0x200)
+ /* This bit must be '1' for the ipaddr_type field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IPADDR_TYPE UINT32_C(0x400)
+ /* This bit must be '1' for the ip_protocol field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IP_PROTOCOL UINT32_C(0x800)
+ /* This bit must be '1' for the src_port field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_PORT UINT32_C(0x1000)
+ /* This bit must be '1' for the dst_port field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_PORT UINT32_C(0x2000)
+ /* This bit must be '1' for the dst_id field to be configured. */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_ID UINT32_C(0x4000)
+ /*
+ * This bit must be '1' for the mirror_vnic_id field to be
+ * configured.
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
+ UINT32_C(0x8000)
+ /*
+ * This bit must be '1' for the encap_record_id field to be
+ * configured.
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_ENCAP_RECORD_ID \
+ UINT32_C(0x10000)
+ /*
+ * This bit must be '1' for the meter_instance_id field to be
+ * configured.
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_METER_INSTANCE_ID \
+ UINT32_C(0x20000)
+ uint64_t l2_filter_id;
+ /*
+ * This value identifies a set of CFA data structures used for
+ * an L2 context.
+ */
+ uint8_t tunnel_type;
+ /* Tunnel Type. */
+ /* Non-tunnel */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
+ UINT32_C(0x0)
+ /* Virtual eXtensible Local Area Network (VXLAN) */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_VXLAN UINT32_C(0x1)
+ /*
+ * Network Virtualization Generic Routing
+ * Encapsulation (NVGRE)
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_NVGRE UINT32_C(0x2)
+ /*
+ * Generic Routing Encapsulation (GRE) inside
+ * Ethernet payload
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_L2GRE UINT32_C(0x3)
+ /* IP in IP */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_IPIP UINT32_C(0x4)
+ /* Generic Network Virtualization Encapsulation (Geneve) */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_GENEVE UINT32_C(0x5)
+ /* Multi-Protocol Label Switching (MPLS) */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_MPLS UINT32_C(0x6)
+ /* Stateless Transport Tunnel (STT) */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_STT UINT32_C(0x7)
+ /*
+ * Generic Routing Encapsulation (GRE) inside IP
+ * datagram payload
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_IPGRE UINT32_C(0x8)
+ /*
+ * IPV4 over virtual eXtensible Local Area
+ * Network (IPV4oVXLAN)
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 UINT32_C(0x9)
+ /* Any tunneled traffic */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
+ UINT32_C(0xff)
+ uint8_t unused_0;
+ uint16_t unused_1;
+ uint32_t tunnel_id;
+ /*
+ * Tunnel identifier. Virtual Network Identifier (VNI). Only
+ * valid with tunnel_types VXLAN, NVGRE, and Geneve. Only lower
+ * 24-bits of VNI field are used in setting up the filter.
+ */
+ uint8_t src_macaddr[6];
+ /*
+ * This value indicates the source MAC address in the Ethernet
+ * header.
+ */
+ uint16_t meter_instance_id;
+ /* The meter instance to attach to the flow. */
+ /*
+ * A value of 0xffff is considered invalid and
+ * implies the instance is not configured.
+ */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_METER_INSTANCE_ID_INVALID \
+ UINT32_C(0xffff)
+ uint8_t dst_macaddr[6];
+ /*
+ * This value indicates the destination MAC address in the
+ * Ethernet header.
+ */
+ uint16_t ovlan_vid;
+ /*
+ * This value indicates the VLAN ID of the outer VLAN tag in the
+ * Ethernet header.
+ */
+ uint16_t ivlan_vid;
+ /*
+ * This value indicates the VLAN ID of the inner VLAN tag in the
+ * Ethernet header.
+ */
+ uint16_t ethertype;
+ /* This value indicates the ethertype in the Ethernet header. */
+ uint8_t ip_addr_type;
+ /*
+ * This value indicates the type of IP address. 4 - IPv4, 6 -
+ * IPv6. All others are invalid.
+ */
+ /* invalid */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_UNKNOWN UINT32_C(0x0)
+ /* IPv4 */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV4 UINT32_C(0x4)
+ /* IPv6 */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV6 UINT32_C(0x6)
+ uint8_t ip_protocol;
+ /*
+ * The value of the protocol field in the IP header. Applies
+ * to UDP and TCP traffic. 6 - TCP, 17 - UDP.
+ */
+ /* invalid */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_UNKNOWN UINT32_C(0x0)
+ /* TCP */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_TCP UINT32_C(0x6)
+ /* UDP */
+ #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_UDP UINT32_C(0x11)
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint32_t src_ipaddr[4];
+ /*
+ * The value of source IP address to be used in filtering. For
+ * IPv4, first four bytes represent the IP address.
+ */
+ uint32_t dst_ipaddr[4];
+ /*
+ * The value of destination IP address to be used in filtering,
+ * in network byte order (big endian). For IPv4, first four
+ * bytes represent the IP address.
+ */
+ uint16_t src_port;
+ /*
+ * The value of source port to be used in filtering. Applies to
+ * UDP and TCP traffic.
+ */
+ uint16_t dst_port;
+ /*
+ * The value of destination port to be used in filtering.
+ * Applies to UDP and TCP traffic.
+ */
+ uint16_t dst_id;
+ /*
+ * If set, this value shall represent the Logical VNIC ID of the
+ * destination VNIC for the RX path and network port id of the
+ * destination port for the TX path.
+ */
+ uint16_t mirror_vnic_id;
+ /* Logical VNIC ID of the VNIC where traffic is mirrored. */
+ uint32_t encap_record_id;
+ /* Logical ID of the encapsulation record. */
+ uint32_t unused_4;
+} __attribute__((packed));
+
+/* Output (24 bytes) */
+struct hwrm_cfa_em_flow_alloc_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type. Note: the receiver should verify the
+ * input parameters and fail the call with an error when appropriate.
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint64_t em_filter_id;
+ /* This value is an opaque id into CFA data structures. */
+ uint32_t flow_id;
+ /*
+ * This is the ID of the flow associated with this filter. This
+ * value shall be used to match and associate the flow
+ * identifier returned in completion records. A value of
+ * 0xFFFFFFFF shall indicate no flow id.
+ */
+ uint8_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the
+ * output is completely written to RAM. This field should be
+ * read as '1' to indicate that the output has been completely
+ * written. When writing a command completion or response to an
+ * internal processor, the order of writes has to be such that
+ * this field is written last.
+ */
+} __attribute__((packed));
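
An EM flow request is assembled the same way as the ntuple one, except that every match field is exact (there are no mask fields) and the flags word selects the direction and per-flow counters/actions. A hedged sketch for an RX IPv4/UDP 5-tuple with packet and byte counters, assuming the L3/L4 values are already in network byte order as the description requires; the helper is hypothetical and dispatch is omitted.

static void
build_em_udp_flow(struct hwrm_cfa_em_flow_alloc_input *req,
		  uint64_t l2_filter_id, uint32_t src_ip_be,
		  uint32_t dst_ip_be, uint16_t src_port_be,
		  uint16_t dst_port_be, uint16_t dst_id)
{
	memset(req, 0, sizeof(*req));
	req->flags = rte_cpu_to_le_32(
	    HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_RX |
	    HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PKT_CTR |
	    HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_BYTE_CTR);
	req->l2_filter_id = rte_cpu_to_le_64(l2_filter_id);
	req->ip_addr_type = HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV4;
	req->ip_protocol = HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_UDP;
	req->src_ipaddr[0] = src_ip_be;
	req->dst_ipaddr[0] = dst_ip_be;
	req->src_port = src_port_be;
	req->dst_port = dst_port_be;
	req->dst_id = rte_cpu_to_le_16(dst_id);
	req->enables = rte_cpu_to_le_32(
	    HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_L2_FILTER_ID |
	    HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IPADDR_TYPE |
	    HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IP_PROTOCOL |
	    HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_IPADDR |
	    HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_IPADDR |
	    HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_PORT |
	    HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_PORT |
	    HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_ID);
}

The em_filter_id in the response is the handle used by hwrm_cfa_em_flow_free and hwrm_cfa_em_flow_cfg below.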
+
+/* hwrm_cfa_em_flow_free */
+/* Description: Free an EM flow table entry */
+/* Input (24 bytes) */
+struct hwrm_cfa_em_flow_free_input {
+ uint16_t req_type;
+ /*
+ * This value indicates what type of request this is. The format
+ * for the rest of the command is determined by this field.
+ */
+ uint16_t cmpl_ring;
+ /*
+ * This value indicates what completion ring the request
+ * will be optionally completed on. If the value is -1, then no
+ * CR completion will be generated. Any other value must be a
+ * valid CR ring_id value for this function.
+ */
+ uint16_t seq_id;
+ /* This value indicates the command sequence number. */
+ uint16_t target_id;
+ /*
+ * Target ID of this command. 0x0 - 0xFFF8 - Used for function
+ * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
+ * - HWRM
+ */
+ uint64_t resp_addr;
+ /*
+ * This is the host address where the response will be written
+ * when the request is complete. This area must be 16B aligned
+ * and must be cleared to zero before the request is made.
+ */
+ uint64_t em_filter_id;
+ /* This value is an opaque id into CFA data structures. */
+} __attribute__((packed));
+
+/* Output (16 bytes) */
+struct hwrm_cfa_em_flow_free_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type. Note: the receiver should verify the
+ * input parameters and fail the call with an error when appropriate.
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint32_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the
+ * output is completely written to RAM. This field should be
+ * read as '1' to indicate that the output has been completely
+ * written. When writing a command completion or response to an
+ * internal processor, the order of writes has to be such that
+ * this field is written last.
+ */
+} __attribute__((packed));
+
+/* hwrm_cfa_em_flow_cfg */
+/*
+ * Description: Configure an EM flow with a new destination VNIC and/or meter.
+ */
+/* Input (48 bytes) */
+struct hwrm_cfa_em_flow_cfg_input {
+ uint16_t req_type;
+ /*
+ * This value indicates what type of request this is. The format
+ * for the rest of the command is determined by this field.
+ */
+ uint16_t cmpl_ring;
+ /*
+ * This value indicates what completion ring the request
+ * will be optionally completed on. If the value is -1, then no
+ * CR completion will be generated. Any other value must be a
+ * valid CR ring_id value for this function.
+ */
+ uint16_t seq_id;
+ /* This value indicates the command sequence number. */
+ uint16_t target_id;
+ /*
+ * Target ID of this command. 0x0 - 0xFFF8 - Used for function
+ * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
+ * - HWRM
+ */
+ uint64_t resp_addr;
+ /*
+ * This is the host address where the response will be written
+ * when the request is complete. This area must be 16B aligned
+ * and must be cleared to zero before the request is made.
+ */
+ uint32_t enables;
+ /* This bit must be '1' for the new_dst_id field to be configured. */
+ #define HWRM_CFA_EM_FLOW_CFG_INPUT_ENABLES_NEW_DST_ID UINT32_C(0x1)
+ /*
+ * This bit must be '1' for the new_mirror_vnic_id field to be
+ * configured.
+ */
+ #define HWRM_CFA_EM_FLOW_CFG_INPUT_ENABLES_NEW_MIRROR_VNIC_ID \
+ UINT32_C(0x2)
+ /*
+ * This bit must be '1' for the new_meter_instance_id field to
+ * be configured.
+ */
+ #define HWRM_CFA_EM_FLOW_CFG_INPUT_ENABLES_NEW_METER_INSTANCE_ID \
+ UINT32_C(0x4)
+ uint32_t unused_0;
+ uint64_t em_filter_id;
+ /* This value is an opaque id into CFA data structures. */
+ uint32_t new_dst_id;
+ /*
+ * If set, this value shall represent the new Logical VNIC ID of
+ * the destination VNIC for the RX path and network port id of
+ * the destination port for the TX path.
+ */
+ uint32_t new_mirror_vnic_id;
+ /* New Logical VNIC ID of the VNIC where traffic is mirrored. */
+ uint16_t new_meter_instance_id;
+ /*
+ * New meter to attach to the flow. Specifying the invalid
+ * instance ID is used to remove any existing meter from the
+ * flow.
+ */
+ /*
+ * A value of 0xffff is considered invalid and
+ * implies the instance is not configured.
+ */
+ #define HWRM_CFA_EM_FLOW_CFG_INPUT_NEW_METER_INSTANCE_ID_INVALID \
+ UINT32_C(0xffff)
+ uint16_t unused_1[3];
+} __attribute__((packed));
+
+/* Output (16 bytes) */
+struct hwrm_cfa_em_flow_cfg_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type. Note: the receiver should verify the
+ * input parameters and fail the call with an error when appropriate.
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint32_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the
+ * output is completely written to RAM. This field should be
+ * read as '1' to indicate that the output has been completely
+ * written. When writing a command completion or response to an
+ * internal processor, the order of writes has to be such that
+ * this field is written last.
+ */
+} __attribute__((packed));
+
/* hwrm_tunnel_dst_port_query */
/*
* Description: This function is called by a driver to query tunnel type
@@ -9591,6 +10674,12 @@ struct hwrm_tunnel_dst_port_query_input {
/* Generic Network Virtualization Encapsulation (Geneve) */
#define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_GENEVE \
UINT32_C(0x5)
+ /*
+ * IPV4 over virtual eXtensible Local Area
+ * Network (IPV4oVXLAN)
+ */
+ #define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+ UINT32_C(0x9)
uint8_t unused_0[7];
} __attribute__((packed));
@@ -9691,6 +10780,12 @@ struct hwrm_tunnel_dst_port_alloc_input {
/* Generic Network Virtualization Encapsulation (Geneve) */
#define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
UINT32_C(0x5)
+ /*
+ * IPV4 over virtual eXtensible Local Area
+ * Network (IPV4oVXLAN)
+ */
+ #define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+ UINT32_C(0x9)
uint8_t unused_0;
uint16_t tunnel_dst_port_val;
/*
@@ -9781,6 +10876,12 @@ struct hwrm_tunnel_dst_port_free_input {
#define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_VXLAN UINT32_C(0x1)
/* Generic Network Virtualization Encapsulation (Geneve) */
#define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_GENEVE UINT32_C(0x5)
+ /*
+ * IPV4 over virtual eXtensible Local Area
+ * Network (IPV4oVXLAN)
+ */
+ #define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+ UINT32_C(0x9)
uint8_t unused_0;
uint16_t tunnel_dst_port_id;
/*
@@ -9984,77 +11085,9 @@ struct hwrm_stat_ctx_free_output {
*/
} __attribute__((packed));
-/* hwrm_stat_ctx_clr_stats */
-/* Description: This command clears statistics of a context. */
-/* Input (24 bytes) */
-struct hwrm_stat_ctx_clr_stats_input {
- uint16_t req_type;
- /*
- * This value indicates what type of request this is. The format
- * for the rest of the command is determined by this field.
- */
- uint16_t cmpl_ring;
- /*
- * This value indicates the what completion ring the request
- * will be optionally completed on. If the value is -1, then no
- * CR completion will be generated. Any other value must be a
- * valid CR ring_id value for this function.
- */
- uint16_t seq_id;
- /* This value indicates the command sequence number. */
- uint16_t target_id;
- /*
- * Target ID of this command. 0x0 - 0xFFF8 - Used for function
- * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
- * - HWRM
- */
- uint64_t resp_addr;
- /*
- * This is the host address where the response will be written
- * when the request is complete. This area must be 16B aligned
- * and must be cleared to zero before the request is made.
- */
- uint32_t stat_ctx_id;
- /* ID of the statistics context that is being queried. */
- uint32_t unused_0;
-} __attribute__((packed));
-
-/* Output (16 bytes) */
-struct hwrm_stat_ctx_clr_stats_output {
- uint16_t error_code;
- /*
- * Pass/Fail or error type Note: receiver to verify the in
- * parameters, and fail the call with an error when appropriate
- */
- uint16_t req_type;
- /* This field returns the type of original request. */
- uint16_t seq_id;
- /* This field provides original sequence number of the command. */
- uint16_t resp_len;
- /*
- * This field is the length of the response in bytes. The last
- * byte of the response is a valid flag that will read as '1'
- * when the command has been completely written to memory.
- */
- uint32_t unused_0;
- uint8_t unused_1;
- uint8_t unused_2;
- uint8_t unused_3;
- uint8_t valid;
- /*
- * This field is used in Output records to indicate that the
- * output is completely written to RAM. This field should be
- * read as '1' to indicate that the output has been completely
- * written. When writing a command completion or response to an
- * internal processor, the order of writes has to be such that
- * this field is written last.
- */
-} __attribute__((packed));
-
/* hwrm_stat_ctx_query */
/* Description: This command returns statistics of a context. */
/* Input (24 bytes) */
-
struct hwrm_stat_ctx_query_input {
uint16_t req_type;
/*
@@ -10087,7 +11120,6 @@ struct hwrm_stat_ctx_query_input {
} __attribute__((packed));
/* Output (176 bytes) */
-
struct hwrm_stat_ctx_query_output {
uint16_t error_code;
/*
@@ -10158,6 +11190,73 @@ struct hwrm_stat_ctx_query_output {
*/
} __attribute__((packed));
+/* hwrm_stat_ctx_clr_stats */
+/* Description: This command clears statistics of a context. */
+/* Input (24 bytes) */
+struct hwrm_stat_ctx_clr_stats_input {
+ uint16_t req_type;
+ /*
+ * This value indicates what type of request this is. The format
+ * for the rest of the command is determined by this field.
+ */
+ uint16_t cmpl_ring;
+ /*
+ * This value indicates what completion ring the request
+ * will be optionally completed on. If the value is -1, then no
+ * CR completion will be generated. Any other value must be a
+ * valid CR ring_id value for this function.
+ */
+ uint16_t seq_id;
+ /* This value indicates the command sequence number. */
+ uint16_t target_id;
+ /*
+ * Target ID of this command. 0x0 - 0xFFF8 - Used for function
+ * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
+ * - HWRM
+ */
+ uint64_t resp_addr;
+ /*
+ * This is the host address where the response will be written
+ * when the request is complete. This area must be 16B aligned
+ * and must be cleared to zero before the request is made.
+ */
+ uint32_t stat_ctx_id;
+ /* ID of the statistics context that is being queried. */
+ uint32_t unused_0;
+} __attribute__((packed));
+
+/* Output (16 bytes) */
+struct hwrm_stat_ctx_clr_stats_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type. Note: the receiver should verify the
+ * input parameters and fail the call with an error when appropriate.
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint32_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the
+ * output is completely written to RAM. This field should be
+ * read as '1' to indicate that the output has been completely
+ * written. When writing a command completion or response to an
+ * internal processor, the order of writes has to be such that
+ * this field is written last.
+ */
+} __attribute__((packed));
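
The clear request only needs the context handle, so the sketch is tiny; stat_ctx_id is whatever handle the driver already holds for the ring's statistics context, and dispatch is omitted as before.

static void
build_stat_ctx_clr(struct hwrm_stat_ctx_clr_stats_input *req,
		   uint32_t stat_ctx_id)
{
	memset(req, 0, sizeof(*req));
	req->stat_ctx_id = rte_cpu_to_le_32(stat_ctx_id);
}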
+
/* hwrm_exec_fwd_resp */
/*
* Description: This command is used to send an encapsulated request to the
@@ -10331,6 +11430,310 @@ struct hwrm_reject_fwd_resp_output {
*/
} __attribute__((packed));
+/* hwrm_nvm_get_dir_entries */
+/* Input (24 bytes) */
+struct hwrm_nvm_get_dir_entries_input {
+ uint16_t req_type;
+ uint16_t cmpl_ring;
+ uint16_t seq_id;
+ uint16_t target_id;
+ uint64_t resp_addr;
+ uint64_t host_dest_addr;
+} __attribute__((packed));
+
+/* Output (16 bytes) */
+struct hwrm_nvm_get_dir_entries_output {
+ uint16_t error_code;
+ uint16_t req_type;
+ uint16_t seq_id;
+ uint16_t resp_len;
+ uint32_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint8_t valid;
+} __attribute__((packed));
+
+/* hwrm_nvm_erase_dir_entry */
+/* Input (24 bytes) */
+struct hwrm_nvm_erase_dir_entry_input {
+ uint16_t req_type;
+ uint16_t cmpl_ring;
+ uint16_t seq_id;
+ uint16_t target_id;
+ uint64_t resp_addr;
+ uint16_t dir_idx;
+ uint16_t unused_0[3];
+} __attribute__((packed));
+
+/* Output (16 bytes) */
+struct hwrm_nvm_erase_dir_entry_output {
+ uint16_t error_code;
+ uint16_t req_type;
+ uint16_t seq_id;
+ uint16_t resp_len;
+ uint32_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint8_t valid;
+} __attribute__((packed));
+
+/* hwrm_nvm_get_dir_info */
+/* Input (16 bytes) */
+struct hwrm_nvm_get_dir_info_input {
+ uint16_t req_type;
+ uint16_t cmpl_ring;
+ uint16_t seq_id;
+ uint16_t target_id;
+ uint64_t resp_addr;
+} __attribute__((packed));
+
+/* Output (24 bytes) */
+struct hwrm_nvm_get_dir_info_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type. Note: the receiver should verify the
+ * input parameters and fail the call with an error when appropriate.
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint32_t entries;
+ /* Number of directory entries in the directory. */
+ uint32_t entry_length;
+ /* Size of each directory entry, in bytes. */
+ uint32_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the
+ * output is completely written to RAM. This field should be
+ * read as '1' to indicate that the output has been completely
+ * written. When writing a command completion or response to an
+ * internal processor, the order of writes has to be such that
+ * this field is written last.
+ */
+} __attribute__((packed));
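
The usual pattern pairs the two directory commands: hwrm_nvm_get_dir_info supplies the entry count and per-entry size, which determine how large a DMA-able buffer hwrm_nvm_get_dir_entries needs behind host_dest_addr. A sketch under the assumption that rte_zmalloc()/rte_malloc_virt2iova() are an acceptable way to obtain that buffer here; freeing the buffer and issuing the second command are left out.

#include <rte_malloc.h>

static void *
alloc_dir_buf(const struct hwrm_nvm_get_dir_info_output *info,
	      struct hwrm_nvm_get_dir_entries_input *req)
{
	uint32_t len = rte_le_to_cpu_32(info->entries) *
		       rte_le_to_cpu_32(info->entry_length);
	void *buf = rte_zmalloc("nvm_dir", len, 0);

	if (buf == NULL)
		return NULL;
	memset(req, 0, sizeof(*req));
	req->host_dest_addr = rte_cpu_to_le_64(rte_malloc_virt2iova(buf));
	return buf;
}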
+
+/* hwrm_nvm_write */
+/*
+ * Note: Write to the allocated NVRAM of an item referenced by an existing
+ * directory entry.
+ */
+/* Input (48 bytes) */
+struct hwrm_nvm_write_input {
+ uint16_t req_type;
+ /*
+ * This value indicates what type of request this is. The format
+ * for the rest of the command is determined by this field.
+ */
+ uint16_t cmpl_ring;
+ /*
+ * This value indicates what completion ring the request
+ * will be optionally completed on. If the value is -1, then no
+ * CR completion will be generated. Any other value must be a
+ * valid CR ring_id value for this function.
+ */
+ uint16_t seq_id;
+ /* This value indicates the command sequence number. */
+ uint16_t target_id;
+ /*
+ * Target ID of this command. 0x0 - 0xFFF8 - Used for function
+ * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
+ * - HWRM
+ */
+ uint64_t resp_addr;
+ /*
+ * This is the host address where the response will be written
+ * when the request is complete. This area must be 16B aligned
+ * and must be cleared to zero before the request is made.
+ */
+ uint64_t host_src_addr;
+ /* 64-bit Host Source Address. This is where the source data is. */
+ uint16_t dir_type;
+ /*
+ * The Directory Entry Type (valid values are defined in the
+ * bnxnvm_directory_type enum defined in the file
+ * bnxnvm_defs.h).
+ */
+ uint16_t dir_ordinal;
+ /*
+ * Directory ordinal. The 0-based instance of the combined
+ * Directory Entry Type and Extension.
+ */
+ uint16_t dir_ext;
+ /*
+ * The Directory Entry Extension flags (see BNX_DIR_EXT_* in the
+ * file bnxnvm_defs.h).
+ */
+ uint16_t dir_attr;
+ /*
+ * Directory Entry Attribute flags (see BNX_DIR_ATTR_* in the
+ * file bnxnvm_defs.h).
+ */
+ uint32_t dir_data_length;
+ /*
+ * Length of data to write, in bytes. May be less than or equal
+ * to the allocated size for the directory entry. The data
+ * length stored in the directory entry will be updated to
+ * reflect this value once the write is complete.
+ */
+ uint16_t option;
+ /* Option. */
+ uint16_t flags;
+ /*
+ * When this bit is '1', the original active image will not be
+ * removed. TBD: what purpose is this?
+ */
+ #define HWRM_NVM_WRITE_INPUT_FLAGS_KEEP_ORIG_ACTIVE_IMG UINT32_C(0x1)
+ uint32_t dir_item_length;
+ /*
+ * The requested length of the allocated NVM for the item, in
+ * bytes. This value may be greater than or equal to the
+ * specified data length (dir_data_length). If this value is
+ * less than the specified data length, it will be ignored. The
+ * response will contain the actual allocated item length, which
+ * may be greater than the requested item length. The purpose
+ * for allocating more than the required number of bytes for an
+ * item's data is to pre-allocate extra storage (padding) to
+ * accommodate the potential future growth of an item (e.g.
+ * upgraded firmware with a size increase, log growth, expanded
+ * configuration data).
+ */
+ uint32_t unused_0;
+} __attribute__((packed));
+
+/* Output (16 bytes) */
+struct hwrm_nvm_write_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type. Note: the receiver should verify the
+ * input parameters and fail the call with an error when appropriate.
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint32_t dir_item_length;
+ /*
+ * Length of the allocated NVM for the item, in bytes. The value
+ * may be greater than or equal to the specified data length or
+ * the requested item length. The actual item length used when
+ * creating a new directory entry will be a multiple of an NVM
+ * block size.
+ */
+ uint16_t dir_idx;
+ /* The directory index of the created or modified item. */
+ uint8_t unused_0;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the
+ * output is completely written to RAM. This field should be
+ * read as '1' to indicate that the output has been completely
+ * written. When writing a command completion or response to an
+ * internal processor, the order of writes has to be such that
+ * this field is written last.
+ */
+} __attribute__((packed));
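
A sketch of an in-place item update: dir_type/dir_ordinal/dir_ext identify the entry (values come from the bnxnvm_defs.h/bnxt_nvm_defs.h definitions the comments reference), data_iova is the IOVA of a DMA-able buffer holding data_len bytes, and dir_item_length is left at zero so the existing allocation size is kept (values smaller than dir_data_length are ignored, per the field description). Helper name illustrative; dispatch omitted.

static void
build_nvm_write(struct hwrm_nvm_write_input *req, uint16_t dir_type,
		uint16_t dir_ordinal, uint16_t dir_ext,
		uint64_t data_iova, uint32_t data_len)
{
	memset(req, 0, sizeof(*req));
	req->host_src_addr = rte_cpu_to_le_64(data_iova);
	req->dir_type = rte_cpu_to_le_16(dir_type);
	req->dir_ordinal = rte_cpu_to_le_16(dir_ordinal);
	req->dir_ext = rte_cpu_to_le_16(dir_ext);
	req->dir_data_length = rte_cpu_to_le_32(data_len);
	/* Keep the currently active image until the write succeeds. */
	req->flags = rte_cpu_to_le_16(
		HWRM_NVM_WRITE_INPUT_FLAGS_KEEP_ORIG_ACTIVE_IMG);
}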
+
+/* hwrm_nvm_read */
+/*
+ * Note: Read the contents of an NVRAM item as referenced (indexed) by an
+ * existing directory entry.
+ */
+/* Input (40 bytes) */
+struct hwrm_nvm_read_input {
+ uint16_t req_type;
+ /*
+ * This value indicates what type of request this is. The format
+ * for the rest of the command is determined by this field.
+ */
+ uint16_t cmpl_ring;
+ /*
+ * This value indicates what completion ring the request
+ * will be optionally completed on. If the value is -1, then no
+ * CR completion will be generated. Any other value must be a
+ * valid CR ring_id value for this function.
+ */
+ uint16_t seq_id;
+ /* This value indicates the command sequence number. */
+ uint16_t target_id;
+ /*
+ * Target ID of this command. 0x0 - 0xFFF8 - Used for function
+ * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
+ * - HWRM
+ */
+ uint64_t resp_addr;
+ /*
+ * This is the host address where the response will be written
+ * when the request is complete. This area must be 16B aligned
+ * and must be cleared to zero before the request is made.
+ */
+ uint64_t host_dest_addr;
+ /*
+ * 64-bit Host Destination Address. This is the host address
+ * where the data will be written to.
+ */
+ uint16_t dir_idx;
+ /* The 0-based index of the directory entry. */
+ uint8_t unused_0;
+ uint8_t unused_1;
+ uint32_t offset;
+ /* The NVRAM byte-offset to read from. */
+ uint32_t len;
+ /* The length of the data to be read, in bytes. */
+ uint32_t unused_2;
+} __attribute__((packed));
+
+/* Output (16 bytes) */
+struct hwrm_nvm_read_output {
+ uint16_t error_code;
+ /*
+ * Pass/Fail or error type. Note: the receiver should verify the
+ * input parameters and fail the call with an error when appropriate.
+ */
+ uint16_t req_type;
+ /* This field returns the type of original request. */
+ uint16_t seq_id;
+ /* This field provides original sequence number of the command. */
+ uint16_t resp_len;
+ /*
+ * This field is the length of the response in bytes. The last
+ * byte of the response is a valid flag that will read as '1'
+ * when the command has been completely written to memory.
+ */
+ uint32_t unused_0;
+ uint8_t unused_1;
+ uint8_t unused_2;
+ uint8_t unused_3;
+ uint8_t valid;
+ /*
+ * This field is used in Output records to indicate that the
+ * output is completely written to RAM. This field should be
+ * read as '1' to indicate that the output has been completely
+ * written. When writing a command completion or response to an
+ * internal processor, the order of writes has to be such that
+ * this field is written last.
+ */
+} __attribute__((packed));
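
And the read direction: pull len bytes starting at offset from the item at directory index dir_idx into a host buffer whose IOVA is dest_iova. Helper name illustrative; dispatch omitted.

static void
build_nvm_read(struct hwrm_nvm_read_input *req, uint16_t dir_idx,
	       uint32_t offset, uint32_t len, uint64_t dest_iova)
{
	memset(req, 0, sizeof(*req));
	req->host_dest_addr = rte_cpu_to_le_64(dest_iova);
	req->dir_idx = rte_cpu_to_le_16(dir_idx);
	req->offset = rte_cpu_to_le_32(offset);
	req->len = rte_cpu_to_le_32(len);
}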
+
/* Hardware Resource Manager Specification */
/* Description: This structure is used to specify port description. */
/*
@@ -10391,11 +11794,28 @@ struct output {
/* Short Command Structure (16 bytes) */
struct hwrm_short_input {
uint16_t req_type;
+ /*
+ * This field indicates the type of request in the request
+ * buffer. The format for the rest of the command (request) is
+ * determined by this field.
+ */
uint16_t signature;
- #define HWRM_SHORT_REQ_SIGNATURE_SHORT_CMD (UINT32_C(0x4321))
+ /*
+ * This field indicates a signature that is used to identify
+ * short form of the command listed here. This field shall be
+ * set to 17185 (0x4321).
+ */
+ /* Signature indicating this is a short form of HWRM command */
+ #define HWRM_SHORT_REQ_SIGNATURE_SHORT_CMD UINT32_C(0x4321)
uint16_t unused_0;
+ /* Reserved for future use. */
uint16_t size;
+ /* This value indicates the length of the request. */
uint64_t req_addr;
+ /*
+ * This is the host address where the request was written. This
+ * area must be 16B aligned.
+ */
} __attribute__((packed));
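
With the new comments the short form is self-describing: the full request is written to a 16B-aligned host buffer and the 16-byte short command merely points at it. A minimal sketch; how req_iova is obtained and how the short command is pushed onto the channel are driver-specific and omitted.

static void
build_short_cmd(struct hwrm_short_input *short_req, uint16_t req_type,
		uint64_t req_iova, uint16_t req_len)
{
	memset(short_req, 0, sizeof(*short_req));
	short_req->req_type = rte_cpu_to_le_16(req_type);
	short_req->signature =
		rte_cpu_to_le_16(HWRM_SHORT_REQ_SIGNATURE_SHORT_CMD);
	short_req->size = rte_cpu_to_le_16(req_len);
	short_req->req_addr = rte_cpu_to_le_64(req_iova);
}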
#define HWRM_GET_HWRM_ERROR_CODE(arg) \
diff --git a/drivers/net/bnxt/rte_pmd_bnxt.c b/drivers/net/bnxt/rte_pmd_bnxt.c
index c343d903..a3134074 100644
--- a/drivers/net/bnxt/rte_pmd_bnxt.c
+++ b/drivers/net/bnxt/rte_pmd_bnxt.c
@@ -67,7 +67,7 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg)
true : false;
}
-int rte_pmd_bnxt_set_tx_loopback(uint8_t port, uint8_t on)
+int rte_pmd_bnxt_set_tx_loopback(uint16_t port, uint8_t on)
{
struct rte_eth_dev *eth_dev;
struct bnxt *bp;
@@ -108,12 +108,12 @@ rte_pmd_bnxt_set_all_queues_drop_en_cb(struct bnxt_vnic_info *vnic, void *onptr)
vnic->bd_stall = !(*on);
}
-int rte_pmd_bnxt_set_all_queues_drop_en(uint8_t port, uint8_t on)
+int rte_pmd_bnxt_set_all_queues_drop_en(uint16_t port, uint8_t on)
{
struct rte_eth_dev *eth_dev;
struct bnxt *bp;
uint32_t i;
- int rc;
+ int rc = -EINVAL;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
@@ -159,7 +159,7 @@ int rte_pmd_bnxt_set_all_queues_drop_en(uint8_t port, uint8_t on)
return rc;
}
-int rte_pmd_bnxt_set_vf_mac_addr(uint8_t port, uint16_t vf,
+int rte_pmd_bnxt_set_vf_mac_addr(uint16_t port, uint16_t vf,
struct ether_addr *mac_addr)
{
struct rte_eth_dev *dev;
@@ -191,7 +191,7 @@ int rte_pmd_bnxt_set_vf_mac_addr(uint8_t port, uint16_t vf,
return rc;
}
-int rte_pmd_bnxt_set_vf_rate_limit(uint8_t port, uint16_t vf,
+int rte_pmd_bnxt_set_vf_rate_limit(uint16_t port, uint16_t vf,
uint16_t tx_rate, uint64_t q_msk)
{
struct rte_eth_dev *eth_dev;
@@ -241,7 +241,7 @@ int rte_pmd_bnxt_set_vf_rate_limit(uint8_t port, uint16_t vf,
return rc;
}
-int rte_pmd_bnxt_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on)
+int rte_pmd_bnxt_set_vf_mac_anti_spoof(uint16_t port, uint16_t vf, uint8_t on)
{
struct rte_eth_dev_info dev_info;
struct rte_eth_dev *dev;
@@ -294,7 +294,7 @@ int rte_pmd_bnxt_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on)
return rc;
}
-int rte_pmd_bnxt_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on)
+int rte_pmd_bnxt_set_vf_vlan_anti_spoof(uint16_t port, uint16_t vf, uint8_t on)
{
struct rte_eth_dev_info dev_info;
struct rte_eth_dev *dev;
@@ -322,9 +322,6 @@ int rte_pmd_bnxt_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on)
if (vf >= dev_info.max_vfs)
return -EINVAL;
- if (on == bp->pf.vf_info[vf].vlan_spoof_en)
- return 0;
-
rc = bnxt_hwrm_func_cfg_vf_set_vlan_anti_spoof(bp, vf, on);
if (!rc) {
bp->pf.vf_info[vf].vlan_spoof_en = on;
@@ -350,7 +347,7 @@ rte_pmd_bnxt_set_vf_vlan_stripq_cb(struct bnxt_vnic_info *vnic, void *onptr)
}
int
-rte_pmd_bnxt_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on)
+rte_pmd_bnxt_set_vf_vlan_stripq(uint16_t port, uint16_t vf, uint8_t on)
{
struct rte_eth_dev *dev;
struct rte_eth_dev_info dev_info;
@@ -385,7 +382,7 @@ rte_pmd_bnxt_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on)
return rc;
}
-int rte_pmd_bnxt_set_vf_rxmode(uint8_t port, uint16_t vf,
+int rte_pmd_bnxt_set_vf_rxmode(uint16_t port, uint16_t vf,
uint16_t rx_mask, uint8_t on)
{
struct rte_eth_dev *dev;
@@ -409,20 +406,19 @@ int rte_pmd_bnxt_set_vf_rxmode(uint8_t port, uint16_t vf,
if (vf >= bp->pdev->max_vfs)
return -EINVAL;
- if (rx_mask & (ETH_VMDQ_ACCEPT_UNTAG | ETH_VMDQ_ACCEPT_HASH_MC)) {
+ if (rx_mask & ETH_VMDQ_ACCEPT_UNTAG) {
RTE_LOG(ERR, PMD, "Currently cannot toggle this setting\n");
return -ENOTSUP;
}
- if (rx_mask & ETH_VMDQ_ACCEPT_HASH_UC && !on) {
- RTE_LOG(ERR, PMD, "Currently cannot disable UC Rx\n");
- return -ENOTSUP;
- }
+ /* Is this really the correct mapping? VFd seems to think it is. */
+ if (rx_mask & ETH_VMDQ_ACCEPT_HASH_UC)
+ flag |= BNXT_VNIC_INFO_PROMISC;
if (rx_mask & ETH_VMDQ_ACCEPT_BROADCAST)
flag |= BNXT_VNIC_INFO_BCAST;
if (rx_mask & ETH_VMDQ_ACCEPT_MULTICAST)
- flag |= BNXT_VNIC_INFO_ALLMULTI;
+ flag |= BNXT_VNIC_INFO_ALLMULTI | BNXT_VNIC_INFO_MCAST;
if (on)
bp->pf.vf_info[vf].l2_rx_mask |= flag;
@@ -477,7 +473,7 @@ static int bnxt_set_vf_table(struct bnxt *bp, uint16_t vf)
return rc;
}
-int rte_pmd_bnxt_set_vf_vlan_filter(uint8_t port, uint16_t vlan,
+int rte_pmd_bnxt_set_vf_vlan_filter(uint16_t port, uint16_t vlan,
uint64_t vf_mask, uint8_t vlan_on)
{
struct bnxt_vlan_table_entry *ve;
@@ -570,7 +566,7 @@ int rte_pmd_bnxt_set_vf_vlan_filter(uint8_t port, uint16_t vlan,
return rc;
}
-int rte_pmd_bnxt_get_vf_stats(uint8_t port,
+int rte_pmd_bnxt_get_vf_stats(uint16_t port,
uint16_t vf_id,
struct rte_eth_stats *stats)
{
@@ -598,7 +594,7 @@ int rte_pmd_bnxt_get_vf_stats(uint8_t port,
return bnxt_hwrm_func_qstats(bp, bp->pf.first_vf_id + vf_id, stats);
}
-int rte_pmd_bnxt_reset_vf_stats(uint8_t port,
+int rte_pmd_bnxt_reset_vf_stats(uint16_t port,
uint16_t vf_id)
{
struct rte_eth_dev *dev;
@@ -625,7 +621,7 @@ int rte_pmd_bnxt_reset_vf_stats(uint8_t port,
return bnxt_hwrm_func_clr_stats(bp, bp->pf.first_vf_id + vf_id);
}
-int rte_pmd_bnxt_get_vf_rx_status(uint8_t port, uint16_t vf_id)
+int rte_pmd_bnxt_get_vf_rx_status(uint16_t port, uint16_t vf_id)
{
struct rte_eth_dev *dev;
struct rte_eth_dev_info dev_info;
@@ -651,7 +647,7 @@ int rte_pmd_bnxt_get_vf_rx_status(uint8_t port, uint16_t vf_id)
return bnxt_vf_vnic_count(bp, vf_id);
}
-int rte_pmd_bnxt_get_vf_tx_drop_count(uint8_t port, uint16_t vf_id,
+int rte_pmd_bnxt_get_vf_tx_drop_count(uint16_t port, uint16_t vf_id,
uint64_t *count)
{
struct rte_eth_dev *dev;
@@ -679,7 +675,7 @@ int rte_pmd_bnxt_get_vf_tx_drop_count(uint8_t port, uint16_t vf_id,
count);
}
-int rte_pmd_bnxt_mac_addr_add(uint8_t port, struct ether_addr *addr,
+int rte_pmd_bnxt_mac_addr_add(uint16_t port, struct ether_addr *addr,
uint32_t vf_id)
{
struct rte_eth_dev *dev;
@@ -710,7 +706,7 @@ int rte_pmd_bnxt_mac_addr_add(uint8_t port, struct ether_addr *addr,
/* If the VF currently uses a random MAC, update default to this one */
if (bp->pf.vf_info[vf_id].random_mac) {
if (rte_pmd_bnxt_get_vf_rx_status(port, vf_id) <= 0)
- rc = bnxt_hwrm_func_vf_mac(bp, vf_id, (uint8_t *)addr);
+ bnxt_hwrm_func_vf_mac(bp, vf_id, (uint8_t *)addr);
}
/* query the default VNIC id used by the function */
@@ -731,7 +727,7 @@ int rte_pmd_bnxt_mac_addr_add(uint8_t port, struct ether_addr *addr,
(HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR |
HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR_MASK) &&
memcmp(addr, filter->l2_addr, ETHER_ADDR_LEN) == 0) {
- bnxt_hwrm_clear_filter(bp, filter);
+ bnxt_hwrm_clear_l2_filter(bp, filter);
break;
}
}
@@ -749,14 +745,14 @@ int rte_pmd_bnxt_mac_addr_add(uint8_t port, struct ether_addr *addr,
/* Do not add a filter for the default MAC */
if (bnxt_hwrm_func_qcfg_vf_default_mac(bp, vf_id, &dflt_mac) ||
memcmp(filter->l2_addr, dflt_mac.addr_bytes, ETHER_ADDR_LEN))
- rc = bnxt_hwrm_set_filter(bp, vnic.fw_vnic_id, filter);
+ rc = bnxt_hwrm_set_l2_filter(bp, vnic.fw_vnic_id, filter);
exit:
return rc;
}
int
-rte_pmd_bnxt_set_vf_vlan_insert(uint8_t port, uint16_t vf,
+rte_pmd_bnxt_set_vf_vlan_insert(uint16_t port, uint16_t vf,
uint16_t vlan_id)
{
struct rte_eth_dev *dev;
@@ -793,7 +789,7 @@ rte_pmd_bnxt_set_vf_vlan_insert(uint8_t port, uint16_t vf,
return rc;
}
-int rte_pmd_bnxt_set_vf_persist_stats(uint8_t port, uint16_t vf, uint8_t on)
+int rte_pmd_bnxt_set_vf_persist_stats(uint16_t port, uint16_t vf, uint8_t on)
{
struct rte_eth_dev_info dev_info;
struct rte_eth_dev *dev;
diff --git a/drivers/net/bnxt/rte_pmd_bnxt.h b/drivers/net/bnxt/rte_pmd_bnxt.h
index c4c4770e..f881d30d 100644
--- a/drivers/net/bnxt/rte_pmd_bnxt.h
+++ b/drivers/net/bnxt/rte_pmd_bnxt.h
@@ -78,7 +78,7 @@ struct rte_pmd_bnxt_mb_event_param {
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_bnxt_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on);
+int rte_pmd_bnxt_set_vf_mac_anti_spoof(uint16_t port, uint16_t vf, uint8_t on);
/**
* Set the VF MAC address.
@@ -94,7 +94,7 @@ int rte_pmd_bnxt_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on);
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if *vf* or *mac_addr* is invalid.
*/
-int rte_pmd_bnxt_set_vf_mac_addr(uint8_t port, uint16_t vf,
+int rte_pmd_bnxt_set_vf_mac_addr(uint16_t port, uint16_t vf,
struct ether_addr *mac_addr);
/**
@@ -115,7 +115,7 @@ int rte_pmd_bnxt_set_vf_mac_addr(uint8_t port, uint16_t vf,
* - (-EINVAL) if bad parameter.
*/
int
-rte_pmd_bnxt_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on);
+rte_pmd_bnxt_set_vf_vlan_stripq(uint16_t port, uint16_t vf, uint8_t on);
/**
* Enable/Disable vf vlan insert
@@ -134,7 +134,7 @@ rte_pmd_bnxt_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on);
* - (-EINVAL) if bad parameter.
*/
int
-rte_pmd_bnxt_set_vf_vlan_insert(uint8_t port, uint16_t vf,
+rte_pmd_bnxt_set_vf_vlan_insert(uint16_t port, uint16_t vf,
uint16_t vlan_id);
/**
@@ -156,7 +156,7 @@ rte_pmd_bnxt_set_vf_vlan_insert(uint8_t port, uint16_t vf,
* - (-ENODEV) if *port_id* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_bnxt_set_vf_vlan_filter(uint8_t port, uint16_t vlan,
+int rte_pmd_bnxt_set_vf_vlan_filter(uint16_t port, uint16_t vlan,
uint64_t vf_mask, uint8_t vlan_on);
/**
@@ -173,7 +173,7 @@ int rte_pmd_bnxt_set_vf_vlan_filter(uint8_t port, uint16_t vlan,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_bnxt_set_tx_loopback(uint8_t port, uint8_t on);
+int rte_pmd_bnxt_set_tx_loopback(uint16_t port, uint8_t on);
/**
* set all queues drop enable bit
@@ -189,7 +189,7 @@ int rte_pmd_bnxt_set_tx_loopback(uint8_t port, uint8_t on);
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_bnxt_set_all_queues_drop_en(uint8_t port, uint8_t on);
+int rte_pmd_bnxt_set_all_queues_drop_en(uint16_t port, uint8_t on);
/**
* Set the VF rate limit.
@@ -207,7 +207,7 @@ int rte_pmd_bnxt_set_all_queues_drop_en(uint8_t port, uint8_t on);
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if *vf* or *mac_addr* is invalid.
*/
-int rte_pmd_bnxt_set_vf_rate_limit(uint8_t port, uint16_t vf,
+int rte_pmd_bnxt_set_vf_rate_limit(uint16_t port, uint16_t vf,
uint16_t tx_rate, uint64_t q_msk);
/**
@@ -226,7 +226,7 @@ int rte_pmd_bnxt_set_vf_rate_limit(uint8_t port, uint16_t vf,
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_bnxt_get_vf_stats(uint8_t port,
+int rte_pmd_bnxt_get_vf_stats(uint16_t port,
uint16_t vf_id,
struct rte_eth_stats *stats);
@@ -242,7 +242,7 @@ int rte_pmd_bnxt_get_vf_stats(uint8_t port,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_bnxt_reset_vf_stats(uint8_t port,
+int rte_pmd_bnxt_reset_vf_stats(uint16_t port,
uint16_t vf_id);
/**
@@ -261,7 +261,7 @@ int rte_pmd_bnxt_reset_vf_stats(uint8_t port,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_bnxt_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on);
+int rte_pmd_bnxt_set_vf_vlan_anti_spoof(uint16_t port, uint16_t vf, uint8_t on);
/**
* Set RX L2 Filtering mode of a VF of an Ethernet device.
@@ -280,7 +280,7 @@ int rte_pmd_bnxt_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on);
* - (-ENODEV) if *port_id* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_bnxt_set_vf_rxmode(uint8_t port, uint16_t vf,
+int rte_pmd_bnxt_set_vf_rxmode(uint16_t port, uint16_t vf,
uint16_t rx_mask, uint8_t on);
/**
@@ -297,7 +297,7 @@ int rte_pmd_bnxt_set_vf_rxmode(uint8_t port, uint16_t vf,
* - (-ENOMEM) on an allocation failure
* - (-1) firmware interface error
*/
-int rte_pmd_bnxt_get_vf_rx_status(uint8_t port, uint16_t vf_id);
+int rte_pmd_bnxt_get_vf_rx_status(uint16_t port, uint16_t vf_id);
/**
* Queries the TX drop counter for the function
@@ -313,7 +313,7 @@ int rte_pmd_bnxt_get_vf_rx_status(uint8_t port, uint16_t vf_id);
* - (-EINVAL) invalid vf_id specified.
* - (-ENOTSUP) Ethernet device is not a PF
*/
-int rte_pmd_bnxt_get_vf_tx_drop_count(uint8_t port, uint16_t vf_id,
+int rte_pmd_bnxt_get_vf_tx_drop_count(uint16_t port, uint16_t vf_id,
uint64_t *count);
/**
@@ -331,7 +331,7 @@ int rte_pmd_bnxt_get_vf_tx_drop_count(uint8_t port, uint16_t vf_id,
* - (-ENOTSUP) Ethernet device is not a PF
* - (-ENOMEM) on an allocation failure
*/
-int rte_pmd_bnxt_mac_addr_add(uint8_t port, struct ether_addr *mac_addr,
+int rte_pmd_bnxt_mac_addr_add(uint16_t port, struct ether_addr *mac_addr,
uint32_t vf_id);
/**
@@ -350,5 +350,5 @@ int rte_pmd_bnxt_mac_addr_add(uint8_t port, struct ether_addr *mac_addr,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_bnxt_set_vf_persist_stats(uint8_t port, uint16_t vf, uint8_t on);
+int rte_pmd_bnxt_set_vf_persist_stats(uint16_t port, uint16_t vf, uint8_t on);
#endif /* _PMD_BNXT_H_ */
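The prototypes above now take a 16-bit port identifier, matching the wider ethdev port id. A minimal sketch of how an application might call two of these PMD-specific VF helpers; the port/VF numbers, the rate value of 1000 and the all-ones queue mask are illustrative assumptions, not values from the patch:

#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_pmd_bnxt.h>

/* Sketch only: port 0, VF 1 and the rate are hypothetical values. */
static int
bnxt_vf_example(void)
{
	uint16_t port = 0;	/* PF port id, now 16 bits wide */
	uint16_t vf = 1;
	int ret;

	/* Reject frames whose source MAC does not match the VF MAC. */
	ret = rte_pmd_bnxt_set_vf_mac_anti_spoof(port, vf, 1);
	if (ret != 0)
		return ret;

	/* Cap the VF transmit rate on every queue (q_msk = all ones). */
	return rte_pmd_bnxt_set_vf_rate_limit(port, vf, 1000, UINT64_MAX);
}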
diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile
index 910c932d..dea1bd5c 100644
--- a/drivers/net/bonding/Makefile
+++ b/drivers/net/bonding/Makefile
@@ -38,10 +38,14 @@ LIB = librte_pmd_bond.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_cmdline
+LDLIBS += -lrte_pci -lrte_bus_pci
+LDLIBS += -lrte_bus_vdev
-EXPORT_MAP := rte_eth_bond_version.map
+EXPORT_MAP := rte_pmd_bond_version.map
-LIBABIVER := 1
+LIBABIVER := 2
#
# all source are stored in SRCS-y
diff --git a/drivers/net/bonding/rte_eth_bond.h b/drivers/net/bonding/rte_eth_bond.h
index 8efbf071..87ff2917 100644
--- a/drivers/net/bonding/rte_eth_bond.h
+++ b/drivers/net/bonding/rte_eth_bond.h
@@ -151,7 +151,7 @@ rte_eth_bond_free(const char *name);
* 0 on success, negative value otherwise
*/
int
-rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id);
+rte_eth_bond_slave_add(uint16_t bonded_port_id, uint16_t slave_port_id);
/**
* Remove a slave rte_eth_dev device from the bonded device
@@ -163,7 +163,7 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id);
* 0 on success, negative value otherwise
*/
int
-rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id);
+rte_eth_bond_slave_remove(uint16_t bonded_port_id, uint16_t slave_port_id);
/**
* Set link bonding mode of bonded device
@@ -175,7 +175,7 @@ rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id);
* 0 on success, negative value otherwise
*/
int
-rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode);
+rte_eth_bond_mode_set(uint16_t bonded_port_id, uint8_t mode);
/**
* Get link bonding mode of bonded device
@@ -186,7 +186,7 @@ rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode);
* link bonding mode on success, negative value otherwise
*/
int
-rte_eth_bond_mode_get(uint8_t bonded_port_id);
+rte_eth_bond_mode_get(uint16_t bonded_port_id);
/**
* Set slave rte_eth_dev as primary slave of bonded device
@@ -198,7 +198,7 @@ rte_eth_bond_mode_get(uint8_t bonded_port_id);
* 0 on success, negative value otherwise
*/
int
-rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id);
+rte_eth_bond_primary_set(uint16_t bonded_port_id, uint16_t slave_port_id);
/**
* Get primary slave of bonded device
@@ -209,7 +209,7 @@ rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id);
* Port Id of primary slave on success, -1 on failure
*/
int
-rte_eth_bond_primary_get(uint8_t bonded_port_id);
+rte_eth_bond_primary_get(uint16_t bonded_port_id);
/**
* Populate an array with list of the slaves port id's of the bonded device
@@ -223,7 +223,8 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id);
* negative value otherwise
*/
int
-rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len);
+rte_eth_bond_slaves_get(uint16_t bonded_port_id, uint16_t slaves[],
+ uint16_t len);
/**
* Populate an array with list of the active slaves port id's of the bonded
@@ -238,8 +239,8 @@ rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len);
* negative value otherwise
*/
int
-rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[],
- uint8_t len);
+rte_eth_bond_active_slaves_get(uint16_t bonded_port_id, uint16_t slaves[],
+ uint16_t len);
/**
* Set explicit MAC address to use on bonded device and its slaves.
@@ -252,7 +253,7 @@ rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[],
* 0 on success, negative value otherwise
*/
int
-rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
+rte_eth_bond_mac_address_set(uint16_t bonded_port_id,
struct ether_addr *mac_addr);
/**
@@ -265,7 +266,7 @@ rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
* 0 on success, negative value otherwise
*/
int
-rte_eth_bond_mac_address_reset(uint8_t bonded_port_id);
+rte_eth_bond_mac_address_reset(uint16_t bonded_port_id);
/**
* Set the transmit policy for bonded device to use when it is operating in
@@ -279,7 +280,7 @@ rte_eth_bond_mac_address_reset(uint8_t bonded_port_id);
* 0 on success, negative value otherwise.
*/
int
-rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy);
+rte_eth_bond_xmit_policy_set(uint16_t bonded_port_id, uint8_t policy);
/**
* Get the transmit policy set on bonded device for balance mode operation
@@ -290,7 +291,7 @@ rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy);
* Balance transmit policy on success, negative value otherwise.
*/
int
-rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id);
+rte_eth_bond_xmit_policy_get(uint16_t bonded_port_id);
/**
* Set the link monitoring frequency (in ms) for monitoring the link status of
@@ -304,7 +305,7 @@ rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id);
*/
int
-rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms);
+rte_eth_bond_link_monitoring_set(uint16_t bonded_port_id, uint32_t internal_ms);
/**
* Get the current link monitoring frequency (in ms) for monitoring of the link
@@ -316,7 +317,7 @@ rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms);
* Monitoring interval on success, negative value otherwise.
*/
int
-rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id);
+rte_eth_bond_link_monitoring_get(uint16_t bonded_port_id);
/**
@@ -330,7 +331,8 @@ rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id);
* 0 on success, negative value otherwise.
*/
int
-rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms);
+rte_eth_bond_link_down_prop_delay_set(uint16_t bonded_port_id,
+ uint32_t delay_ms);
/**
* Get the period in milliseconds set for delaying the disabling of a bonded
@@ -342,7 +344,7 @@ rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
* Delay period on success, negative value otherwise.
*/
int
-rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id);
+rte_eth_bond_link_down_prop_delay_get(uint16_t bonded_port_id);
/**
* Set the period in milliseconds for delaying the enabling of a bonded link
@@ -355,7 +357,8 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id);
* 0 on success, negative value otherwise.
*/
int
-rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms);
+rte_eth_bond_link_up_prop_delay_set(uint16_t bonded_port_id,
+ uint32_t delay_ms);
/**
* Get the period in milliseconds set for delaying the enabling of a bonded
@@ -367,7 +370,7 @@ rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms);
* Delay period on success, negative value otherwise.
*/
int
-rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id);
+rte_eth_bond_link_up_prop_delay_get(uint16_t bonded_port_id);
#ifdef __cplusplus
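With the widened arguments, slave lists are exchanged as arrays of uint16_t as well. A brief sketch of the control path, assuming two already-probed slave ports whose ids are supplied by the caller and an illustrative device name:

#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_eth_bond.h>

/* Sketch only: device name and bonding mode are illustrative. */
static int
bond_two_ports(uint16_t slave0, uint16_t slave1)
{
	uint16_t slaves[RTE_MAX_ETHPORTS];
	int bond_port, n;

	bond_port = rte_eth_bond_create("net_bonding0",
			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
	if (bond_port < 0)
		return bond_port;

	if (rte_eth_bond_slave_add(bond_port, slave0) != 0 ||
	    rte_eth_bond_slave_add(bond_port, slave1) != 0)
		return -1;

	/* Slave port ids are reported as uint16_t after this change. */
	n = rte_eth_bond_slaves_get(bond_port, slaves, RTE_MAX_ETHPORTS);
	return (n == 2) ? 0 : -1;
}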
diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c
index 20b5a896..eee9e502 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.c
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
@@ -209,7 +209,7 @@ set_warning_flags(struct port *port, uint16_t flags)
}
static void
-show_warnings(uint8_t slave_id)
+show_warnings(uint16_t slave_id)
{
struct port *port = &mode_8023ad_ports[slave_id];
uint8_t warnings;
@@ -278,7 +278,7 @@ record_default(struct port *port)
* @param port Port on which LACPDU was received.
*/
static void
-rx_machine(struct bond_dev_private *internals, uint8_t slave_id,
+rx_machine(struct bond_dev_private *internals, uint16_t slave_id,
struct lacpdu *lacp)
{
struct port *agg, *port = &mode_8023ad_ports[slave_id];
@@ -399,7 +399,7 @@ rx_machine(struct bond_dev_private *internals, uint8_t slave_id,
* @param port Port to handle state machine.
*/
static void
-periodic_machine(struct bond_dev_private *internals, uint8_t slave_id)
+periodic_machine(struct bond_dev_private *internals, uint16_t slave_id)
{
struct port *port = &mode_8023ad_ports[slave_id];
/* Calculate if either site is LACP enabled */
@@ -461,7 +461,7 @@ periodic_machine(struct bond_dev_private *internals, uint8_t slave_id)
* @param port Port to handle state machine.
*/
static void
-mux_machine(struct bond_dev_private *internals, uint8_t slave_id)
+mux_machine(struct bond_dev_private *internals, uint16_t slave_id)
{
struct port *port = &mode_8023ad_ports[slave_id];
@@ -564,7 +564,7 @@ mux_machine(struct bond_dev_private *internals, uint8_t slave_id)
* @param port
*/
static void
-tx_machine(struct bond_dev_private *internals, uint8_t slave_id)
+tx_machine(struct bond_dev_private *internals, uint16_t slave_id)
{
struct port *agg, *port = &mode_8023ad_ports[slave_id];
@@ -688,11 +688,11 @@ static void
selection_logic(struct bond_dev_private *internals, uint8_t slave_id)
{
struct port *agg, *port;
- uint8_t slaves_count, new_agg_id, i, j = 0;
- uint8_t *slaves;
+ uint16_t slaves_count, new_agg_id, i, j = 0;
+ uint16_t *slaves;
uint64_t agg_bandwidth[8] = {0};
uint64_t agg_count[8] = {0};
- uint8_t default_slave = 0;
+ uint16_t default_slave = 0;
uint8_t mode_count_id, mode_band_id;
struct rte_eth_link link_info;
@@ -923,7 +923,8 @@ bond_mode_8023ad_periodic_cb(void *arg)
}
void
-bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_id)
+bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev,
+ uint16_t slave_id)
{
struct bond_dev_private *internals = bond_dev->data->dev_private;
@@ -951,7 +952,7 @@ bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_id)
memcpy(&port->actor, &initial, sizeof(struct port_params));
/* Standard requires that port ID must be greater than 0.
* Add 1 to get the corresponding port_number */
- port->actor.port_number = rte_cpu_to_be_16((uint16_t)slave_id + 1);
+ port->actor.port_number = rte_cpu_to_be_16(slave_id + 1);
memcpy(&port->partner, &initial, sizeof(struct port_params));
@@ -1021,37 +1022,30 @@ bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, uint8_t slave_id)
}
int
-bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev,
- uint8_t slave_id)
+bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev __rte_unused,
+ uint16_t slave_id)
{
- struct bond_dev_private *internals = bond_dev->data->dev_private;
void *pkt = NULL;
- struct port *port;
- uint8_t i;
+ struct port *port = NULL;
+ uint8_t old_partner_state;
- /* Given slave must be in active list */
- RTE_ASSERT(find_slave_by_id(internals->active_slaves,
- internals->active_slave_count, slave_id) < internals->active_slave_count);
+ port = &mode_8023ad_ports[slave_id];
- /* Exclude slave from transmit policy. If this slave is an aggregator
- * make all aggregated slaves unselected to force selection logic
- * to select suitable aggregator for this port. */
- for (i = 0; i < internals->active_slave_count; i++) {
- port = &mode_8023ad_ports[internals->active_slaves[i]];
- if (port->aggregator_port_id != slave_id)
- continue;
+ ACTOR_STATE_CLR(port, AGGREGATION);
+ port->selected = UNSELECTED;
- port->selected = UNSELECTED;
+ old_partner_state = port->partner_state;
+ record_default(port);
- /* Use default aggregator */
- port->aggregator_port_id = internals->active_slaves[i];
- }
+ /* If partner timeout state changes then disable timer */
+ if (!((old_partner_state ^ port->partner_state) &
+ STATE_LACP_SHORT_TIMEOUT))
+ timer_cancel(&port->current_while_timer);
- port = &mode_8023ad_ports[slave_id];
- port->selected = UNSELECTED;
- port->actor_state &= ~(STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
- STATE_COLLECTING);
+ PARTNER_STATE_CLR(port, AGGREGATION);
+ ACTOR_STATE_CLR(port, EXPIRED);
+ /* flush rx/tx rings */
while (rte_ring_dequeue(port->rx_ring, &pkt) == 0)
rte_pktmbuf_free((struct rte_mbuf *)pkt);
@@ -1066,7 +1060,7 @@ bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
struct bond_dev_private *internals = bond_dev->data->dev_private;
struct ether_addr slave_addr;
struct port *slave, *agg_slave;
- uint8_t slave_id, i, j;
+ uint16_t slave_id, i, j;
bond_mode_8023ad_stop(bond_dev);
@@ -1111,27 +1105,6 @@ bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
conf->update_timeout_ms = mode4->update_timeout_us / 1000;
conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
-}
-
-static void
-bond_mode_8023ad_conf_get_v1607(struct rte_eth_dev *dev,
- struct rte_eth_bond_8023ad_conf *conf)
-{
- struct bond_dev_private *internals = dev->data->dev_private;
- struct mode8023ad_private *mode4 = &internals->mode4;
-
- bond_mode_8023ad_conf_get(dev, conf);
- conf->slowrx_cb = mode4->slowrx_cb;
-}
-
-static void
-bond_mode_8023ad_conf_get_v1708(struct rte_eth_dev *dev,
- struct rte_eth_bond_8023ad_conf *conf)
-{
- struct bond_dev_private *internals = dev->data->dev_private;
- struct mode8023ad_private *mode4 = &internals->mode4;
-
- bond_mode_8023ad_conf_get(dev, conf);
conf->slowrx_cb = mode4->slowrx_cb;
conf->agg_selection = mode4->agg_selection;
}
@@ -1171,27 +1144,6 @@ bond_mode_8023ad_conf_assign(struct mode8023ad_private *mode4,
mode4->dedicated_queues.tx_qid = UINT16_MAX;
}
-static void
-bond_mode_8023ad_setup_v20(struct rte_eth_dev *dev,
- struct rte_eth_bond_8023ad_conf *conf)
-{
- struct rte_eth_bond_8023ad_conf def_conf;
- struct bond_dev_private *internals = dev->data->dev_private;
- struct mode8023ad_private *mode4 = &internals->mode4;
-
- if (conf == NULL) {
- conf = &def_conf;
- bond_mode_8023ad_conf_get_default(conf);
- }
-
- bond_mode_8023ad_stop(dev);
- bond_mode_8023ad_conf_assign(mode4, conf);
-
- if (dev->data->dev_started)
- bond_mode_8023ad_start(dev);
-}
-
-
void
bond_mode_8023ad_setup(struct rte_eth_dev *dev,
struct rte_eth_bond_8023ad_conf *conf)
@@ -1207,27 +1159,6 @@ bond_mode_8023ad_setup(struct rte_eth_dev *dev,
bond_mode_8023ad_stop(dev);
bond_mode_8023ad_conf_assign(mode4, conf);
-
-
- if (dev->data->dev_started)
- bond_mode_8023ad_start(dev);
-}
-
-static void
-bond_mode_8023ad_setup_v1708(struct rte_eth_dev *dev,
- struct rte_eth_bond_8023ad_conf *conf)
-{
- struct rte_eth_bond_8023ad_conf def_conf;
- struct bond_dev_private *internals = dev->data->dev_private;
- struct mode8023ad_private *mode4 = &internals->mode4;
-
- if (conf == NULL) {
- conf = &def_conf;
- bond_mode_8023ad_conf_get_default(conf);
- }
-
- bond_mode_8023ad_stop(dev);
- bond_mode_8023ad_conf_assign(mode4, conf);
mode4->slowrx_cb = conf->slowrx_cb;
mode4->agg_selection = AGG_STABLE;
@@ -1277,7 +1208,7 @@ bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
void
bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
- uint8_t slave_id, struct rte_mbuf *pkt)
+ uint16_t slave_id, struct rte_mbuf *pkt)
{
struct mode8023ad_private *mode4 = &internals->mode4;
struct port *port = &mode_8023ad_ports[slave_id];
@@ -1358,7 +1289,7 @@ free_out:
}
int
-rte_eth_bond_8023ad_conf_get_v20(uint8_t port_id,
+rte_eth_bond_8023ad_conf_get(uint16_t port_id,
struct rte_eth_bond_8023ad_conf *conf)
{
struct rte_eth_dev *bond_dev;
@@ -1373,49 +1304,9 @@ rte_eth_bond_8023ad_conf_get_v20(uint8_t port_id,
bond_mode_8023ad_conf_get(bond_dev, conf);
return 0;
}
-VERSION_SYMBOL(rte_eth_bond_8023ad_conf_get, _v20, 2.0);
int
-rte_eth_bond_8023ad_conf_get_v1607(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf)
-{
- struct rte_eth_dev *bond_dev;
-
- if (valid_bonded_port_id(port_id) != 0)
- return -EINVAL;
-
- if (conf == NULL)
- return -EINVAL;
-
- bond_dev = &rte_eth_devices[port_id];
- bond_mode_8023ad_conf_get_v1607(bond_dev, conf);
- return 0;
-}
-VERSION_SYMBOL(rte_eth_bond_8023ad_conf_get, _v1607, 16.07);
-
-int
-rte_eth_bond_8023ad_conf_get_v1708(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf)
-{
- struct rte_eth_dev *bond_dev;
-
- if (valid_bonded_port_id(port_id) != 0)
- return -EINVAL;
-
- if (conf == NULL)
- return -EINVAL;
-
- bond_dev = &rte_eth_devices[port_id];
- bond_mode_8023ad_conf_get_v1708(bond_dev, conf);
- return 0;
-}
-MAP_STATIC_SYMBOL(int rte_eth_bond_8023ad_conf_get(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf),
- rte_eth_bond_8023ad_conf_get_v1708);
-BIND_DEFAULT_SYMBOL(rte_eth_bond_8023ad_conf_get, _v1708, 17.08);
-
-int
-rte_eth_bond_8023ad_agg_selection_set(uint8_t port_id,
+rte_eth_bond_8023ad_agg_selection_set(uint16_t port_id,
enum rte_bond_8023ad_agg_selection agg_selection)
{
struct rte_eth_dev *bond_dev;
@@ -1437,7 +1328,7 @@ rte_eth_bond_8023ad_agg_selection_set(uint8_t port_id,
return 0;
}
-int rte_eth_bond_8023ad_agg_selection_get(uint8_t port_id)
+int rte_eth_bond_8023ad_agg_selection_get(uint16_t port_id)
{
struct rte_eth_dev *bond_dev;
struct bond_dev_private *internals;
@@ -1458,7 +1349,7 @@ int rte_eth_bond_8023ad_agg_selection_get(uint8_t port_id)
static int
-bond_8023ad_setup_validate(uint8_t port_id,
+bond_8023ad_setup_validate(uint16_t port_id,
struct rte_eth_bond_8023ad_conf *conf)
{
if (valid_bonded_port_id(port_id) != 0)
@@ -1482,26 +1373,9 @@ bond_8023ad_setup_validate(uint8_t port_id,
return 0;
}
-int
-rte_eth_bond_8023ad_setup_v20(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf)
-{
- struct rte_eth_dev *bond_dev;
- int err;
-
- err = bond_8023ad_setup_validate(port_id, conf);
- if (err != 0)
- return err;
-
- bond_dev = &rte_eth_devices[port_id];
- bond_mode_8023ad_setup_v20(bond_dev, conf);
-
- return 0;
-}
-VERSION_SYMBOL(rte_eth_bond_8023ad_setup, _v20, 2.0);
int
-rte_eth_bond_8023ad_setup_v1607(uint8_t port_id,
+rte_eth_bond_8023ad_setup(uint16_t port_id,
struct rte_eth_bond_8023ad_conf *conf)
{
struct rte_eth_dev *bond_dev;
@@ -1516,37 +1390,13 @@ rte_eth_bond_8023ad_setup_v1607(uint8_t port_id,
return 0;
}
-VERSION_SYMBOL(rte_eth_bond_8023ad_setup, _v1607, 16.07);
-
-
-int
-rte_eth_bond_8023ad_setup_v1708(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf)
-{
- struct rte_eth_dev *bond_dev;
- int err;
-
- err = bond_8023ad_setup_validate(port_id, conf);
- if (err != 0)
- return err;
-
- bond_dev = &rte_eth_devices[port_id];
- bond_mode_8023ad_setup_v1708(bond_dev, conf);
-
- return 0;
-}
-BIND_DEFAULT_SYMBOL(rte_eth_bond_8023ad_setup, _v1708, 17.08);
-MAP_STATIC_SYMBOL(int rte_eth_bond_8023ad_setup(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf),
- rte_eth_bond_8023ad_setup_v1708);
-
int
-rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
+rte_eth_bond_8023ad_slave_info(uint16_t port_id, uint16_t slave_id,
struct rte_eth_bond_8023ad_slave_info *info)
{
struct rte_eth_dev *bond_dev;
@@ -1579,7 +1429,7 @@ rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
}
static int
-bond_8023ad_ext_validate(uint8_t port_id, uint8_t slave_id)
+bond_8023ad_ext_validate(uint16_t port_id, uint16_t slave_id)
{
struct rte_eth_dev *bond_dev;
struct bond_dev_private *internals;
@@ -1607,7 +1457,8 @@ bond_8023ad_ext_validate(uint8_t port_id, uint8_t slave_id)
}
int
-rte_eth_bond_8023ad_ext_collect(uint8_t port_id, uint8_t slave_id, int enabled)
+rte_eth_bond_8023ad_ext_collect(uint16_t port_id, uint16_t slave_id,
+ int enabled)
{
struct port *port;
int res;
@@ -1627,7 +1478,8 @@ rte_eth_bond_8023ad_ext_collect(uint8_t port_id, uint8_t slave_id, int enabled)
}
int
-rte_eth_bond_8023ad_ext_distrib(uint8_t port_id, uint8_t slave_id, int enabled)
+rte_eth_bond_8023ad_ext_distrib(uint16_t port_id, uint16_t slave_id,
+ int enabled)
{
struct port *port;
int res;
@@ -1647,7 +1499,7 @@ rte_eth_bond_8023ad_ext_distrib(uint8_t port_id, uint8_t slave_id, int enabled)
}
int
-rte_eth_bond_8023ad_ext_distrib_get(uint8_t port_id, uint8_t slave_id)
+rte_eth_bond_8023ad_ext_distrib_get(uint16_t port_id, uint16_t slave_id)
{
struct port *port;
int err;
@@ -1661,7 +1513,7 @@ rte_eth_bond_8023ad_ext_distrib_get(uint8_t port_id, uint8_t slave_id)
}
int
-rte_eth_bond_8023ad_ext_collect_get(uint8_t port_id, uint8_t slave_id)
+rte_eth_bond_8023ad_ext_collect_get(uint16_t port_id, uint16_t slave_id)
{
struct port *port;
int err;
@@ -1675,7 +1527,7 @@ rte_eth_bond_8023ad_ext_collect_get(uint8_t port_id, uint8_t slave_id)
}
int
-rte_eth_bond_8023ad_ext_slowtx(uint8_t port_id, uint8_t slave_id,
+rte_eth_bond_8023ad_ext_slowtx(uint16_t port_id, uint16_t slave_id,
struct rte_mbuf *lacp_pkt)
{
struct port *port;
@@ -1736,7 +1588,7 @@ bond_mode_8023ad_ext_periodic_cb(void *arg)
}
int
-rte_eth_bond_8023ad_dedicated_queues_enable(uint8_t port)
+rte_eth_bond_8023ad_dedicated_queues_enable(uint16_t port)
{
int retval = 0;
struct rte_eth_dev *dev = &rte_eth_devices[port];
@@ -1760,7 +1612,7 @@ rte_eth_bond_8023ad_dedicated_queues_enable(uint8_t port)
}
int
-rte_eth_bond_8023ad_dedicated_queues_disable(uint8_t port)
+rte_eth_bond_8023ad_dedicated_queues_disable(uint16_t port)
{
int retval = 0;
struct rte_eth_dev *dev = &rte_eth_devices[port];
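With the _v20/_v1607/_v1708 wrappers removed there is a single entry point for reading the 802.3ad configuration, and the aggregator policy has its own setter. A small sketch, assuming bond_port is an existing mode-4 bonded device:

#include <rte_eth_bond_8023ad.h>

static int
prefer_bandwidth_aggregator(uint16_t bond_port)
{
	struct rte_eth_bond_8023ad_conf conf;

	/* One unversioned accessor remains after this change. */
	if (rte_eth_bond_8023ad_conf_get(bond_port, &conf) != 0)
		return -1;

	/* conf.agg_selection reports the current aggregator policy;
	 * changing it goes through the dedicated setter. */
	return rte_eth_bond_8023ad_agg_selection_set(bond_port,
			AGG_BANDWIDTH);
}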
diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.h b/drivers/net/bonding/rte_eth_bond_8023ad.h
index 1d353c73..2874336d 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.h
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.h
@@ -64,7 +64,7 @@ extern "C" {
#define MARKER_TLV_TYPE_INFO 0x01
#define MARKER_TLV_TYPE_RESP 0x02
-typedef void (*rte_eth_bond_8023ad_ext_slowrx_fn)(uint8_t slave_id,
+typedef void (*rte_eth_bond_8023ad_ext_slowrx_fn)(uint16_t slave_id,
struct rte_mbuf *lacp_pkt);
enum rte_bond_8023ad_selection {
@@ -176,7 +176,7 @@ struct rte_eth_bond_8023ad_slave_info {
struct port_params actor;
uint8_t partner_state;
struct port_params partner;
- uint8_t agg_port_id;
+ uint16_t agg_port_id;
};
/**
@@ -192,16 +192,7 @@ struct rte_eth_bond_8023ad_slave_info {
* -EINVAL if conf is NULL
*/
int
-rte_eth_bond_8023ad_conf_get(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf);
-int
-rte_eth_bond_8023ad_conf_get_v20(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf);
-int
-rte_eth_bond_8023ad_conf_get_v1607(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf);
-int
-rte_eth_bond_8023ad_conf_get_v1708(uint8_t port_id,
+rte_eth_bond_8023ad_conf_get(uint16_t port_id,
struct rte_eth_bond_8023ad_conf *conf);
/**
@@ -216,16 +207,7 @@ rte_eth_bond_8023ad_conf_get_v1708(uint8_t port_id,
* -EINVAL if configuration is invalid.
*/
int
-rte_eth_bond_8023ad_setup(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf);
-int
-rte_eth_bond_8023ad_setup_v20(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf);
-int
-rte_eth_bond_8023ad_setup_v1607(uint8_t port_id,
- struct rte_eth_bond_8023ad_conf *conf);
-int
-rte_eth_bond_8023ad_setup_v1708(uint8_t port_id,
+rte_eth_bond_8023ad_setup(uint16_t port_id,
struct rte_eth_bond_8023ad_conf *conf);
/**
@@ -241,7 +223,7 @@ rte_eth_bond_8023ad_setup_v1708(uint8_t port_id,
* bonded device or is not inactive).
*/
int
-rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
+rte_eth_bond_8023ad_slave_info(uint16_t port_id, uint16_t slave_id,
struct rte_eth_bond_8023ad_slave_info *conf);
#ifdef __cplusplus
@@ -259,7 +241,8 @@ rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id,
* -EINVAL if slave is not valid.
*/
int
-rte_eth_bond_8023ad_ext_collect(uint8_t port_id, uint8_t slave_id, int enabled);
+rte_eth_bond_8023ad_ext_collect(uint16_t port_id, uint16_t slave_id,
+ int enabled);
/**
* Get COLLECTING flag from slave port actor state.
@@ -272,7 +255,7 @@ rte_eth_bond_8023ad_ext_collect(uint8_t port_id, uint8_t slave_id, int enabled);
* -EINVAL if slave is not valid.
*/
int
-rte_eth_bond_8023ad_ext_collect_get(uint8_t port_id, uint8_t slave_id);
+rte_eth_bond_8023ad_ext_collect_get(uint16_t port_id, uint16_t slave_id);
/**
* Configure a slave port to start distributing.
@@ -285,7 +268,8 @@ rte_eth_bond_8023ad_ext_collect_get(uint8_t port_id, uint8_t slave_id);
* -EINVAL if slave is not valid.
*/
int
-rte_eth_bond_8023ad_ext_distrib(uint8_t port_id, uint8_t slave_id, int enabled);
+rte_eth_bond_8023ad_ext_distrib(uint16_t port_id, uint16_t slave_id,
+ int enabled);
/**
* Get DISTRIBUTING flag from slave port actor state.
@@ -298,7 +282,7 @@ rte_eth_bond_8023ad_ext_distrib(uint8_t port_id, uint8_t slave_id, int enabled);
* -EINVAL if slave is not valid.
*/
int
-rte_eth_bond_8023ad_ext_distrib_get(uint8_t port_id, uint8_t slave_id);
+rte_eth_bond_8023ad_ext_distrib_get(uint16_t port_id, uint16_t slave_id);
/**
* LACPDU transmit path for external 802.3ad state machine. Caller retains
@@ -312,7 +296,7 @@ rte_eth_bond_8023ad_ext_distrib_get(uint8_t port_id, uint8_t slave_id);
* 0 on success, negative value otherwise.
*/
int
-rte_eth_bond_8023ad_ext_slowtx(uint8_t port_id, uint8_t slave_id,
+rte_eth_bond_8023ad_ext_slowtx(uint16_t port_id, uint16_t slave_id,
struct rte_mbuf *lacp_pkt);
/**
@@ -338,7 +322,7 @@ rte_eth_bond_8023ad_ext_slowtx(uint8_t port_id, uint8_t slave_id,
* 0 on success, negative value otherwise.
*/
int
-rte_eth_bond_8023ad_dedicated_queues_enable(uint8_t port_id);
+rte_eth_bond_8023ad_dedicated_queues_enable(uint16_t port_id);
/**
* Disable slow queue on slaves
@@ -355,7 +339,7 @@ rte_eth_bond_8023ad_dedicated_queues_enable(uint8_t port_id);
*
*/
int
-rte_eth_bond_8023ad_dedicated_queues_disable(uint8_t port_id);
+rte_eth_bond_8023ad_dedicated_queues_disable(uint16_t port_id);
/*
* Get aggregator mode for 8023ad
@@ -365,7 +349,7 @@ rte_eth_bond_8023ad_dedicated_queues_disable(uint8_t port_id);
* aggregator mode on success, negative value otherwise
*/
int
-rte_eth_bond_8023ad_agg_selection_get(uint8_t port_id);
+rte_eth_bond_8023ad_agg_selection_get(uint16_t port_id);
/**
* Set aggregator mode for 8023ad
@@ -374,6 +358,6 @@ rte_eth_bond_8023ad_agg_selection_get(uint8_t port_id);
* 0 on success, negative value otherwise
*/
int
-rte_eth_bond_8023ad_agg_selection_set(uint8_t port_id,
+rte_eth_bond_8023ad_agg_selection_set(uint16_t port_id,
enum rte_bond_8023ad_agg_selection agg_selection);
#endif /* RTE_ETH_BOND_8023AD_H_ */
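The external slow-RX callback type declared above now receives a 16-bit slave id. A sketch of a callback matching the new prototype and of registering it through the consolidated setup call; the callback body is a placeholder:

#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_eth_bond_8023ad.h>

/* Matches rte_eth_bond_8023ad_ext_slowrx_fn with the widened slave id. */
static void
ext_lacp_rx(uint16_t slave_id, struct rte_mbuf *lacp_pkt)
{
	/* Hand the LACPDU to an external 802.3ad state machine here. */
	RTE_SET_USED(slave_id);
	RTE_SET_USED(lacp_pkt);
}

static int
register_external_lacp(uint16_t bond_port)
{
	struct rte_eth_bond_8023ad_conf conf;

	if (rte_eth_bond_8023ad_conf_get(bond_port, &conf) != 0)
		return -1;

	conf.slowrx_cb = ext_lacp_rx;
	return rte_eth_bond_8023ad_setup(bond_port, &conf);
}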
diff --git a/drivers/net/bonding/rte_eth_bond_8023ad_private.h b/drivers/net/bonding/rte_eth_bond_8023ad_private.h
index d46e44a8..433c7000 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad_private.h
+++ b/drivers/net/bonding/rte_eth_bond_8023ad_private.h
@@ -279,7 +279,7 @@ bond_mode_8023ad_stop(struct rte_eth_dev *dev);
*/
void
bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
- uint8_t slave_id, struct rte_mbuf *pkt);
+ uint16_t slave_id, struct rte_mbuf *pkt);
/**
* @internal
@@ -293,7 +293,7 @@ bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
* 0 on success, negative value otherwise.
*/
void
-bond_mode_8023ad_activate_slave(struct rte_eth_dev *dev, uint8_t port_id);
+bond_mode_8023ad_activate_slave(struct rte_eth_dev *dev, uint16_t port_id);
/**
* @internal
@@ -307,7 +307,7 @@ bond_mode_8023ad_activate_slave(struct rte_eth_dev *dev, uint8_t port_id);
* 0 on success, negative value otherwise.
*/
int
-bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos);
+bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint16_t slave_pos);
/**
* Updates state when MAC was changed on bonded device or one of its slaves.
@@ -318,12 +318,12 @@ bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev);
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
- uint8_t slave_port);
+ uint16_t slave_port);
int
-bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint8_t slave_port);
+bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port);
int
-bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id);
+bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id);
#endif /* RTE_ETH_BOND_8023AD_H_ */
diff --git a/drivers/net/bonding/rte_eth_bond_alb.c b/drivers/net/bonding/rte_eth_bond_alb.c
index d9d37495..f7efbb78 100644
--- a/drivers/net/bonding/rte_eth_bond_alb.c
+++ b/drivers/net/bonding/rte_eth_bond_alb.c
@@ -148,7 +148,7 @@ void bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset,
rte_spinlock_unlock(&internals->mode6.lock);
}
-uint8_t
+uint16_t
bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
struct bond_dev_private *internals)
{
@@ -220,13 +220,13 @@ bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
return internals->current_primary_port;
}
-uint8_t
+uint16_t
bond_mode_alb_arp_upd(struct client_data *client_info,
struct rte_mbuf *pkt, struct bond_dev_private *internals)
{
struct ether_hdr *eth_h;
struct arp_hdr *arp_h;
- uint8_t slave_idx;
+ uint16_t slave_idx;
rte_spinlock_lock(&internals->mode6.lock);
eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
diff --git a/drivers/net/bonding/rte_eth_bond_alb.h b/drivers/net/bonding/rte_eth_bond_alb.h
index fd7c3aeb..9f17f7c8 100644
--- a/drivers/net/bonding/rte_eth_bond_alb.h
+++ b/drivers/net/bonding/rte_eth_bond_alb.h
@@ -51,7 +51,7 @@ struct client_data {
uint32_t cli_ip;
/**< Client IP address */
- uint8_t slave_idx;
+ uint16_t slave_idx;
/**< Index of slave on which we connect with that client */
uint8_t in_use;
/**< Flag indicating if entry in client table is currently used */
@@ -113,7 +113,7 @@ bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset,
* @return
* Index of slave on which packet should be sent.
*/
-uint8_t
+uint16_t
bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
struct bond_dev_private *internals);
@@ -127,7 +127,7 @@ bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
* @return
* Index of slave on which packet should be sent.
*/
-uint8_t
+uint16_t
bond_mode_alb_arp_upd(struct client_data *client_info,
struct rte_mbuf *pkt, struct bond_dev_private *internals);
diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c
index de1d9e0d..980e6368 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -37,7 +37,7 @@
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_tcp.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#include <rte_kvargs.h>
#include "rte_eth_bond.h"
@@ -56,14 +56,14 @@ check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev)
}
int
-valid_bonded_port_id(uint8_t port_id)
+valid_bonded_port_id(uint16_t port_id)
{
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1);
return check_for_bonded_ethdev(&rte_eth_devices[port_id]);
}
int
-valid_slave_port_id(uint8_t port_id, uint8_t mode)
+valid_slave_port_id(uint16_t port_id, uint8_t mode)
{
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1);
@@ -80,7 +80,7 @@ valid_slave_port_id(uint8_t port_id, uint8_t mode)
}
void
-activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
+activate_slave(struct rte_eth_dev *eth_dev, uint16_t port_id)
{
struct bond_dev_private *internals = eth_dev->data->dev_private;
uint8_t active_count = internals->active_slave_count;
@@ -107,11 +107,11 @@ activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
}
void
-deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id)
+deactivate_slave(struct rte_eth_dev *eth_dev, uint16_t port_id)
{
- uint8_t slave_pos;
+ uint16_t slave_pos;
struct bond_dev_private *internals = eth_dev->data->dev_private;
- uint8_t active_count = internals->active_slave_count;
+ uint16_t active_count = internals->active_slave_count;
if (internals->mode == BONDING_MODE_8023AD) {
bond_mode_8023ad_stop(eth_dev);
@@ -153,7 +153,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
{
struct bond_dev_private *internals;
char devargs[52];
- uint8_t port_id;
+ uint16_t port_id;
int ret;
if (name == NULL) {
@@ -193,7 +193,7 @@ rte_eth_bond_free(const char *name)
}
static int
-slave_vlan_filter_set(uint8_t bonded_port_id, uint8_t slave_port_id)
+slave_vlan_filter_set(uint16_t bonded_port_id, uint16_t slave_port_id)
{
struct rte_eth_dev *bonded_eth_dev;
struct bond_dev_private *internals;
@@ -233,7 +233,7 @@ slave_vlan_filter_set(uint8_t bonded_port_id, uint8_t slave_port_id)
}
static int
-__eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
+__eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id)
{
struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
struct bond_dev_private *internals;
@@ -302,8 +302,13 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
internals->tx_offload_capa &= dev_info.tx_offload_capa;
internals->flow_type_rss_offloads &= dev_info.flow_type_rss_offloads;
- link_properties_valid(bonded_eth_dev,
- &slave_eth_dev->data->dev_link);
+ if (link_properties_valid(bonded_eth_dev,
+ &slave_eth_dev->data->dev_link) != 0) {
+ RTE_BOND_LOG(ERR, "Invalid link properties for slave %d"
+ " in bonding mode %d", slave_port_id,
+ internals->mode);
+ return -1;
+ }
/* RETA size is GCD of all slaves RETA sizes, so, if all sizes will be
* the power of 2, the lower one is GCD
@@ -363,7 +368,7 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
}
int
-rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
+rte_eth_bond_slave_add(uint16_t bonded_port_id, uint16_t slave_port_id)
{
struct rte_eth_dev *bonded_eth_dev;
struct bond_dev_private *internals;
@@ -387,7 +392,8 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id)
}
static int
-__eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
+__eth_bond_slave_remove_lock_free(uint16_t bonded_port_id,
+ uint16_t slave_port_id)
{
struct rte_eth_dev *bonded_eth_dev;
struct bond_dev_private *internals;
@@ -466,7 +472,7 @@ __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
}
int
-rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id)
+rte_eth_bond_slave_remove(uint16_t bonded_port_id, uint16_t slave_port_id)
{
struct rte_eth_dev *bonded_eth_dev;
struct bond_dev_private *internals;
@@ -488,7 +494,7 @@ rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id)
}
int
-rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode)
+rte_eth_bond_mode_set(uint16_t bonded_port_id, uint8_t mode)
{
if (valid_bonded_port_id(bonded_port_id) != 0)
return -1;
@@ -497,7 +503,7 @@ rte_eth_bond_mode_set(uint8_t bonded_port_id, uint8_t mode)
}
int
-rte_eth_bond_mode_get(uint8_t bonded_port_id)
+rte_eth_bond_mode_get(uint16_t bonded_port_id)
{
struct bond_dev_private *internals;
@@ -510,7 +516,7 @@ rte_eth_bond_mode_get(uint8_t bonded_port_id)
}
int
-rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id)
+rte_eth_bond_primary_set(uint16_t bonded_port_id, uint16_t slave_port_id)
{
struct bond_dev_private *internals;
@@ -531,7 +537,7 @@ rte_eth_bond_primary_set(uint8_t bonded_port_id, uint8_t slave_port_id)
}
int
-rte_eth_bond_primary_get(uint8_t bonded_port_id)
+rte_eth_bond_primary_get(uint16_t bonded_port_id)
{
struct bond_dev_private *internals;
@@ -547,7 +553,8 @@ rte_eth_bond_primary_get(uint8_t bonded_port_id)
}
int
-rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len)
+rte_eth_bond_slaves_get(uint16_t bonded_port_id, uint16_t slaves[],
+ uint16_t len)
{
struct bond_dev_private *internals;
uint8_t i;
@@ -570,8 +577,8 @@ rte_eth_bond_slaves_get(uint8_t bonded_port_id, uint8_t slaves[], uint8_t len)
}
int
-rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[],
- uint8_t len)
+rte_eth_bond_active_slaves_get(uint16_t bonded_port_id, uint16_t slaves[],
+ uint16_t len)
{
struct bond_dev_private *internals;
@@ -586,13 +593,14 @@ rte_eth_bond_active_slaves_get(uint8_t bonded_port_id, uint8_t slaves[],
if (internals->active_slave_count > len)
return -1;
- memcpy(slaves, internals->active_slaves, internals->active_slave_count);
+ memcpy(slaves, internals->active_slaves,
+ internals->active_slave_count * sizeof(internals->active_slaves[0]));
return internals->active_slave_count;
}
int
-rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
+rte_eth_bond_mac_address_set(uint16_t bonded_port_id,
struct ether_addr *mac_addr)
{
struct rte_eth_dev *bonded_eth_dev;
@@ -618,7 +626,7 @@ rte_eth_bond_mac_address_set(uint8_t bonded_port_id,
}
int
-rte_eth_bond_mac_address_reset(uint8_t bonded_port_id)
+rte_eth_bond_mac_address_reset(uint16_t bonded_port_id)
{
struct rte_eth_dev *bonded_eth_dev;
struct bond_dev_private *internals;
@@ -647,7 +655,7 @@ rte_eth_bond_mac_address_reset(uint8_t bonded_port_id)
}
int
-rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy)
+rte_eth_bond_xmit_policy_set(uint16_t bonded_port_id, uint8_t policy)
{
struct bond_dev_private *internals;
@@ -677,7 +685,7 @@ rte_eth_bond_xmit_policy_set(uint8_t bonded_port_id, uint8_t policy)
}
int
-rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
+rte_eth_bond_xmit_policy_get(uint16_t bonded_port_id)
{
struct bond_dev_private *internals;
@@ -690,7 +698,7 @@ rte_eth_bond_xmit_policy_get(uint8_t bonded_port_id)
}
int
-rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms)
+rte_eth_bond_link_monitoring_set(uint16_t bonded_port_id, uint32_t internal_ms)
{
struct bond_dev_private *internals;
@@ -704,7 +712,7 @@ rte_eth_bond_link_monitoring_set(uint8_t bonded_port_id, uint32_t internal_ms)
}
int
-rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id)
+rte_eth_bond_link_monitoring_get(uint16_t bonded_port_id)
{
struct bond_dev_private *internals;
@@ -717,7 +725,8 @@ rte_eth_bond_link_monitoring_get(uint8_t bonded_port_id)
}
int
-rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
+rte_eth_bond_link_down_prop_delay_set(uint16_t bonded_port_id,
+ uint32_t delay_ms)
{
struct bond_dev_private *internals;
@@ -732,7 +741,7 @@ rte_eth_bond_link_down_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
}
int
-rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id)
+rte_eth_bond_link_down_prop_delay_get(uint16_t bonded_port_id)
{
struct bond_dev_private *internals;
@@ -745,7 +754,7 @@ rte_eth_bond_link_down_prop_delay_get(uint8_t bonded_port_id)
}
int
-rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
+rte_eth_bond_link_up_prop_delay_set(uint16_t bonded_port_id, uint32_t delay_ms)
{
struct bond_dev_private *internals;
@@ -760,7 +769,7 @@ rte_eth_bond_link_up_prop_delay_set(uint8_t bonded_port_id, uint32_t delay_ms)
}
int
-rte_eth_bond_link_up_prop_delay_get(uint8_t bonded_port_id)
+rte_eth_bond_link_up_prop_delay_get(uint16_t bonded_port_id)
{
struct bond_dev_private *internals;
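The active-slave copy above now scales by the element size because the ids are 16-bit. A consumer-side sketch, assuming bond_port is a valid bonded port:

#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_eth_bond.h>

static void
print_active_slaves(uint16_t bond_port)
{
	uint16_t slaves[RTE_MAX_ETHPORTS];
	int i, n;

	/* The caller's buffer must hold uint16_t ids after this change. */
	n = rte_eth_bond_active_slaves_get(bond_port, slaves,
			RTE_MAX_ETHPORTS);
	for (i = 0; i < n; i++)
		printf("active slave %d: port %u\n", i, slaves[i]);
}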
diff --git a/drivers/net/bonding/rte_eth_bond_args.c b/drivers/net/bonding/rte_eth_bond_args.c
index bb634c62..e816da31 100644
--- a/drivers/net/bonding/rte_eth_bond_args.c
+++ b/drivers/net/bonding/rte_eth_bond_args.c
@@ -33,6 +33,7 @@
#include <rte_devargs.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_kvargs.h>
#include <cmdline_parse.h>
@@ -61,16 +62,6 @@ find_port_id_by_pci_addr(const struct rte_pci_addr *pci_addr)
unsigned i;
for (i = 0; i < rte_eth_dev_count(); i++) {
-
- /* Currently populated by rte_eth_copy_pci_info().
- *
- * TODO: Once the PCI bus has arrived we should have a better
- * way to test for being a PCI device or not.
- */
- if (rte_eth_devices[i].data->kdrv == RTE_KDRV_UNKNOWN ||
- rte_eth_devices[i].data->kdrv == RTE_KDRV_NONE)
- continue;
-
pci_dev = RTE_ETH_DEV_TO_PCI(&rte_eth_devices[i]);
eth_pci_addr = &pci_dev->addr;
@@ -98,6 +89,16 @@ find_port_id_by_dev_name(const char *name)
return -1;
}
+static inline int
+bond_pci_addr_cmp(const struct rte_device *dev, const void *_pci_addr)
+{
+ struct rte_pci_device *pdev;
+ const struct rte_pci_addr *paddr = _pci_addr;
+
+ pdev = RTE_DEV_TO_PCI(*(struct rte_device **)(void *)&dev);
+ return rte_eal_compare_pci_addr(&pdev->addr, paddr);
+}
+
/**
* Parses a port identifier string to a port id by pci address, then by name,
* and finally port id.
@@ -106,10 +107,23 @@ static inline int
parse_port_id(const char *port_str)
{
struct rte_pci_addr dev_addr;
+ struct rte_bus *pci_bus;
+ struct rte_device *dev;
int port_id;
+ pci_bus = rte_bus_find_by_name("pci");
+ if (pci_bus == NULL) {
+ RTE_LOG(ERR, PMD, "unable to find PCI bus\n");
+ return -1;
+ }
+
/* try parsing as pci address, physical devices */
- if (eal_parse_pci_DomBDF(port_str, &dev_addr) == 0) {
+ if (pci_bus->parse(port_str, &dev_addr) == 0) {
+ dev = pci_bus->find_device(NULL, bond_pci_addr_cmp, &dev_addr);
+ if (dev == NULL) {
+ RTE_LOG(ERR, PMD, "unable to find PCI device\n");
+ return -1;
+ }
port_id = find_port_id_by_pci_addr(&dev_addr);
if (port_id < 0)
return -1;
@@ -153,7 +167,7 @@ bond_ethdev_parse_slave_port_kvarg(const char *key,
return -1;
} else
slave_ports->slaves[slave_ports->slave_count++] =
- (uint8_t)port_id;
+ port_id;
}
return 0;
}
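Devargs port lookup now goes through the PCI bus abstraction instead of the removed EAL helper. A condensed sketch of the same pattern as the code added above; the comparison callback is whatever the caller supplies (bond_pci_addr_cmp() in the patch), and error reporting is trimmed:

#include <rte_dev.h>
#include <rte_bus.h>
#include <rte_bus_pci.h>

/* Returns 0 when 'port_str' names a probed PCI device (sketch only). */
static int
lookup_pci_port(const char *port_str, struct rte_pci_addr *addr,
		rte_dev_cmp_t cmp_fn)
{
	struct rte_bus *pci_bus = rte_bus_find_by_name("pci");

	if (pci_bus == NULL || pci_bus->parse(port_str, addr) != 0)
		return -1;

	return pci_bus->find_device(NULL, cmp_fn, addr) != NULL ? 0 : -1;
}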
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 3ee70baa..fe232895 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -43,7 +43,7 @@
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
@@ -125,11 +125,12 @@ bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
}
static inline uint8_t
-is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
+is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
- return !vlan_tci && (ethertype == ether_type_slow_be &&
+ return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
+ (ethertype == ether_type_slow_be &&
(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
@@ -174,13 +175,14 @@ const struct rte_flow_attr flow_attr_8023ad = {
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
- uint8_t slave_port) {
+ uint16_t slave_port) {
+ struct rte_eth_dev_info slave_info;
struct rte_flow_error error;
struct bond_dev_private *internals = (struct bond_dev_private *)
(bond_dev->data->dev_private);
- struct rte_flow_action_queue lacp_queue_conf = {
- .index = internals->mode4.dedicated_queues.rx_qid,
+ const struct rte_flow_action_queue lacp_queue_conf = {
+ .index = 0,
};
const struct rte_flow_action actions[] = {
@@ -195,19 +197,32 @@ bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
flow_item_8023ad, actions, &error);
- if (ret < 0)
+ if (ret < 0) {
+ RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
+ __func__, error.message, slave_port,
+ internals->mode4.dedicated_queues.rx_qid);
return -1;
+ }
+
+ rte_eth_dev_info_get(slave_port, &slave_info);
+ if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
+ slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
+ RTE_BOND_LOG(ERR,
+ "%s: Slave %d capabilities doesn't allow to allocate additional queues",
+ __func__, slave_port);
+ return -1;
+ }
return 0;
}
int
-bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id) {
+bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
struct bond_dev_private *internals = (struct bond_dev_private *)
(bond_dev->data->dev_private);
- struct rte_eth_dev_info bond_info, slave_info;
- uint8_t idx;
+ struct rte_eth_dev_info bond_info;
+ uint16_t idx;
/* Verify if all slaves in bonding support flow director and */
if (internals->slave_count > 0) {
@@ -217,9 +232,6 @@ bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id) {
internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
for (idx = 0; idx < internals->slave_count; idx++) {
- rte_eth_dev_info_get(internals->slaves[idx].port_id,
- &slave_info);
-
if (bond_ethdev_8023ad_flow_verify(bond_dev,
internals->slaves[idx].port_id) != 0)
return -1;
@@ -230,7 +242,7 @@ bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id) {
}
int
-bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint8_t slave_port) {
+bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
struct rte_flow_error error;
struct bond_dev_private *internals = (struct bond_dev_private *)
@@ -270,10 +282,10 @@ bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
struct bond_dev_private *internals = bd_rx_q->dev_private;
uint16_t num_rx_total = 0; /* Total number of received packets */
- uint8_t slaves[RTE_MAX_ETHPORTS];
- uint8_t slave_count;
+ uint16_t slaves[RTE_MAX_ETHPORTS];
+ uint16_t slave_count;
- uint8_t i, idx;
+ uint16_t i, idx;
/* Copy slave list to protect against slave up/down changes during tx
* bursting */
@@ -302,8 +314,8 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
struct bond_dev_private *internals;
struct bond_tx_queue *bd_tx_q;
- uint8_t num_of_slaves;
- uint8_t slaves[RTE_MAX_ETHPORTS];
+ uint16_t num_of_slaves;
+ uint16_t slaves[RTE_MAX_ETHPORTS];
/* positions in slaves, not ID */
uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
uint8_t distributing_count;
@@ -394,8 +406,8 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
uint16_t num_rx_total = 0; /* Total number of received packets */
- uint8_t slaves[RTE_MAX_ETHPORTS];
- uint8_t slave_count, idx;
+ uint16_t slaves[RTE_MAX_ETHPORTS];
+ uint16_t slave_count, idx;
uint8_t collecting; /* current slave collecting status */
const uint8_t promisc = internals->promiscuous_en;
@@ -444,7 +456,7 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
/* Remove packet from array if it is slow packet or slave is not
* in collecting state or bonding interface is not in promiscuous
* mode and packet address does not match. */
- if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
+ if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
!collecting || (!promisc &&
!is_multicast_ether_addr(&hdr->d_addr) &&
!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
@@ -526,7 +538,7 @@ ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
#define MAX_CLIENTS_NUMBER 128
uint8_t active_clients;
struct client_stats_t {
- uint8_t port;
+ uint16_t port;
uint32_t ipv4_addr;
uint32_t ipv4_rx_packets;
uint32_t ipv4_tx_packets;
@@ -534,7 +546,7 @@ struct client_stats_t {
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
static void
-update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
+update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
int i = 0;
@@ -592,7 +604,7 @@ update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
- uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
+ uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
@@ -673,8 +685,8 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
- uint8_t num_of_slaves;
- uint8_t slaves[RTE_MAX_ETHPORTS];
+ uint16_t num_of_slaves;
+ uint16_t slaves[RTE_MAX_ETHPORTS];
uint16_t num_tx_total = 0, num_tx_slave;
@@ -904,7 +916,7 @@ bandwidth_cmp(const void *a, const void *b)
}
static void
-bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
+bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
struct bwg_slave *bwg_slave)
{
struct rte_eth_link link_status;
@@ -970,10 +982,10 @@ bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
struct rte_eth_dev *primary_port =
&rte_eth_devices[internals->primary_port];
uint16_t num_tx_total = 0;
- uint8_t i, j;
+ uint16_t i, j;
- uint8_t num_of_slaves = internals->active_slave_count;
- uint8_t slaves[RTE_MAX_ETHPORTS];
+ uint16_t num_of_slaves = internals->active_slave_count;
+ uint16_t slaves[RTE_MAX_ETHPORTS];
struct ether_hdr *ether_hdr;
struct ether_addr primary_slave_addr;
@@ -1059,7 +1071,7 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
uint16_t num_send, num_not_send = 0;
uint16_t num_tx_total = 0;
- uint8_t slave_idx;
+ uint16_t slave_idx;
int i, j;
@@ -1178,8 +1190,8 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
struct bond_dev_private *internals;
struct bond_tx_queue *bd_tx_q;
- uint8_t num_of_slaves;
- uint8_t slaves[RTE_MAX_ETHPORTS];
+ uint16_t num_of_slaves;
+ uint16_t slaves[RTE_MAX_ETHPORTS];
uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
@@ -1239,8 +1251,8 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
struct bond_dev_private *internals;
struct bond_tx_queue *bd_tx_q;
- uint8_t num_of_slaves;
- uint8_t slaves[RTE_MAX_ETHPORTS];
+ uint16_t num_of_slaves;
+ uint16_t slaves[RTE_MAX_ETHPORTS];
/* positions in slaves, not ID */
uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
uint8_t distributing_count;
@@ -1333,7 +1345,7 @@ bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
struct bond_tx_queue *bd_tx_q;
uint8_t tx_failed_flag = 0, num_of_slaves;
- uint8_t slaves[RTE_MAX_ETHPORTS];
+ uint16_t slaves[RTE_MAX_ETHPORTS];
uint16_t max_nb_of_tx_pkts = 0;
@@ -1861,7 +1873,7 @@ slave_add(struct bond_dev_private *internals,
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
- uint8_t slave_port_id)
+ uint16_t slave_port_id)
{
int i;
@@ -2047,7 +2059,7 @@ bond_ethdev_close(struct rte_eth_dev *dev)
RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
while (internals->slave_count != skipped) {
- uint8_t port_id = internals->slaves[skipped].port_id;
+ uint16_t port_id = internals->slaves[skipped].port_id;
rte_eth_dev_stop(port_id);
@@ -2125,7 +2137,7 @@ static int
bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
int res;
- uint8_t i;
+ uint16_t i;
struct bond_dev_private *internals = dev->data->dev_private;
/* don't do this while a slave is being added */
@@ -2137,7 +2149,7 @@ bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
for (i = 0; i < internals->slave_count; i++) {
- uint8_t port_id = internals->slaves[i].port_id;
+ uint16_t port_id = internals->slaves[i].port_id;
res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
if (res == ENOTSUP)
@@ -2277,7 +2289,7 @@ bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
static int
bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
{
- void (*link_update)(uint8_t port_id, struct rte_eth_link *eth_link);
+ void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
struct bond_dev_private *bond_ctx;
struct rte_eth_link slave_link;
@@ -2359,7 +2371,7 @@ bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
}
-static void
+static int
bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct bond_dev_private *internals = dev->data->dev_private;
@@ -2387,6 +2399,8 @@ bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
}
}
+
+ return 0;
}
static void
@@ -2466,7 +2480,7 @@ bond_ethdev_delayed_lsc_propagation(void *arg)
}
int
-bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
+bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
void *param, void *ret_param __rte_unused)
{
struct rte_eth_dev *bonded_eth_dev;
@@ -2747,8 +2761,7 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
}
eth_dev->dev_ops = &default_dev_ops;
- eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
- RTE_ETH_DEV_DETACHABLE;
+ eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
rte_spinlock_init(&internals->lock);
@@ -2827,6 +2840,7 @@ bond_probe(struct rte_vdev_device *dev)
struct rte_kvargs *kvlist;
uint8_t bonding_mode, socket_id/*, agg_mode*/;
int arg_count, port_id;
+ uint8_t agg_mode;
if (!dev)
return -EINVAL;
@@ -2884,6 +2898,25 @@ bond_probe(struct rte_vdev_device *dev)
internals = rte_eth_devices[port_id].data->dev_private;
internals->kvlist = kvlist;
+
+ if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
+ if (rte_kvargs_process(kvlist,
+ PMD_BOND_AGG_MODE_KVARG,
+ &bond_ethdev_parse_slave_agg_mode_kvarg,
+ &agg_mode) != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to parse agg selection mode for bonded device %s\n",
+ name);
+ goto parse_error;
+ }
+
+ if (internals->mode == BONDING_MODE_8023AD)
+ rte_eth_bond_8023ad_agg_selection_set(port_id,
+ agg_mode);
+ } else {
+ rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
+ }
+
RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
"socket %u.\n", name, port_id, bonding_mode, socket_id);
return 0;
@@ -2951,7 +2984,7 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
struct bond_dev_private *internals = dev->data->dev_private;
struct rte_kvargs *kvlist = internals->kvlist;
int arg_count;
- uint8_t port_id = dev - rte_eth_devices;
+ uint16_t port_id = dev - rte_eth_devices;
uint8_t agg_mode;
static const uint8_t default_rss_key[40] = {
@@ -3050,7 +3083,6 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
name);
}
if (internals->mode == BONDING_MODE_8023AD)
- if (agg_mode != 0)
rte_eth_bond_8023ad_agg_selection_set(port_id,
agg_mode);
}
@@ -3086,7 +3118,7 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
/* Parse/set primary slave port id*/
arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
if (arg_count == 1) {
- uint8_t primary_slave_port_id;
+ uint16_t primary_slave_port_id;
if (rte_kvargs_process(kvlist,
PMD_BOND_PRIMARY_SLAVE_KVARG,
@@ -3099,7 +3131,7 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
}
/* Set balance mode transmit policy*/
- if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
+ if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
!= 0) {
RTE_LOG(ERR, EAL,
"Failed to set primary slave port %d on bonded device %s\n",
diff --git a/drivers/net/bonding/rte_eth_bond_private.h b/drivers/net/bonding/rte_eth_bond_private.h
index 1fe6ff88..1392da98 100644
--- a/drivers/net/bonding/rte_eth_bond_private.h
+++ b/drivers/net/bonding/rte_eth_bond_private.h
@@ -93,12 +93,12 @@ struct bond_tx_queue {
/** Bonded slave devices structure */
struct bond_ethdev_slave_ports {
- uint8_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */
- uint8_t slave_count; /**< Number of slaves */
+ uint16_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */
+ uint16_t slave_count; /**< Number of slaves */
};
struct bond_slave_details {
- uint8_t port_id;
+ uint16_t port_id;
uint8_t link_status_poll_enabled;
uint8_t link_status_wait_to_complete;
@@ -114,14 +114,14 @@ typedef uint16_t (*xmit_hash_t)(const struct rte_mbuf *buf, uint8_t slave_count)
/** Link Bonding PMD device private configuration Structure */
struct bond_dev_private {
- uint8_t port_id; /**< Port Id of Bonded Port */
+ uint16_t port_id; /**< Port Id of Bonded Port */
uint8_t mode; /**< Link Bonding Mode */
rte_spinlock_t lock;
- uint8_t primary_port; /**< Primary Slave Port */
- uint8_t current_primary_port; /**< Primary Slave Port */
- uint8_t user_defined_primary_port;
+ uint16_t primary_port; /**< Primary Slave Port */
+ uint16_t current_primary_port; /**< Primary Slave Port */
+ uint16_t user_defined_primary_port;
/**< Flag for whether primary port is user defined or not */
uint8_t balance_xmit_policy;
@@ -144,16 +144,17 @@ struct bond_dev_private {
uint16_t nb_rx_queues; /**< Total number of rx queues */
uint16_t nb_tx_queues; /**< Total number of tx queues*/
- uint8_t active_slave; /**< Next active_slave to poll */
- uint8_t active_slave_count; /**< Number of active slaves */
- uint8_t active_slaves[RTE_MAX_ETHPORTS]; /**< Active slave list */
+ uint16_t active_slave; /**< Next active_slave to poll */
+ uint16_t active_slave_count; /**< Number of active slaves */
+ uint16_t active_slaves[RTE_MAX_ETHPORTS]; /**< Active slave list */
- uint8_t slave_count; /**< Number of bonded slaves */
+ uint16_t slave_count; /**< Number of bonded slaves */
struct bond_slave_details slaves[RTE_MAX_ETHPORTS];
/**< Array of bonded slaves details */
struct mode8023ad_private mode4;
- uint8_t tlb_slaves_order[RTE_MAX_ETHPORTS]; /* TLB active slaves send order */
+ uint16_t tlb_slaves_order[RTE_MAX_ETHPORTS];
+ /**< TLB active slaves send order */
struct mode_alb_private mode6;
uint32_t rx_offload_capa; /** Rx offload capability */
@@ -186,10 +187,10 @@ check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev);
/* Search given slave array to find position of given id.
* Return slave pos or slaves_count if not found. */
-static inline uint8_t
-find_slave_by_id(uint8_t *slaves, uint8_t slaves_count, uint8_t slave_id) {
+static inline uint16_t
+find_slave_by_id(uint16_t *slaves, uint16_t slaves_count, uint16_t slave_id) {
- uint8_t pos;
+ uint16_t pos;
for (pos = 0; pos < slaves_count; pos++) {
if (slave_id == slaves[pos])
break;
@@ -199,19 +200,19 @@ find_slave_by_id(uint8_t *slaves, uint8_t slaves_count, uint8_t slave_id) {
}
int
-valid_port_id(uint8_t port_id);
+valid_port_id(uint16_t port_id);
int
-valid_bonded_port_id(uint8_t port_id);
+valid_bonded_port_id(uint16_t port_id);
int
-valid_slave_port_id(uint8_t port_id, uint8_t mode);
+valid_slave_port_id(uint16_t port_id, uint8_t mode);
void
-deactivate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id);
+deactivate_slave(struct rte_eth_dev *eth_dev, uint16_t port_id);
void
-activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id);
+activate_slave(struct rte_eth_dev *eth_dev, uint16_t port_id);
void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
@@ -255,10 +256,10 @@ xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count);
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
- uint8_t slave_port_id);
+ uint16_t slave_port_id);
int
-bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
+bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
void *param, void *ret_param);
int
diff --git a/drivers/net/bonding/rte_eth_bond_version.map b/drivers/net/bonding/rte_pmd_bond_version.map
index 0f4e847d..ec3374b0 100644
--- a/drivers/net/bonding/rte_eth_bond_version.map
+++ b/drivers/net/bonding/rte_pmd_bond_version.map
@@ -1,8 +1,6 @@
DPDK_2.0 {
global:
- rte_eth_bond_8023ad_conf_get;
- rte_eth_bond_8023ad_setup;
rte_eth_bond_active_slaves_get;
rte_eth_bond_create;
rte_eth_bond_link_monitoring_set;
@@ -39,8 +37,6 @@ DPDK_16.07 {
rte_eth_bond_8023ad_ext_distrib;
rte_eth_bond_8023ad_ext_distrib_get;
rte_eth_bond_8023ad_ext_slowtx;
- rte_eth_bond_8023ad_conf_get;
- rte_eth_bond_8023ad_setup;
} DPDK_16.04;
diff --git a/drivers/net/cxgbe/Makefile b/drivers/net/cxgbe/Makefile
index 7cef6279..65df1425 100644
--- a/drivers/net/cxgbe/Makefile
+++ b/drivers/net/cxgbe/Makefile
@@ -62,12 +62,15 @@ endif
CFLAGS_BASE_DRIVER =
endif
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
#
# Add extra flags for base driver files (also known as shared code)
# to disable warnings in them
#
-BASE_DRIVER_OBJS=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))
+BASE_DRIVER_OBJS=$(sort $(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c))))
$(foreach obj, $(BASE_DRIVER_OBJS), $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
VPATH += $(SRCDIR)/base
diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index 5e5f221e..f2057af1 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -36,6 +36,7 @@
#ifndef __T4_ADAPTER_H__
#define __T4_ADAPTER_H__
+#include <rte_bus_pci.h>
#include <rte_mbuf.h>
#include <rte_io.h>
diff --git a/drivers/net/cxgbe/base/t4_hw.c b/drivers/net/cxgbe/base/t4_hw.c
index a8ccea00..282e2e62 100644
--- a/drivers/net/cxgbe/base/t4_hw.c
+++ b/drivers/net/cxgbe/base/t4_hw.c
@@ -40,7 +40,6 @@
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_alarm.h>
@@ -403,6 +402,7 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox,
t4_os_atomic_list_del(&entry, &adap->mbox_list,
&adap->mbox_lock);
t4_report_fw_error(adap);
+ free(temp);
return (pcie_fw & F_PCIE_FW_ERR) ? -ENXIO : -EBUSY;
}
@@ -446,6 +446,7 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox,
&adap->mbox_list,
&adap->mbox_lock));
t4_report_fw_error(adap);
+ free(temp);
return (v == X_MBOWNER_FW ? -EBUSY : -ETIMEDOUT);
}
@@ -546,6 +547,7 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox,
T4_OS_MBOX_LOCKING(
t4_os_atomic_list_del(&entry, &adap->mbox_list,
&adap->mbox_lock));
+ free(temp);
return -G_FW_CMD_RETVAL((int)res);
}
}
diff --git a/drivers/net/cxgbe/cxgbe_ethdev.c b/drivers/net/cxgbe/cxgbe_ethdev.c
index 7bca4561..dc153c73 100644
--- a/drivers/net/cxgbe/cxgbe_ethdev.c
+++ b/drivers/net/cxgbe/cxgbe_ethdev.c
@@ -48,10 +48,10 @@
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_alarm.h>
@@ -647,7 +647,7 @@ static void cxgbe_dev_rx_queue_release(void *q)
/*
* Get port statistics.
*/
-static void cxgbe_dev_stats_get(struct rte_eth_dev *eth_dev,
+static int cxgbe_dev_stats_get(struct rte_eth_dev *eth_dev,
struct rte_eth_stats *eth_stats)
{
struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
@@ -690,6 +690,7 @@ static void cxgbe_dev_stats_get(struct rte_eth_dev *eth_dev,
eth_stats->q_obytes[i] = txq->stats.tx_bytes;
eth_stats->q_errors[i] = txq->stats.mapping_err;
}
+ return 0;
}
/*
diff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c
index b709fe2b..5b828c23 100644
--- a/drivers/net/cxgbe/cxgbe_main.c
+++ b/drivers/net/cxgbe/cxgbe_main.c
@@ -51,7 +51,6 @@
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_alarm.h>
diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c
index 5376fc50..fc10d958 100644
--- a/drivers/net/cxgbe/sge.c
+++ b/drivers/net/cxgbe/sge.c
@@ -149,7 +149,7 @@ static int map_mbuf(struct rte_mbuf *mbuf, dma_addr_t *addr)
struct rte_mbuf *m = mbuf;
for (; m; m = m->next, addr++) {
- *addr = m->buf_physaddr + rte_pktmbuf_headroom(m);
+ *addr = m->buf_iova + rte_pktmbuf_headroom(m);
if (*addr == 0)
goto out_err;
}
@@ -423,7 +423,7 @@ static unsigned int refill_fl_usembufs(struct adapter *adap, struct sge_fl *q,
mbuf->nb_segs = 1;
mbuf->port = rxq->rspq.port_id;
- mapping = (dma_addr_t)RTE_ALIGN(mbuf->buf_physaddr +
+ mapping = (dma_addr_t)RTE_ALIGN(mbuf->buf_iova +
mbuf->data_off,
adap->sge.fl_align);
mapping |= buf_size_idx;
@@ -1318,7 +1318,7 @@ alloc_sw_ring:
if (metadata)
*(void **)metadata = s;
- *phys = (uint64_t)tz->phys_addr;
+ *phys = (uint64_t)tz->iova;
return tz->addr;
}
@@ -1405,7 +1405,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
}
if (pkt->vlan_ex) {
- mbuf->ol_flags |= PKT_RX_VLAN_PKT;
+ mbuf->ol_flags |= PKT_RX_VLAN;
mbuf->vlan_tci = ntohs(pkt->vlan);
}
rxq->stats.pkts++;
@@ -1550,7 +1550,7 @@ static int process_responses(struct sge_rspq *q, int budget,
}
if (cpl->vlan_ex) {
- pkt->ol_flags |= PKT_RX_VLAN_PKT;
+ pkt->ol_flags |= PKT_RX_VLAN;
pkt->vlan_tci = ntohs(cpl->vlan);
}
diff --git a/drivers/net/dpaa/Makefile b/drivers/net/dpaa/Makefile
new file mode 100644
index 00000000..171686ec
--- /dev/null
+++ b/drivers/net/dpaa/Makefile
@@ -0,0 +1,63 @@
+# BSD LICENSE
+#
+# Copyright 2017 NXP.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of NXP nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+RTE_SDK_DPAA=$(RTE_SDK)/drivers/net/dpaa
+
+#
+# library name
+#
+LIB = librte_pmd_dpaa.a
+
+CFLAGS := -I$(SRCDIR) $(CFLAGS)
+CFLAGS += -O3 $(WERROR_FLAGS)
+CFLAGS += -Wno-pointer-arith
+CFLAGS += -I$(RTE_SDK_DPAA)/
+CFLAGS += -I$(RTE_SDK_DPAA)/include
+CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa
+CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa/include/
+CFLAGS += -I$(RTE_SDK)/drivers/mempool/dpaa
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/linuxapp/eal/include
+
+EXPORT_MAP := rte_pmd_dpaa_version.map
+
+LIBABIVER := 1
+
+# Interfaces with DPDK
+SRCS-$(CONFIG_RTE_LIBRTE_DPAA_PMD) += dpaa_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_DPAA_PMD) += dpaa_rxtx.c
+
+LDLIBS += -lrte_bus_dpaa
+LDLIBS += -lrte_mempool_dpaa
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/dpaa/dpaa_ethdev.c b/drivers/net/dpaa/dpaa_ethdev.c
new file mode 100644
index 00000000..cf5a2ecf
--- /dev/null
+++ b/drivers/net/dpaa/dpaa_ethdev.c
@@ -0,0 +1,1109 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright 2017 NXP.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Freescale Semiconductor, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* System headers */
+#include <stdio.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <limits.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_interrupts.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_pci.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_memory.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_alarm.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_ring.h>
+
+#include <rte_dpaa_bus.h>
+#include <rte_dpaa_logs.h>
+#include <dpaa_mempool.h>
+
+#include <dpaa_ethdev.h>
+#include <dpaa_rxtx.h>
+
+#include <fsl_usd.h>
+#include <fsl_qman.h>
+#include <fsl_bman.h>
+#include <fsl_fman.h>
+
+/* Keep track of whether QMAN and BMAN have been globally initialized */
+static int is_global_init;
+
+struct rte_dpaa_xstats_name_off {
+ char name[RTE_ETH_XSTATS_NAME_SIZE];
+ uint32_t offset;
+};
+
+static const struct rte_dpaa_xstats_name_off dpaa_xstats_strings[] = {
+ {"rx_align_err",
+ offsetof(struct dpaa_if_stats, raln)},
+ {"rx_valid_pause",
+ offsetof(struct dpaa_if_stats, rxpf)},
+ {"rx_fcs_err",
+ offsetof(struct dpaa_if_stats, rfcs)},
+ {"rx_vlan_frame",
+ offsetof(struct dpaa_if_stats, rvlan)},
+ {"rx_frame_err",
+ offsetof(struct dpaa_if_stats, rerr)},
+ {"rx_drop_err",
+ offsetof(struct dpaa_if_stats, rdrp)},
+ {"rx_undersized",
+ offsetof(struct dpaa_if_stats, rund)},
+ {"rx_oversize_err",
+ offsetof(struct dpaa_if_stats, rovr)},
+ {"rx_fragment_pkt",
+ offsetof(struct dpaa_if_stats, rfrg)},
+ {"tx_valid_pause",
+ offsetof(struct dpaa_if_stats, txpf)},
+ {"tx_fcs_err",
+ offsetof(struct dpaa_if_stats, terr)},
+ {"tx_vlan_frame",
+ offsetof(struct dpaa_if_stats, tvlan)},
+ {"rx_undersized",
+ offsetof(struct dpaa_if_stats, tund)},
+};
+
+static int
+dpaa_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (mtu < ETHER_MIN_MTU)
+ return -EINVAL;
+ if (mtu > ETHER_MAX_LEN)
+ dev->data->dev_conf.rxmode.jumbo_frame = 1;
+ else
+ dev->data->dev_conf.rxmode.jumbo_frame = 0;
+
+ dev->data->dev_conf.rxmode.max_rx_pkt_len = mtu;
+
+ fman_if_set_maxfrm(dpaa_intf->fif, mtu);
+
+ return 0;
+}
+
+static int
+dpaa_eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
+ if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+ DPAA_MAX_RX_PKT_LEN)
+ return dpaa_mtu_set(dev,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len);
+ else
+ return -1;
+ }
+ return 0;
+}
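+
+/* dpaa_eth_dev_configure() accepts a jumbo-frame configuration only when
+ * max_rx_pkt_len fits within DPAA_MAX_RX_PKT_LEN, in which case it reuses
+ * dpaa_mtu_set() to program the MAC. A minimal application-side sketch of
+ * exercising this path through the generic configure call follows; the port
+ * id, the single queue pair and the 9000-byte frame length are illustrative
+ * assumptions.
+ *
+ *	#include <string.h>
+ *	#include <rte_ethdev.h>
+ *
+ *	static int
+ *	configure_jumbo(uint16_t port_id)
+ *	{
+ *		struct rte_eth_conf conf;
+ *
+ *		memset(&conf, 0, sizeof(conf));
+ *		conf.rxmode.jumbo_frame = 1;
+ *		conf.rxmode.max_rx_pkt_len = 9000; // <= DPAA_MAX_RX_PKT_LEN
+ *
+ *		return rte_eth_dev_configure(port_id, 1, 1, &conf);
+ *	}
+ */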
+
+static const uint32_t *
+dpaa_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+ static const uint32_t ptypes[] = {
+ /* todo - add more types */
+ RTE_PTYPE_L2_ETHER,
+ RTE_PTYPE_L3_IPV4,
+ RTE_PTYPE_L3_IPV4_EXT,
+ RTE_PTYPE_L3_IPV6,
+ RTE_PTYPE_L3_IPV6_EXT,
+ RTE_PTYPE_L4_TCP,
+ RTE_PTYPE_L4_UDP,
+ RTE_PTYPE_L4_SCTP
+ };
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (dev->rx_pkt_burst == dpaa_eth_queue_rx)
+ return ptypes;
+ return NULL;
+}
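+
+/* The table above is what the generic rte_eth_dev_get_supported_ptypes()
+ * call returns for this PMD when the default Rx burst function is in use.
+ * A hedged sketch of querying it from an application; the port id and the
+ * array size are assumptions.
+ *
+ *	#include <stdio.h>
+ *	#include <rte_common.h>
+ *	#include <rte_ethdev.h>
+ *
+ *	static void
+ *	print_supported_ptypes(uint16_t port_id)
+ *	{
+ *		uint32_t ptypes[16];
+ *		int i, num;
+ *
+ *		num = rte_eth_dev_get_supported_ptypes(port_id,
+ *				RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK,
+ *				ptypes, RTE_DIM(ptypes));
+ *
+ *		for (i = 0; i < num && i < (int)RTE_DIM(ptypes); i++)
+ *			printf("ptype 0x%x supported\n", ptypes[i]);
+ *	}
+ */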
+
+static int dpaa_eth_dev_start(struct rte_eth_dev *dev)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ /* Change tx callback to the real one */
+ dev->tx_pkt_burst = dpaa_eth_queue_tx;
+ fman_if_enable_rx(dpaa_intf->fif);
+
+ return 0;
+}
+
+static void dpaa_eth_dev_stop(struct rte_eth_dev *dev)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ fman_if_disable_rx(dpaa_intf->fif);
+ dev->tx_pkt_burst = dpaa_eth_tx_drop_all;
+}
+
+static void dpaa_eth_dev_close(struct rte_eth_dev *dev)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ dpaa_eth_dev_stop(dev);
+}
+
+static int
+dpaa_fw_version_get(struct rte_eth_dev *dev __rte_unused,
+ char *fw_version,
+ size_t fw_size)
+{
+ int ret;
+ FILE *svr_file = NULL;
+ unsigned int svr_ver = 0;
+
+ PMD_INIT_FUNC_TRACE();
+
+ svr_file = fopen(DPAA_SOC_ID_FILE, "r");
+ if (!svr_file) {
+ DPAA_PMD_ERR("Unable to open SoC device");
+ return -ENOTSUP; /* Not supported on this infra */
+ }
+
+ ret = fscanf(svr_file, "svr:%x", &svr_ver);
+ fclose(svr_file);
+ if (ret <= 0) {
+ DPAA_PMD_ERR("Unable to read SoC device");
+ return -ENOTSUP; /* Not supported on this infra */
+ }
+
+ ret = snprintf(fw_version, fw_size,
+ "svr:%x-fman-v%x",
+ svr_ver,
+ fman_ip_rev);
+
+ ret += 1; /* add the size of '\0' */
+ if (fw_size < (uint32_t)ret)
+ return ret;
+ else
+ return 0;
+}
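+
+/* dpaa_fw_version_get() backs the generic rte_eth_dev_fw_version_get() call,
+ * composing the version string from the SoC SVR value and the FMan IP
+ * revision. A minimal caller sketch, with an assumed port id and buffer size:
+ *
+ *	#include <stdio.h>
+ *	#include <rte_ethdev.h>
+ *
+ *	static void
+ *	print_fw_version(uint16_t port_id)
+ *	{
+ *		char fw[64];
+ *
+ *		if (rte_eth_dev_fw_version_get(port_id, fw, sizeof(fw)) == 0)
+ *			printf("port %u fw: %s\n", port_id, fw);
+ *		else
+ *			printf("port %u: fw version not available\n", port_id);
+ *	}
+ */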
+
+static void dpaa_eth_dev_info(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *dev_info)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ dev_info->max_rx_queues = dpaa_intf->nb_rx_queues;
+ dev_info->max_tx_queues = dpaa_intf->nb_tx_queues;
+ dev_info->min_rx_bufsize = DPAA_MIN_RX_BUF_SIZE;
+ dev_info->max_rx_pktlen = DPAA_MAX_RX_PKT_LEN;
+ dev_info->max_mac_addrs = DPAA_MAX_MAC_FILTER;
+ dev_info->max_hash_mac_addrs = 0;
+ dev_info->max_vfs = 0;
+ dev_info->max_vmdq_pools = ETH_16_POOLS;
+ dev_info->flow_type_rss_offloads = DPAA_RSS_OFFLOAD_ALL;
+ dev_info->speed_capa = (ETH_LINK_SPEED_1G |
+ ETH_LINK_SPEED_10G);
+ dev_info->rx_offload_capa =
+ (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM);
+ dev_info->tx_offload_capa =
+ (DEV_TX_OFFLOAD_IPV4_CKSUM |
+ DEV_TX_OFFLOAD_UDP_CKSUM |
+ DEV_TX_OFFLOAD_TCP_CKSUM);
+}
+
+static int dpaa_eth_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete __rte_unused)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+ struct rte_eth_link *link = &dev->data->dev_link;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (dpaa_intf->fif->mac_type == fman_mac_1g)
+ link->link_speed = 1000;
+ else if (dpaa_intf->fif->mac_type == fman_mac_10g)
+ link->link_speed = 10000;
+ else
+ DPAA_PMD_ERR("invalid link_speed: %s, %d",
+ dpaa_intf->name, dpaa_intf->fif->mac_type);
+
+ link->link_status = dpaa_intf->valid;
+ link->link_duplex = ETH_LINK_FULL_DUPLEX;
+ link->link_autoneg = ETH_LINK_AUTONEG;
+ return 0;
+}
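+
+/* dpaa_eth_link_update() reports the speed from the FMan MAC type and uses
+ * the interface's valid flag as the link status, so the generic link query
+ * reflects whether the port has been set up with a buffer pool. A hedged
+ * sketch of reading it from an application:
+ *
+ *	#include <stdio.h>
+ *	#include <string.h>
+ *	#include <rte_ethdev.h>
+ *
+ *	static void
+ *	print_link(uint16_t port_id)
+ *	{
+ *		struct rte_eth_link link;
+ *
+ *		memset(&link, 0, sizeof(link));
+ *		rte_eth_link_get_nowait(port_id, &link);
+ *		printf("port %u: link %s, %u Mbps\n", port_id,
+ *		       link.link_status ? "up" : "down", link.link_speed);
+ *	}
+ */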
+
+static int dpaa_eth_stats_get(struct rte_eth_dev *dev,
+ struct rte_eth_stats *stats)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ fman_if_stats_get(dpaa_intf->fif, stats);
+ return 0;
+}
+
+static void dpaa_eth_stats_reset(struct rte_eth_dev *dev)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ fman_if_stats_reset(dpaa_intf->fif);
+}
+
+static int
+dpaa_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
+ unsigned int n)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+ unsigned int i = 0, num = RTE_DIM(dpaa_xstats_strings);
+ uint64_t values[sizeof(struct dpaa_if_stats) / 8];
+
+ if (xstats == NULL)
+ return 0;
+
+ if (n < num)
+ return num;
+
+ fman_if_stats_get_all(dpaa_intf->fif, values,
+ sizeof(struct dpaa_if_stats) / 8);
+
+ for (i = 0; i < num; i++) {
+ xstats[i].id = i;
+ xstats[i].value = values[dpaa_xstats_strings[i].offset / 8];
+ }
+ return i;
+}
+
+static int
+dpaa_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
+ struct rte_eth_xstat_name *xstats_names,
+ __rte_unused unsigned int limit)
+{
+ unsigned int i, stat_cnt = RTE_DIM(dpaa_xstats_strings);
+
+ if (xstats_names != NULL)
+ for (i = 0; i < stat_cnt; i++)
+ snprintf(xstats_names[i].name,
+ sizeof(xstats_names[i].name),
+ "%s",
+ dpaa_xstats_strings[i].name);
+
+ return stat_cnt;
+}
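+
+/* The xstats callbacks above follow the usual ethdev contract: a call with a
+ * NULL (or too small) array reports how many counters exist, and a follow-up
+ * call fills the names and values, with each xstat id indexing into the name
+ * table. A hedged application-side sketch:
+ *
+ *	#include <stdio.h>
+ *	#include <stdlib.h>
+ *	#include <inttypes.h>
+ *	#include <rte_ethdev.h>
+ *
+ *	static void
+ *	dump_xstats(uint16_t port_id)
+ *	{
+ *		int i, cnt = rte_eth_xstats_get_names(port_id, NULL, 0);
+ *		struct rte_eth_xstat_name *names;
+ *		struct rte_eth_xstat *vals;
+ *
+ *		if (cnt <= 0)
+ *			return;
+ *		names = calloc(cnt, sizeof(*names));
+ *		vals = calloc(cnt, sizeof(*vals));
+ *		if (names == NULL || vals == NULL)
+ *			goto out;
+ *
+ *		if (rte_eth_xstats_get_names(port_id, names, cnt) == cnt &&
+ *		    rte_eth_xstats_get(port_id, vals, cnt) == cnt)
+ *			for (i = 0; i < cnt; i++)
+ *				printf("%s: %" PRIu64 "\n",
+ *				       names[vals[i].id].name, vals[i].value);
+ *	out:
+ *		free(names);
+ *		free(vals);
+ *	}
+ */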
+
+static int
+dpaa_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids,
+ uint64_t *values, unsigned int n)
+{
+ unsigned int i, stat_cnt = RTE_DIM(dpaa_xstats_strings);
+ uint64_t values_copy[sizeof(struct dpaa_if_stats) / 8];
+
+ if (!ids) {
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ if (n < stat_cnt)
+ return stat_cnt;
+
+ if (!values)
+ return 0;
+
+ fman_if_stats_get_all(dpaa_intf->fif, values_copy,
+ sizeof(struct dpaa_if_stats) / 8);
+
+ for (i = 0; i < stat_cnt; i++)
+ values[i] =
+ values_copy[dpaa_xstats_strings[i].offset / 8];
+
+ return stat_cnt;
+ }
+
+ dpaa_xstats_get_by_id(dev, NULL, values_copy, stat_cnt);
+
+ for (i = 0; i < n; i++) {
+ if (ids[i] >= stat_cnt) {
+ DPAA_PMD_ERR("id value isn't valid");
+ return -1;
+ }
+ values[i] = values_copy[ids[i]];
+ }
+ return n;
+}
+
+static int
+dpaa_xstats_get_names_by_id(
+ struct rte_eth_dev *dev,
+ struct rte_eth_xstat_name *xstats_names,
+ const uint64_t *ids,
+ unsigned int limit)
+{
+ unsigned int i, stat_cnt = RTE_DIM(dpaa_xstats_strings);
+ struct rte_eth_xstat_name xstats_names_copy[stat_cnt];
+
+ if (!ids)
+ return dpaa_xstats_get_names(dev, xstats_names, limit);
+
+ dpaa_xstats_get_names(dev, xstats_names_copy, limit);
+
+ for (i = 0; i < limit; i++) {
+ if (ids[i] >= stat_cnt) {
+ DPAA_PMD_ERR("id value isn't valid");
+ return -1;
+ }
+ strcpy(xstats_names[i].name, xstats_names_copy[ids[i]].name);
+ }
+ return limit;
+}
+
+static void dpaa_eth_promiscuous_enable(struct rte_eth_dev *dev)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ fman_if_promiscuous_enable(dpaa_intf->fif);
+}
+
+static void dpaa_eth_promiscuous_disable(struct rte_eth_dev *dev)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ fman_if_promiscuous_disable(dpaa_intf->fif);
+}
+
+static void dpaa_eth_multicast_enable(struct rte_eth_dev *dev)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ fman_if_set_mcast_filter_table(dpaa_intf->fif);
+}
+
+static void dpaa_eth_multicast_disable(struct rte_eth_dev *dev)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ fman_if_reset_mcast_filter_table(dpaa_intf->fif);
+}
+
+static
+int dpaa_eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+ uint16_t nb_desc __rte_unused,
+ unsigned int socket_id __rte_unused,
+ const struct rte_eth_rxconf *rx_conf __rte_unused,
+ struct rte_mempool *mp)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ DPAA_PMD_INFO("Rx queue setup for queue index: %d", queue_idx);
+
+ if (!dpaa_intf->bp_info || dpaa_intf->bp_info->mp != mp) {
+ struct fman_if_ic_params icp;
+ uint32_t fd_offset;
+ uint32_t bp_size;
+
+ if (!mp->pool_data) {
+ DPAA_PMD_ERR("Not an offloaded buffer pool!");
+ return -1;
+ }
+ dpaa_intf->bp_info = DPAA_MEMPOOL_TO_POOL_INFO(mp);
+
+ memset(&icp, 0, sizeof(icp));
+ /* set ICEOF to the default value, which is 0 */
+ icp.iciof = DEFAULT_ICIOF;
+ icp.iceof = DEFAULT_RX_ICEOF;
+ icp.icsz = DEFAULT_ICSZ;
+ fman_if_set_ic_params(dpaa_intf->fif, &icp);
+
+ fd_offset = RTE_PKTMBUF_HEADROOM + DPAA_HW_BUF_RESERVE;
+ fman_if_set_fdoff(dpaa_intf->fif, fd_offset);
+
+ /* Buffer pool size should be equal to Dataroom Size*/
+ bp_size = rte_pktmbuf_data_room_size(mp);
+ fman_if_set_bp(dpaa_intf->fif, mp->size,
+ dpaa_intf->bp_info->bpid, bp_size);
+ dpaa_intf->valid = 1;
+ DPAA_PMD_INFO("if =%s - fd_offset = %d offset = %d",
+ dpaa_intf->name, fd_offset,
+ fman_if_get_fdoff(dpaa_intf->fif));
+ }
+ dev->data->rx_queues[queue_idx] = &dpaa_intf->rx_queues[queue_idx];
+
+ return 0;
+}
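+
+/* dpaa_eth_rx_queue_setup() programs the FMan buffer pool from whatever
+ * mempool the application passes in, so the pool must be backed by the DPAA
+ * ("offloaded") mempool driver or the mp->pool_data check above fails. A
+ * hedged sketch of the application-side call, assuming the platform default
+ * mempool ops are the DPAA ones and using illustrative sizes:
+ *
+ *	#include <rte_ethdev.h>
+ *	#include <rte_mbuf.h>
+ *	#include <rte_mempool.h>
+ *
+ *	static int
+ *	setup_rx_queue0(uint16_t port_id)
+ *	{
+ *		struct rte_mempool *mp;
+ *
+ *		mp = rte_pktmbuf_pool_create("rx_pool", 2048, 256, 0,
+ *					     RTE_MBUF_DEFAULT_BUF_SIZE,
+ *					     rte_eth_dev_socket_id(port_id));
+ *		if (mp == NULL)
+ *			return -1;
+ *
+ *		// nb_desc and rx_conf are ignored by this PMD (see above)
+ *		return rte_eth_rx_queue_setup(port_id, 0, 128,
+ *					      rte_eth_dev_socket_id(port_id),
+ *					      NULL, mp);
+ *	}
+ */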
+
+static
+void dpaa_eth_rx_queue_release(void *rxq __rte_unused)
+{
+ PMD_INIT_FUNC_TRACE();
+}
+
+static
+int dpaa_eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+ uint16_t nb_desc __rte_unused,
+ unsigned int socket_id __rte_unused,
+ const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ DPAA_PMD_INFO("Tx queue setup for queue index: %d", queue_idx);
+ dev->data->tx_queues[queue_idx] = &dpaa_intf->tx_queues[queue_idx];
+ return 0;
+}
+
+static void dpaa_eth_tx_queue_release(void *txq __rte_unused)
+{
+ PMD_INIT_FUNC_TRACE();
+}
+
+static int dpaa_link_down(struct rte_eth_dev *dev)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ dpaa_eth_dev_stop(dev);
+ return 0;
+}
+
+static int dpaa_link_up(struct rte_eth_dev *dev)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ dpaa_eth_dev_start(dev);
+ return 0;
+}
+
+static int
+dpaa_flow_ctrl_set(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+ struct rte_eth_fc_conf *net_fc;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (!(dpaa_intf->fc_conf)) {
+ dpaa_intf->fc_conf = rte_zmalloc(NULL,
+ sizeof(struct rte_eth_fc_conf), MAX_CACHELINE);
+ if (!dpaa_intf->fc_conf) {
+ DPAA_PMD_ERR("unable to save flow control info");
+ return -ENOMEM;
+ }
+ }
+ net_fc = dpaa_intf->fc_conf;
+
+ if (fc_conf->high_water < fc_conf->low_water) {
+ DPAA_PMD_ERR("Incorrect Flow Control Configuration");
+ return -EINVAL;
+ }
+
+ if (fc_conf->mode == RTE_FC_NONE) {
+ return 0;
+ } else if (fc_conf->mode == RTE_FC_TX_PAUSE ||
+ fc_conf->mode == RTE_FC_FULL) {
+ fman_if_set_fc_threshold(dpaa_intf->fif, fc_conf->high_water,
+ fc_conf->low_water,
+ dpaa_intf->bp_info->bpid);
+ if (fc_conf->pause_time)
+ fman_if_set_fc_quanta(dpaa_intf->fif,
+ fc_conf->pause_time);
+ }
+
+ /* Save the information in dpaa device */
+ net_fc->pause_time = fc_conf->pause_time;
+ net_fc->high_water = fc_conf->high_water;
+ net_fc->low_water = fc_conf->low_water;
+ net_fc->send_xon = fc_conf->send_xon;
+ net_fc->mac_ctrl_frame_fwd = fc_conf->mac_ctrl_frame_fwd;
+ net_fc->mode = fc_conf->mode;
+ net_fc->autoneg = fc_conf->autoneg;
+
+ return 0;
+}
+
+static int
+dpaa_flow_ctrl_get(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+ struct rte_eth_fc_conf *net_fc = dpaa_intf->fc_conf;
+ int ret;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (net_fc) {
+ fc_conf->pause_time = net_fc->pause_time;
+ fc_conf->high_water = net_fc->high_water;
+ fc_conf->low_water = net_fc->low_water;
+ fc_conf->send_xon = net_fc->send_xon;
+ fc_conf->mac_ctrl_frame_fwd = net_fc->mac_ctrl_frame_fwd;
+ fc_conf->mode = net_fc->mode;
+ fc_conf->autoneg = net_fc->autoneg;
+ return 0;
+ }
+ ret = fman_if_get_fc_threshold(dpaa_intf->fif);
+ if (ret) {
+ fc_conf->mode = RTE_FC_TX_PAUSE;
+ fc_conf->pause_time = fman_if_get_fc_quanta(dpaa_intf->fif);
+ } else {
+ fc_conf->mode = RTE_FC_NONE;
+ }
+
+ return 0;
+}
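+
+/* Together, the two callbacks above let an application read back and program
+ * the FMan pause-frame thresholds through the generic flow-control API. A
+ * hedged sketch of enabling Tx pause frames; the watermark and pause-time
+ * values are illustrative assumptions, not recommended settings.
+ *
+ *	#include <rte_ethdev.h>
+ *
+ *	static int
+ *	enable_tx_pause(uint16_t port_id)
+ *	{
+ *		struct rte_eth_fc_conf fc;
+ *
+ *		if (rte_eth_dev_flow_ctrl_get(port_id, &fc) != 0)
+ *			return -1;
+ *
+ *		fc.mode = RTE_FC_TX_PAUSE;
+ *		fc.high_water = 192;	// illustrative watermarks
+ *		fc.low_water = 64;
+ *		fc.pause_time = 0xffff;
+ *
+ *		return rte_eth_dev_flow_ctrl_set(port_id, &fc);
+ *	}
+ */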
+
+static int
+dpaa_dev_add_mac_addr(struct rte_eth_dev *dev,
+ struct ether_addr *addr,
+ uint32_t index,
+ __rte_unused uint32_t pool)
+{
+ int ret;
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ ret = fman_if_add_mac_addr(dpaa_intf->fif, addr->addr_bytes, index);
+
+ if (ret)
+ RTE_LOG(ERR, PMD, "error: Adding the MAC ADDR failed:"
+ " err = %d", ret);
+ return 0;
+}
+
+static void
+dpaa_dev_remove_mac_addr(struct rte_eth_dev *dev,
+ uint32_t index)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ fman_if_clear_mac_addr(dpaa_intf->fif, index);
+}
+
+static void
+dpaa_dev_set_mac_addr(struct rte_eth_dev *dev,
+ struct ether_addr *addr)
+{
+ int ret;
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ ret = fman_if_add_mac_addr(dpaa_intf->fif, addr->addr_bytes, 0);
+ if (ret)
+ RTE_LOG(ERR, PMD, "error: Setting the MAC ADDR failed %d", ret);
+}
+
+static struct eth_dev_ops dpaa_devops = {
+ .dev_configure = dpaa_eth_dev_configure,
+ .dev_start = dpaa_eth_dev_start,
+ .dev_stop = dpaa_eth_dev_stop,
+ .dev_close = dpaa_eth_dev_close,
+ .dev_infos_get = dpaa_eth_dev_info,
+ .dev_supported_ptypes_get = dpaa_supported_ptypes_get,
+
+ .rx_queue_setup = dpaa_eth_rx_queue_setup,
+ .tx_queue_setup = dpaa_eth_tx_queue_setup,
+ .rx_queue_release = dpaa_eth_rx_queue_release,
+ .tx_queue_release = dpaa_eth_tx_queue_release,
+
+ .flow_ctrl_get = dpaa_flow_ctrl_get,
+ .flow_ctrl_set = dpaa_flow_ctrl_set,
+
+ .link_update = dpaa_eth_link_update,
+ .stats_get = dpaa_eth_stats_get,
+ .xstats_get = dpaa_dev_xstats_get,
+ .xstats_get_by_id = dpaa_xstats_get_by_id,
+ .xstats_get_names_by_id = dpaa_xstats_get_names_by_id,
+ .xstats_get_names = dpaa_xstats_get_names,
+ .xstats_reset = dpaa_eth_stats_reset,
+ .stats_reset = dpaa_eth_stats_reset,
+ .promiscuous_enable = dpaa_eth_promiscuous_enable,
+ .promiscuous_disable = dpaa_eth_promiscuous_disable,
+ .allmulticast_enable = dpaa_eth_multicast_enable,
+ .allmulticast_disable = dpaa_eth_multicast_disable,
+ .mtu_set = dpaa_mtu_set,
+ .dev_set_link_down = dpaa_link_down,
+ .dev_set_link_up = dpaa_link_up,
+ .mac_addr_add = dpaa_dev_add_mac_addr,
+ .mac_addr_remove = dpaa_dev_remove_mac_addr,
+ .mac_addr_set = dpaa_dev_set_mac_addr,
+
+ .fw_version_get = dpaa_fw_version_get,
+};
+
+static int dpaa_fc_set_default(struct dpaa_if *dpaa_intf)
+{
+ struct rte_eth_fc_conf *fc_conf;
+ int ret;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (!(dpaa_intf->fc_conf)) {
+ dpaa_intf->fc_conf = rte_zmalloc(NULL,
+ sizeof(struct rte_eth_fc_conf), MAX_CACHELINE);
+ if (!dpaa_intf->fc_conf) {
+ DPAA_PMD_ERR("unable to save flow control info");
+ return -ENOMEM;
+ }
+ }
+ fc_conf = dpaa_intf->fc_conf;
+ ret = fman_if_get_fc_threshold(dpaa_intf->fif);
+ if (ret) {
+ fc_conf->mode = RTE_FC_TX_PAUSE;
+ fc_conf->pause_time = fman_if_get_fc_quanta(dpaa_intf->fif);
+ } else {
+ fc_conf->mode = RTE_FC_NONE;
+ }
+
+ return 0;
+}
+
+/* Initialise an Rx FQ */
+static int dpaa_rx_queue_init(struct qman_fq *fq,
+ uint32_t fqid)
+{
+ struct qm_mcc_initfq opts;
+ int ret;
+
+ PMD_INIT_FUNC_TRACE();
+
+ ret = qman_reserve_fqid(fqid);
+ if (ret) {
+ DPAA_PMD_ERR("reserve rx fqid %d failed with ret: %d",
+ fqid, ret);
+ return -EINVAL;
+ }
+
+ DPAA_PMD_DEBUG("creating rx fq %p, fqid %d", fq, fqid);
+ ret = qman_create_fq(fqid, QMAN_FQ_FLAG_NO_ENQUEUE, fq);
+ if (ret) {
+ DPAA_PMD_ERR("create rx fqid %d failed with ret: %d",
+ fqid, ret);
+ return ret;
+ }
+
+ opts.we_mask = QM_INITFQ_WE_DESTWQ | QM_INITFQ_WE_FQCTRL |
+ QM_INITFQ_WE_CONTEXTA;
+
+ opts.fqd.dest.wq = DPAA_IF_RX_PRIORITY;
+ opts.fqd.fq_ctrl = QM_FQCTRL_AVOIDBLOCK | QM_FQCTRL_CTXASTASHING |
+ QM_FQCTRL_PREFERINCACHE;
+ opts.fqd.context_a.stashing.exclusive = 0;
+ opts.fqd.context_a.stashing.annotation_cl = DPAA_IF_RX_ANNOTATION_STASH;
+ opts.fqd.context_a.stashing.data_cl = DPAA_IF_RX_DATA_STASH;
+ opts.fqd.context_a.stashing.context_cl = DPAA_IF_RX_CONTEXT_STASH;
+
+ /*Enable tail drop */
+ opts.we_mask = opts.we_mask | QM_INITFQ_WE_TDTHRESH;
+ opts.fqd.fq_ctrl = opts.fqd.fq_ctrl | QM_FQCTRL_TDE;
+ qm_fqd_taildrop_set(&opts.fqd.td, CONG_THRESHOLD_RX_Q, 1);
+
+ ret = qman_init_fq(fq, 0, &opts);
+ if (ret)
+ DPAA_PMD_ERR("init rx fqid %d failed with ret: %d", fqid, ret);
+ return ret;
+}
+
+/* Initialise a Tx FQ */
+static int dpaa_tx_queue_init(struct qman_fq *fq,
+ struct fman_if *fman_intf)
+{
+ struct qm_mcc_initfq opts;
+ int ret;
+
+ PMD_INIT_FUNC_TRACE();
+
+ ret = qman_create_fq(0, QMAN_FQ_FLAG_DYNAMIC_FQID |
+ QMAN_FQ_FLAG_TO_DCPORTAL, fq);
+ if (ret) {
+ DPAA_PMD_ERR("create tx fq failed with ret: %d", ret);
+ return ret;
+ }
+ opts.we_mask = QM_INITFQ_WE_DESTWQ | QM_INITFQ_WE_FQCTRL |
+ QM_INITFQ_WE_CONTEXTB | QM_INITFQ_WE_CONTEXTA;
+ opts.fqd.dest.channel = fman_intf->tx_channel_id;
+ opts.fqd.dest.wq = DPAA_IF_TX_PRIORITY;
+ opts.fqd.fq_ctrl = QM_FQCTRL_PREFERINCACHE;
+ opts.fqd.context_b = 0;
+ /* no tx-confirmation */
+ opts.fqd.context_a.hi = 0x80000000 | fman_dealloc_bufs_mask_hi;
+ opts.fqd.context_a.lo = 0 | fman_dealloc_bufs_mask_lo;
+ DPAA_PMD_DEBUG("init tx fq %p, fqid %d", fq, fq->fqid);
+ ret = qman_init_fq(fq, QMAN_INITFQ_FLAG_SCHED, &opts);
+ if (ret)
+ DPAA_PMD_ERR("init tx fqid %d failed %d", fq->fqid, ret);
+ return ret;
+}
+
+#ifdef RTE_LIBRTE_DPAA_DEBUG_DRIVER
+/* Initialise a DEBUG FQ ([rt]x_error, rx_default). */
+static int dpaa_debug_queue_init(struct qman_fq *fq, uint32_t fqid)
+{
+ struct qm_mcc_initfq opts;
+ int ret;
+
+ PMD_INIT_FUNC_TRACE();
+
+ ret = qman_reserve_fqid(fqid);
+ if (ret) {
+ DPAA_PMD_ERR("Reserve debug fqid %d failed with ret: %d",
+ fqid, ret);
+ return -EINVAL;
+ }
+ /* "map" this Rx FQ to one of the interfaces Tx FQID */
+ DPAA_PMD_DEBUG("Creating debug fq %p, fqid %d", fq, fqid);
+ ret = qman_create_fq(fqid, QMAN_FQ_FLAG_NO_ENQUEUE, fq);
+ if (ret) {
+ DPAA_PMD_ERR("create debug fqid %d failed with ret: %d",
+ fqid, ret);
+ return ret;
+ }
+ opts.we_mask = QM_INITFQ_WE_DESTWQ | QM_INITFQ_WE_FQCTRL;
+ opts.fqd.dest.wq = DPAA_IF_DEBUG_PRIORITY;
+ ret = qman_init_fq(fq, 0, &opts);
+ if (ret)
+ DPAA_PMD_ERR("init debug fqid %d failed with ret: %d",
+ fqid, ret);
+ return ret;
+}
+#endif
+
+/* Initialise a network interface */
+static int
+dpaa_dev_init(struct rte_eth_dev *eth_dev)
+{
+ int num_cores, num_rx_fqs, fqid;
+ int loop, ret = 0;
+ int dev_id;
+ struct rte_dpaa_device *dpaa_device;
+ struct dpaa_if *dpaa_intf;
+ struct fm_eth_port_cfg *cfg;
+ struct fman_if *fman_intf;
+ struct fman_if_bpool *bp, *tmp_bp;
+
+ PMD_INIT_FUNC_TRACE();
+
+ /* For secondary processes, the primary has done all the work */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
+ dpaa_device = DEV_TO_DPAA_DEVICE(eth_dev->device);
+ dev_id = dpaa_device->id.dev_id;
+ dpaa_intf = eth_dev->data->dev_private;
+ cfg = &dpaa_netcfg->port_cfg[dev_id];
+ fman_intf = cfg->fman_if;
+
+ dpaa_intf->name = dpaa_device->name;
+
+ /* save fman_if & cfg in the interface structure */
+ dpaa_intf->fif = fman_intf;
+ dpaa_intf->ifid = dev_id;
+ dpaa_intf->cfg = cfg;
+
+ /* Initialize Rx FQ's */
+ if (getenv("DPAA_NUM_RX_QUEUES"))
+ num_rx_fqs = atoi(getenv("DPAA_NUM_RX_QUEUES"));
+ else
+ num_rx_fqs = DPAA_DEFAULT_NUM_PCD_QUEUES;
+
+ /* Each device can not have more than DPAA_PCD_FQID_MULTIPLIER RX
+ * queues.
+ */
+ if (num_rx_fqs <= 0 || num_rx_fqs > DPAA_PCD_FQID_MULTIPLIER) {
+ DPAA_PMD_ERR("Invalid number of RX queues\n");
+ return -EINVAL;
+ }
+
+ dpaa_intf->rx_queues = rte_zmalloc(NULL,
+ sizeof(struct qman_fq) * num_rx_fqs, MAX_CACHELINE);
+ if (!dpaa_intf->rx_queues)
+ return -ENOMEM;
+ for (loop = 0; loop < num_rx_fqs; loop++) {
+ fqid = DPAA_PCD_FQID_START + dpaa_intf->ifid *
+ DPAA_PCD_FQID_MULTIPLIER + loop;
+ ret = dpaa_rx_queue_init(&dpaa_intf->rx_queues[loop], fqid);
+ if (ret)
+ return ret;
+ dpaa_intf->rx_queues[loop].dpaa_intf = dpaa_intf;
+ }
+ dpaa_intf->nb_rx_queues = num_rx_fqs;
+
+ /* Initialise Tx FQs. Have as many Tx FQ's as number of cores */
+ num_cores = rte_lcore_count();
+ dpaa_intf->tx_queues = rte_zmalloc(NULL, sizeof(struct qman_fq) *
+ num_cores, MAX_CACHELINE);
+ if (!dpaa_intf->tx_queues)
+ return -ENOMEM;
+
+ for (loop = 0; loop < num_cores; loop++) {
+ ret = dpaa_tx_queue_init(&dpaa_intf->tx_queues[loop],
+ fman_intf);
+ if (ret)
+ return ret;
+ dpaa_intf->tx_queues[loop].dpaa_intf = dpaa_intf;
+ }
+ dpaa_intf->nb_tx_queues = num_cores;
+
+#ifdef RTE_LIBRTE_DPAA_DEBUG_DRIVER
+ dpaa_debug_queue_init(&dpaa_intf->debug_queues[
+ DPAA_DEBUG_FQ_RX_ERROR], fman_intf->fqid_rx_err);
+ dpaa_intf->debug_queues[DPAA_DEBUG_FQ_RX_ERROR].dpaa_intf = dpaa_intf;
+ dpaa_debug_queue_init(&dpaa_intf->debug_queues[
+ DPAA_DEBUG_FQ_TX_ERROR], fman_intf->fqid_tx_err);
+ dpaa_intf->debug_queues[DPAA_DEBUG_FQ_TX_ERROR].dpaa_intf = dpaa_intf;
+#endif
+
+ DPAA_PMD_DEBUG("All frame queues created");
+
+ /* Get the initial configuration for flow control */
+ dpaa_fc_set_default(dpaa_intf);
+
+ /* reset bpool list, initialize bpool dynamically */
+ list_for_each_entry_safe(bp, tmp_bp, &cfg->fman_if->bpool_list, node) {
+ list_del(&bp->node);
+ free(bp);
+ }
+
+ /* Populate ethdev structure */
+ eth_dev->dev_ops = &dpaa_devops;
+ eth_dev->rx_pkt_burst = dpaa_eth_queue_rx;
+ eth_dev->tx_pkt_burst = dpaa_eth_tx_drop_all;
+
+ /* Allocate memory for storing MAC addresses */
+ eth_dev->data->mac_addrs = rte_zmalloc("mac_addr",
+ ETHER_ADDR_LEN * DPAA_MAX_MAC_FILTER, 0);
+ if (eth_dev->data->mac_addrs == NULL) {
+ DPAA_PMD_ERR("Failed to allocate %d bytes needed to "
+ "store MAC addresses",
+ ETHER_ADDR_LEN * DPAA_MAX_MAC_FILTER);
+ rte_free(dpaa_intf->rx_queues);
+ rte_free(dpaa_intf->tx_queues);
+ dpaa_intf->rx_queues = NULL;
+ dpaa_intf->tx_queues = NULL;
+ dpaa_intf->nb_rx_queues = 0;
+ dpaa_intf->nb_tx_queues = 0;
+ return -ENOMEM;
+ }
+
+ /* copy the primary mac address */
+ ether_addr_copy(&fman_intf->mac_addr, &eth_dev->data->mac_addrs[0]);
+
+ RTE_LOG(INFO, PMD, "net: dpaa: %s: %02x:%02x:%02x:%02x:%02x:%02x\n",
+ dpaa_device->name,
+ fman_intf->mac_addr.addr_bytes[0],
+ fman_intf->mac_addr.addr_bytes[1],
+ fman_intf->mac_addr.addr_bytes[2],
+ fman_intf->mac_addr.addr_bytes[3],
+ fman_intf->mac_addr.addr_bytes[4],
+ fman_intf->mac_addr.addr_bytes[5]);
+
+ /* Disable RX mode */
+ fman_if_discard_rx_errors(fman_intf);
+ fman_if_disable_rx(fman_intf);
+ /* Disable promiscuous mode */
+ fman_if_promiscuous_disable(fman_intf);
+ /* Disable multicast */
+ fman_if_reset_mcast_filter_table(fman_intf);
+ /* Reset interface statistics */
+ fman_if_stats_reset(fman_intf);
+
+ return 0;
+}
+
+static int
+dpaa_dev_uninit(struct rte_eth_dev *dev)
+{
+ struct dpaa_if *dpaa_intf = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return -EPERM;
+
+ if (!dpaa_intf) {
+ DPAA_PMD_WARN("Already closed or not started");
+ return -1;
+ }
+
+ dpaa_eth_dev_close(dev);
+
+ /* release configuration memory */
+ if (dpaa_intf->fc_conf)
+ rte_free(dpaa_intf->fc_conf);
+
+ rte_free(dpaa_intf->rx_queues);
+ dpaa_intf->rx_queues = NULL;
+
+ rte_free(dpaa_intf->tx_queues);
+ dpaa_intf->tx_queues = NULL;
+
+ /* free memory for storing MAC addresses */
+ rte_free(dev->data->mac_addrs);
+ dev->data->mac_addrs = NULL;
+
+ dev->dev_ops = NULL;
+ dev->rx_pkt_burst = NULL;
+ dev->tx_pkt_burst = NULL;
+
+ return 0;
+}
+
+static int
+rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv,
+ struct rte_dpaa_device *dpaa_dev)
+{
+ int diag;
+ int ret;
+ struct rte_eth_dev *eth_dev;
+
+ PMD_INIT_FUNC_TRACE();
+
+ /* In case of secondary process, the device is already configured
+ * and no further action is required, except portal initialization
+ * and verifying secondary attachment to port name.
+ */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ eth_dev = rte_eth_dev_attach_secondary(dpaa_dev->name);
+ if (!eth_dev)
+ return -ENOMEM;
+ return 0;
+ }
+
+ if (!is_global_init) {
+ /* One time load of Qman/Bman drivers */
+ ret = qman_global_init();
+ if (ret) {
+ DPAA_PMD_ERR("QMAN initialization failed: %d",
+ ret);
+ return ret;
+ }
+ ret = bman_global_init();
+ if (ret) {
+ DPAA_PMD_ERR("BMAN initialization failed: %d",
+ ret);
+ return ret;
+ }
+
+ is_global_init = 1;
+ }
+
+ ret = rte_dpaa_portal_init((void *)1);
+ if (ret) {
+ DPAA_PMD_ERR("Unable to initialize portal");
+ return ret;
+ }
+
+ eth_dev = rte_eth_dev_allocate(dpaa_dev->name);
+ if (eth_dev == NULL)
+ return -ENOMEM;
+
+ eth_dev->data->dev_private = rte_zmalloc(
+ "ethdev private structure",
+ sizeof(struct dpaa_if),
+ RTE_CACHE_LINE_SIZE);
+ if (!eth_dev->data->dev_private) {
+ DPAA_PMD_ERR("Cannot allocate memzone for port data");
+ rte_eth_dev_release_port(eth_dev);
+ return -ENOMEM;
+ }
+
+ eth_dev->device = &dpaa_dev->device;
+ eth_dev->device->driver = &dpaa_drv->driver;
+ dpaa_dev->eth_dev = eth_dev;
+
+ /* Invoke PMD device initialization function */
+ diag = dpaa_dev_init(eth_dev);
+ if (diag == 0)
+ return 0;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_free(eth_dev->data->dev_private);
+
+ rte_eth_dev_release_port(eth_dev);
+ return diag;
+}
+
+static int
+rte_dpaa_remove(struct rte_dpaa_device *dpaa_dev)
+{
+ struct rte_eth_dev *eth_dev;
+
+ PMD_INIT_FUNC_TRACE();
+
+ eth_dev = dpaa_dev->eth_dev;
+ dpaa_dev_uninit(eth_dev);
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_free(eth_dev->data->dev_private);
+
+ rte_eth_dev_release_port(eth_dev);
+
+ return 0;
+}
+
+static struct rte_dpaa_driver rte_dpaa_pmd = {
+ .drv_type = FSL_DPAA_ETH,
+ .probe = rte_dpaa_probe,
+ .remove = rte_dpaa_remove,
+};
+
+RTE_PMD_REGISTER_DPAA(net_dpaa, rte_dpaa_pmd);
diff --git a/drivers/net/dpaa/dpaa_ethdev.h b/drivers/net/dpaa/dpaa_ethdev.h
new file mode 100644
index 00000000..5457d61b
--- /dev/null
+++ b/drivers/net/dpaa/dpaa_ethdev.h
@@ -0,0 +1,182 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) 2014-2016 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright 2017 NXP.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Freescale Semiconductor, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __DPAA_ETHDEV_H__
+#define __DPAA_ETHDEV_H__
+
+/* System headers */
+#include <stdbool.h>
+#include <rte_ethdev.h>
+
+#include <fsl_usd.h>
+#include <fsl_qman.h>
+#include <fsl_bman.h>
+#include <of.h>
+#include <netcfg.h>
+
+/* DPAA SoC identifier; if this is not available, it can be concluded
+ * that the board is non-DPAA. Single slot is currently supported.
+ */
+#define DPAA_SOC_ID_FILE "/sys/devices/soc0/soc_id"
+
+#define DPAA_MBUF_HW_ANNOTATION 64
+#define DPAA_FD_PTA_SIZE 64
+
+#if (DPAA_MBUF_HW_ANNOTATION + DPAA_FD_PTA_SIZE) > RTE_PKTMBUF_HEADROOM
+#error "Annotation requirement is more than RTE_PKTMBUF_HEADROOM"
+#endif
+
+/* we will re-use the HEADROOM for annotation in RX */
+#define DPAA_HW_BUF_RESERVE 0
+#define DPAA_PACKET_LAYOUT_ALIGN 64
+
+/* Alignment to use for cpu-local structs to avoid coherency problems. */
+#define MAX_CACHELINE 64
+
+#define DPAA_MIN_RX_BUF_SIZE 512
+#define DPAA_MAX_RX_PKT_LEN 10240
+
+/* RX queue tail drop threshold
+ * currently considering 32 KB packets.
+ */
+#define CONG_THRESHOLD_RX_Q (32 * 1024)
+
+/*max mac filter for memac(8) including primary mac addr*/
+#define DPAA_MAX_MAC_FILTER (MEMAC_NUM_OF_PADDRS + 1)
+
+/*Maximum number of slots available in TX ring*/
+#define MAX_TX_RING_SLOTS 8
+
+/* PCD frame queues */
+#define DPAA_PCD_FQID_START 0x400
+#define DPAA_PCD_FQID_MULTIPLIER 0x100
+#define DPAA_DEFAULT_NUM_PCD_QUEUES 1
+
+#define DPAA_IF_TX_PRIORITY 3
+#define DPAA_IF_RX_PRIORITY 4
+#define DPAA_IF_DEBUG_PRIORITY 7
+
+#define DPAA_IF_RX_ANNOTATION_STASH 1
+#define DPAA_IF_RX_DATA_STASH 1
+#define DPAA_IF_RX_CONTEXT_STASH 0
+
+/* Each "debug" FQ is represented by one of these */
+#define DPAA_DEBUG_FQ_RX_ERROR 0
+#define DPAA_DEBUG_FQ_TX_ERROR 1
+
+#define DPAA_RSS_OFFLOAD_ALL ( \
+ ETH_RSS_FRAG_IPV4 | \
+ ETH_RSS_NONFRAG_IPV4_TCP | \
+ ETH_RSS_NONFRAG_IPV4_UDP | \
+ ETH_RSS_NONFRAG_IPV4_SCTP | \
+ ETH_RSS_FRAG_IPV6 | \
+ ETH_RSS_NONFRAG_IPV6_TCP | \
+ ETH_RSS_NONFRAG_IPV6_UDP | \
+ ETH_RSS_NONFRAG_IPV6_SCTP)
+
+#define DPAA_TX_CKSUM_OFFLOAD_MASK ( \
+ PKT_TX_IP_CKSUM | \
+ PKT_TX_TCP_CKSUM | \
+ PKT_TX_UDP_CKSUM)
+
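+/* DPAA_TX_CKSUM_OFFLOAD_MASK groups the mbuf ol_flags that the Tx path
+ * checks when deciding to offload checksum calculation. A hedged sketch of
+ * how an application would mark an outgoing IPv4/UDP mbuf so that it matches
+ * this mask (standard DPDK mbuf conventions, not a DPAA-specific API):
+ *
+ *	#include <rte_mbuf.h>
+ *	#include <rte_ether.h>
+ *	#include <rte_ip.h>
+ *
+ *	static void
+ *	request_udp_cksum_offload(struct rte_mbuf *m)
+ *	{
+ *		m->l2_len = sizeof(struct ether_hdr);
+ *		m->l3_len = sizeof(struct ipv4_hdr);
+ *		m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
+ *			       PKT_TX_UDP_CKSUM;
+ *	}
+ */
+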
+/* DPAA Frame descriptor macros */
+
+#define DPAA_FD_CMD_FCO 0x80000000
+/**< Frame queue Context Override */
+#define DPAA_FD_CMD_RPD 0x40000000
+/**< Read Prepended Data */
+#define DPAA_FD_CMD_UPD 0x20000000
+/**< Update Prepended Data */
+#define DPAA_FD_CMD_DTC 0x10000000
+/**< Do IP/TCP/UDP Checksum */
+#define DPAA_FD_CMD_DCL4C 0x10000000
+/**< Didn't calculate L4 Checksum */
+#define DPAA_FD_CMD_CFQ 0x00ffffff
+/**< Confirmation Frame Queue */
+
+/* Each network interface is represented by one of these */
+struct dpaa_if {
+ int valid;
+ char *name;
+ const struct fm_eth_port_cfg *cfg;
+ struct qman_fq *rx_queues;
+ struct qman_fq *tx_queues;
+ struct qman_fq debug_queues[2];
+ uint16_t nb_rx_queues;
+ uint16_t nb_tx_queues;
+ uint32_t ifid;
+ struct fman_if *fif;
+ struct dpaa_bp_info *bp_info;
+ struct rte_eth_fc_conf *fc_conf;
+};
+
+struct dpaa_if_stats {
+ /* Rx Statistics Counter */
+ uint64_t reoct; /**<Rx Eth Octets Counter */
+ uint64_t roct; /**<Rx Octet Counters */
+ uint64_t raln; /**<Rx Alignment Error Counter */
+ uint64_t rxpf; /**<Rx valid Pause Frame */
+ uint64_t rfrm; /**<Rx Frame counter */
+ uint64_t rfcs; /**<Rx frame check seq error */
+ uint64_t rvlan; /**<Rx Vlan Frame Counter */
+ uint64_t rerr; /**<Rx Frame error */
+ uint64_t ruca; /**<Rx Unicast */
+ uint64_t rmca; /**<Rx Multicast */
+ uint64_t rbca; /**<Rx Broadcast */
+ uint64_t rdrp; /**<Rx Dropped Packet */
+ uint64_t rpkt; /**<Rx packet */
+ uint64_t rund; /**<Rx undersized packets */
+ uint32_t res_x[14];
+ uint64_t rovr; /**<Rx oversized but good */
+ uint64_t rjbr; /**<Rx oversized with bad csum */
+ uint64_t rfrg; /**<Rx fragment Packet */
+ uint64_t rcnp; /**<Rx control packets (0x8808) */
+ uint64_t rdrntp; /**<Rx dropped due to FIFO overflow */
+ uint32_t res01d0[12];
+ /* Tx Statistics Counter */
+ uint64_t teoct; /**<Tx eth octets */
+ uint64_t toct; /**<Tx Octets */
+ uint32_t res0210[2];
+ uint64_t txpf; /**<Tx valid pause frame */
+ uint64_t tfrm; /**<Tx frame counter */
+ uint64_t tfcs; /**<Tx FCS error */
+ uint64_t tvlan; /**<Tx Vlan Frame */
+ uint64_t terr; /**<Tx frame error */
+ uint64_t tuca; /**<Tx Unicast */
+ uint64_t tmca; /**<Tx Multicast */
+ uint64_t tbca; /**<Tx Broadcast */
+ uint32_t res0258[2];
+ uint64_t tpkt; /**<Tx Packet */
+ uint64_t tund; /**<Tx Undersized */
+};
+
+#endif
diff --git a/drivers/net/dpaa/dpaa_rxtx.c b/drivers/net/dpaa/dpaa_rxtx.c
new file mode 100644
index 00000000..41e57f2e
--- /dev/null
+++ b/drivers/net/dpaa/dpaa_rxtx.c
@@ -0,0 +1,756 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright 2017 NXP.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Freescale Semiconductor, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* System headers */
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <limits.h>
+#include <sched.h>
+#include <pthread.h>
+
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_interrupts.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_pci.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_memory.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_alarm.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_ring.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+
+#include "dpaa_ethdev.h"
+#include "dpaa_rxtx.h"
+#include <rte_dpaa_bus.h>
+#include <dpaa_mempool.h>
+
+#include <fsl_usd.h>
+#include <fsl_qman.h>
+#include <fsl_bman.h>
+#include <of.h>
+#include <netcfg.h>
+
+#define DPAA_MBUF_TO_CONTIG_FD(_mbuf, _fd, _bpid) \
+ do { \
+ (_fd)->cmd = 0; \
+ (_fd)->opaque_addr = 0; \
+ (_fd)->opaque = QM_FD_CONTIG << DPAA_FD_FORMAT_SHIFT; \
+ (_fd)->opaque |= ((_mbuf)->data_off) << DPAA_FD_OFFSET_SHIFT; \
+ (_fd)->opaque |= (_mbuf)->pkt_len; \
+ (_fd)->addr = (_mbuf)->buf_iova; \
+ (_fd)->bpid = _bpid; \
+ } while (0)
+
+#if (defined RTE_LIBRTE_DPAA_DEBUG_DRIVER)
+void dpaa_display_frame(const struct qm_fd *fd)
+{
+ int ii;
+ char *ptr;
+
+ printf("%s::bpid %x addr %08x%08x, format %d off %d, len %d stat %x\n",
+ __func__, fd->bpid, fd->addr_hi, fd->addr_lo, fd->format,
+ fd->offset, fd->length20, fd->status);
+
+ ptr = (char *)rte_dpaa_mem_ptov(fd->addr);
+ ptr += fd->offset;
+ printf("%02x ", *ptr);
+ for (ii = 1; ii < fd->length20; ii++) {
+ printf("%02x ", *ptr);
+ if ((ii % 16) == 0)
+ printf("\n");
+ ptr++;
+ }
+ printf("\n");
+}
+#else
+#define dpaa_display_frame(a)
+#endif
+
+static inline void dpaa_slow_parsing(struct rte_mbuf *m __rte_unused,
+ uint64_t prs __rte_unused)
+{
+ DPAA_DP_LOG(DEBUG, "Slow parsing");
+ /*TBD:XXX: to be implemented*/
+}
+
+static inline void dpaa_eth_packet_info(struct rte_mbuf *m,
+ uint64_t fd_virt_addr)
+{
+ struct annotations_t *annot = GET_ANNOTATIONS(fd_virt_addr);
+ uint64_t prs = *((uint64_t *)(&annot->parse)) & DPAA_PARSE_MASK;
+
+ DPAA_DP_LOG(DEBUG, " Parsing mbuf: %p with annotations: %p", m, annot);
+
+ switch (prs) {
+ case DPAA_PKT_TYPE_NONE:
+ m->packet_type = 0;
+ break;
+ case DPAA_PKT_TYPE_ETHER:
+ m->packet_type = RTE_PTYPE_L2_ETHER;
+ break;
+ case DPAA_PKT_TYPE_IPV4:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV4;
+ break;
+ case DPAA_PKT_TYPE_IPV6:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV6;
+ break;
+ case DPAA_PKT_TYPE_IPV4_FRAG:
+ case DPAA_PKT_TYPE_IPV4_FRAG_UDP:
+ case DPAA_PKT_TYPE_IPV4_FRAG_TCP:
+ case DPAA_PKT_TYPE_IPV4_FRAG_SCTP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_FRAG;
+ break;
+ case DPAA_PKT_TYPE_IPV6_FRAG:
+ case DPAA_PKT_TYPE_IPV6_FRAG_UDP:
+ case DPAA_PKT_TYPE_IPV6_FRAG_TCP:
+ case DPAA_PKT_TYPE_IPV6_FRAG_SCTP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_FRAG;
+ break;
+ case DPAA_PKT_TYPE_IPV4_EXT:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV4_EXT;
+ break;
+ case DPAA_PKT_TYPE_IPV6_EXT:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV6_EXT;
+ break;
+ case DPAA_PKT_TYPE_IPV4_TCP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP;
+ break;
+ case DPAA_PKT_TYPE_IPV6_TCP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP;
+ break;
+ case DPAA_PKT_TYPE_IPV4_UDP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP;
+ break;
+ case DPAA_PKT_TYPE_IPV6_UDP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP;
+ break;
+ case DPAA_PKT_TYPE_IPV4_EXT_UDP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP;
+ break;
+ case DPAA_PKT_TYPE_IPV6_EXT_UDP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP;
+ break;
+ case DPAA_PKT_TYPE_IPV4_EXT_TCP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP;
+ break;
+ case DPAA_PKT_TYPE_IPV6_EXT_TCP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP;
+ break;
+ case DPAA_PKT_TYPE_IPV4_SCTP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP;
+ break;
+ case DPAA_PKT_TYPE_IPV6_SCTP:
+ m->packet_type = RTE_PTYPE_L2_ETHER |
+ RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP;
+ break;
+ /* More switch cases can be added */
+ default:
+ dpaa_slow_parsing(m, prs);
+ }
+
+ m->tx_offload = annot->parse.ip_off[0];
+ m->tx_offload |= (annot->parse.l4_off - annot->parse.ip_off[0])
+ << DPAA_PKT_L3_LEN_SHIFT;
+
+ /* Set the hash values */
+ m->hash.rss = (uint32_t)(rte_be_to_cpu_64(annot->hash));
+ m->ol_flags = PKT_RX_RSS_HASH;
+ /* All packets with Bad checksum are dropped by interface (and
+ * corresponding notification issued to RX error queues).
+ */
+ m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+
+ /* Check if Vlan is present */
+ if (prs & DPAA_PARSE_VLAN_MASK)
+ m->ol_flags |= PKT_RX_VLAN;
+ /* Packet received without stripping the vlan */
+}
+
+static inline void dpaa_checksum(struct rte_mbuf *mbuf)
+{
+ struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+ char *l3_hdr = (char *)eth_hdr + mbuf->l2_len;
+ struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+ struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+
+ DPAA_DP_LOG(DEBUG, "Calculating checksum for mbuf: %p", mbuf);
+
+ if (((mbuf->packet_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4) ||
+ ((mbuf->packet_type & RTE_PTYPE_L3_MASK) ==
+ RTE_PTYPE_L3_IPV4_EXT)) {
+ ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+ ipv4_hdr->hdr_checksum = 0;
+ ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
+ } else if (((mbuf->packet_type & RTE_PTYPE_L3_MASK) ==
+ RTE_PTYPE_L3_IPV6) ||
+ ((mbuf->packet_type & RTE_PTYPE_L3_MASK) ==
+ RTE_PTYPE_L3_IPV6_EXT))
+ ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+
+ if ((mbuf->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) {
+ struct tcp_hdr *tcp_hdr = (struct tcp_hdr *)(l3_hdr +
+ mbuf->l3_len);
+ tcp_hdr->cksum = 0;
+ if (eth_hdr->ether_type == htons(ETHER_TYPE_IPv4))
+ tcp_hdr->cksum = rte_ipv4_udptcp_cksum(ipv4_hdr,
+ tcp_hdr);
+ else /* assume ethertype == ETHER_TYPE_IPv6 */
+ tcp_hdr->cksum = rte_ipv6_udptcp_cksum(ipv6_hdr,
+ tcp_hdr);
+ } else if ((mbuf->packet_type & RTE_PTYPE_L4_MASK) ==
+ RTE_PTYPE_L4_UDP) {
+ struct udp_hdr *udp_hdr = (struct udp_hdr *)(l3_hdr +
+ mbuf->l3_len);
+ udp_hdr->dgram_cksum = 0;
+ if (eth_hdr->ether_type == htons(ETHER_TYPE_IPv4))
+ udp_hdr->dgram_cksum = rte_ipv4_udptcp_cksum(ipv4_hdr,
+ udp_hdr);
+ else /* assume ethertype == ETHER_TYPE_IPv6 */
+ udp_hdr->dgram_cksum = rte_ipv6_udptcp_cksum(ipv6_hdr,
+ udp_hdr);
+ }
+}
+
+static inline void dpaa_checksum_offload(struct rte_mbuf *mbuf,
+ struct qm_fd *fd, char *prs_buf)
+{
+ struct dpaa_eth_parse_results_t *prs;
+
+ DPAA_DP_LOG(DEBUG, " Offloading checksum for mbuf: %p", mbuf);
+
+ prs = GET_TX_PRS(prs_buf);
+ prs->l3r = 0;
+ prs->l4r = 0;
+ if (((mbuf->packet_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4) ||
+ ((mbuf->packet_type & RTE_PTYPE_L3_MASK) ==
+ RTE_PTYPE_L3_IPV4_EXT))
+ prs->l3r = DPAA_L3_PARSE_RESULT_IPV4;
+ else if (((mbuf->packet_type & RTE_PTYPE_L3_MASK) ==
+ RTE_PTYPE_L3_IPV6) ||
+ ((mbuf->packet_type & RTE_PTYPE_L3_MASK) ==
+ RTE_PTYPE_L3_IPV6_EXT))
+ prs->l3r = DPAA_L3_PARSE_RESULT_IPV6;
+
+ if ((mbuf->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP)
+ prs->l4r = DPAA_L4_PARSE_RESULT_TCP;
+ else if ((mbuf->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP)
+ prs->l4r = DPAA_L4_PARSE_RESULT_UDP;
+
+ prs->ip_off[0] = mbuf->l2_len;
+ prs->l4_off = mbuf->l3_len + mbuf->l2_len;
+ /* Enable L3 (and L4, if TCP or UDP) HW checksum*/
+ fd->cmd = DPAA_FD_CMD_RPD | DPAA_FD_CMD_DTC;
+}
+
+struct rte_mbuf *
+dpaa_eth_sg_to_mbuf(struct qm_fd *fd, uint32_t ifid)
+{
+ struct dpaa_bp_info *bp_info = DPAA_BPID_TO_POOL_INFO(fd->bpid);
+ struct rte_mbuf *first_seg, *prev_seg, *cur_seg, *temp;
+ struct qm_sg_entry *sgt, *sg_temp;
+ void *vaddr, *sg_vaddr;
+ int i = 0;
+ uint8_t fd_offset = fd->offset;
+
+ DPAA_DP_LOG(DEBUG, "Received an SG frame");
+
+ vaddr = rte_dpaa_mem_ptov(qm_fd_addr(fd));
+ if (!vaddr) {
+ DPAA_PMD_ERR("unable to convert physical address");
+ return NULL;
+ }
+ sgt = vaddr + fd_offset;
+ sg_temp = &sgt[i++];
+ hw_sg_to_cpu(sg_temp);
+ temp = (struct rte_mbuf *)((char *)vaddr - bp_info->meta_data_size);
+ sg_vaddr = rte_dpaa_mem_ptov(qm_sg_entry_get64(sg_temp));
+
+ first_seg = (struct rte_mbuf *)((char *)sg_vaddr -
+ bp_info->meta_data_size);
+ first_seg->data_off = sg_temp->offset;
+ first_seg->data_len = sg_temp->length;
+ first_seg->pkt_len = sg_temp->length;
+ rte_mbuf_refcnt_set(first_seg, 1);
+
+ first_seg->port = ifid;
+ first_seg->nb_segs = 1;
+ first_seg->ol_flags = 0;
+ prev_seg = first_seg;
+ while (i < DPAA_SGT_MAX_ENTRIES) {
+ sg_temp = &sgt[i++];
+ hw_sg_to_cpu(sg_temp);
+ sg_vaddr = rte_dpaa_mem_ptov(qm_sg_entry_get64(sg_temp));
+ cur_seg = (struct rte_mbuf *)((char *)sg_vaddr -
+ bp_info->meta_data_size);
+ cur_seg->data_off = sg_temp->offset;
+ cur_seg->data_len = sg_temp->length;
+ first_seg->pkt_len += sg_temp->length;
+ first_seg->nb_segs += 1;
+ rte_mbuf_refcnt_set(cur_seg, 1);
+ prev_seg->next = cur_seg;
+ if (sg_temp->final) {
+ cur_seg->next = NULL;
+ break;
+ }
+ prev_seg = cur_seg;
+ }
+
+ dpaa_eth_packet_info(first_seg, (uint64_t)vaddr);
+ rte_pktmbuf_free_seg(temp);
+
+ return first_seg;
+}
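+
+/*
+ * Buffer layout assumed by dpaa_eth_sg_to_mbuf() (sketch):
+ *
+ *	fd->addr -------> [ buffer from bpid pool ]
+ *	                    + fd->offset: qm_sg_entry table (ends at 'final')
+ *	sg entry addr --> [ segment buffer ]; its mbuf header sits at
+ *	                    addr - bp_info->meta_data_size
+ *
+ * The buffer holding the SG table itself is returned to the pool through
+ * rte_pktmbuf_free_seg(temp) once the mbuf chain has been built.
+ */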
+
+static inline struct rte_mbuf *dpaa_eth_fd_to_mbuf(struct qm_fd *fd,
+ uint32_t ifid)
+{
+ struct dpaa_bp_info *bp_info = DPAA_BPID_TO_POOL_INFO(fd->bpid);
+ struct rte_mbuf *mbuf;
+ void *ptr;
+ uint8_t format =
+ (fd->opaque & DPAA_FD_FORMAT_MASK) >> DPAA_FD_FORMAT_SHIFT;
+ uint16_t offset =
+ (fd->opaque & DPAA_FD_OFFSET_MASK) >> DPAA_FD_OFFSET_SHIFT;
+ uint32_t length = fd->opaque & DPAA_FD_LENGTH_MASK;
+
+ DPAA_DP_LOG(DEBUG, " FD--->MBUF");
+
+ if (unlikely(format == qm_fd_sg))
+ return dpaa_eth_sg_to_mbuf(fd, ifid);
+
+ /* Ignoring case when format != qm_fd_contig */
+ dpaa_display_frame(fd);
+ ptr = rte_dpaa_mem_ptov(fd->addr);
+ /* Ignoring case when ptr would be NULL. That is only possible in case
+ * of a corrupted packet
+ */
+
+ mbuf = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
+ /* Prefetch the Parse results and packet data to L1 */
+ rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
+ rte_prefetch0((void *)((uint8_t *)ptr + offset));
+
+ mbuf->data_off = offset;
+ mbuf->data_len = length;
+ mbuf->pkt_len = length;
+
+ mbuf->port = ifid;
+ mbuf->nb_segs = 1;
+ mbuf->ol_flags = 0;
+ mbuf->next = NULL;
+ rte_mbuf_refcnt_set(mbuf, 1);
+ dpaa_eth_packet_info(mbuf, (uint64_t)mbuf->buf_addr);
+
+ return mbuf;
+}
+
+uint16_t dpaa_eth_queue_rx(void *q,
+ struct rte_mbuf **bufs,
+ uint16_t nb_bufs)
+{
+ struct qman_fq *fq = q;
+ struct qm_dqrr_entry *dq;
+ uint32_t num_rx = 0, ifid = ((struct dpaa_if *)fq->dpaa_intf)->ifid;
+ int ret;
+
+ ret = rte_dpaa_portal_init((void *)0);
+ if (ret) {
+ DPAA_PMD_ERR("Failure in affining portal");
+ return 0;
+ }
+
+ ret = qman_set_vdq(fq, (nb_bufs > DPAA_MAX_DEQUEUE_NUM_FRAMES) ?
+ DPAA_MAX_DEQUEUE_NUM_FRAMES : nb_bufs);
+ if (ret)
+ return 0;
+
+ do {
+ dq = qman_dequeue(fq);
+ if (!dq)
+ continue;
+ bufs[num_rx++] = dpaa_eth_fd_to_mbuf(&dq->fd, ifid);
+ qman_dqrr_consume(fq, dq);
+ } while (fq->flags & QMAN_FQ_STATE_VDQCR);
+
+ return num_rx;
+}
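+
+/*
+ * Note: the Rx loop above relies on QMan volatile dequeue. qman_set_vdq()
+ * issues a volatile dequeue command for at most DPAA_MAX_DEQUEUE_NUM_FRAMES
+ * frames, and qman_dequeue() is polled until the QMAN_FQ_STATE_VDQCR flag
+ * clears, i.e. until that command has been fully consumed.
+ */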
+
+static void *dpaa_get_pktbuf(struct dpaa_bp_info *bp_info)
+{
+ int ret;
+ uint64_t buf = 0;
+ struct bm_buffer bufs;
+
+ ret = bman_acquire(bp_info->bp, &bufs, 1, 0);
+ if (ret <= 0) {
+ DPAA_PMD_WARN("Failed to allocate buffers %d", ret);
+ return (void *)buf;
+ }
+
+ DPAA_DP_LOG(DEBUG, "got buffer 0x%lx from pool %d",
+ (uint64_t)bufs.addr, bufs.bpid);
+
+ buf = (uint64_t)rte_dpaa_mem_ptov(bufs.addr) - bp_info->meta_data_size;
+
+ return (void *)buf;
+}
+
+static struct rte_mbuf *dpaa_get_dmable_mbuf(struct rte_mbuf *mbuf,
+ struct dpaa_if *dpaa_intf)
+{
+ struct rte_mbuf *dpaa_mbuf;
+
+ /* allocate pktbuffer on bpid for dpaa port */
+ dpaa_mbuf = dpaa_get_pktbuf(dpaa_intf->bp_info);
+ if (!dpaa_mbuf)
+ return NULL;
+
+ memcpy((uint8_t *)(dpaa_mbuf->buf_addr) + mbuf->data_off, (void *)
+ ((uint8_t *)(mbuf->buf_addr) + mbuf->data_off), mbuf->pkt_len);
+
+ /* Copy only the required fields */
+ dpaa_mbuf->data_off = mbuf->data_off;
+ dpaa_mbuf->pkt_len = mbuf->pkt_len;
+ dpaa_mbuf->ol_flags = mbuf->ol_flags;
+ dpaa_mbuf->packet_type = mbuf->packet_type;
+ dpaa_mbuf->tx_offload = mbuf->tx_offload;
+ rte_pktmbuf_free(mbuf);
+ return dpaa_mbuf;
+}
+
+int
+dpaa_eth_mbuf_to_sg_fd(struct rte_mbuf *mbuf,
+ struct qm_fd *fd,
+ uint32_t bpid)
+{
+ struct rte_mbuf *cur_seg = mbuf, *prev_seg = NULL;
+ struct dpaa_bp_info *bp_info = DPAA_BPID_TO_POOL_INFO(bpid);
+ struct rte_mbuf *temp, *mi;
+ struct qm_sg_entry *sg_temp, *sgt;
+ int i = 0;
+
+ DPAA_DP_LOG(DEBUG, "Creating SG FD to transmit");
+
+ temp = rte_pktmbuf_alloc(bp_info->mp);
+ if (!temp) {
+ DPAA_PMD_ERR("Failure in allocation of mbuf");
+ return -1;
+ }
+ if (temp->buf_len < ((mbuf->nb_segs * sizeof(struct qm_sg_entry))
+ + temp->data_off)) {
+ DPAA_PMD_ERR("Insufficient space in mbuf for SG entries");
+ rte_pktmbuf_free(temp);
+ return -1;
+ }
+
+ fd->cmd = 0;
+ fd->opaque_addr = 0;
+
+ if (mbuf->ol_flags & DPAA_TX_CKSUM_OFFLOAD_MASK) {
+ if (temp->data_off < DEFAULT_TX_ICEOF
+ + sizeof(struct dpaa_eth_parse_results_t))
+ temp->data_off = DEFAULT_TX_ICEOF
+ + sizeof(struct dpaa_eth_parse_results_t);
+ dcbz_64(temp->buf_addr);
+ dpaa_checksum_offload(mbuf, fd, temp->buf_addr);
+ }
+
+ sgt = temp->buf_addr + temp->data_off;
+ fd->format = QM_FD_SG;
+ fd->addr = temp->buf_iova;
+ fd->offset = temp->data_off;
+ fd->bpid = bpid;
+ fd->length20 = mbuf->pkt_len;
+
+ while (i < DPAA_SGT_MAX_ENTRIES) {
+ sg_temp = &sgt[i++];
+ sg_temp->opaque = 0;
+ sg_temp->val = 0;
+ sg_temp->addr = cur_seg->buf_iova;
+ sg_temp->offset = cur_seg->data_off;
+ sg_temp->length = cur_seg->data_len;
+ if (RTE_MBUF_DIRECT(cur_seg)) {
+ if (rte_mbuf_refcnt_read(cur_seg) > 1) {
+ /*If refcnt > 1, invalid bpid is set to ensure
+ * buffer is not freed by HW.
+ */
+ sg_temp->bpid = 0xff;
+ rte_mbuf_refcnt_update(cur_seg, -1);
+ } else {
+ sg_temp->bpid =
+ DPAA_MEMPOOL_TO_BPID(cur_seg->pool);
+ }
+ cur_seg = cur_seg->next;
+ } else {
+ /* Get owner MBUF from indirect buffer */
+ mi = rte_mbuf_from_indirect(cur_seg);
+ if (rte_mbuf_refcnt_read(mi) > 1) {
+ /*If refcnt > 1, invalid bpid is set to ensure
+ * owner buffer is not freed by HW.
+ */
+ sg_temp->bpid = 0xff;
+ } else {
+ sg_temp->bpid = DPAA_MEMPOOL_TO_BPID(mi->pool);
+ rte_mbuf_refcnt_update(mi, 1);
+ }
+ prev_seg = cur_seg;
+ cur_seg = cur_seg->next;
+ prev_seg->next = NULL;
+ rte_pktmbuf_free(prev_seg);
+ }
+ if (cur_seg == NULL) {
+ sg_temp->final = 1;
+ cpu_to_hw_sg(sg_temp);
+ break;
+ }
+ cpu_to_hw_sg(sg_temp);
+ }
+ return 0;
+}
+
+/* Handle mbufs which are not segmented (non SG) */
+static inline void
+tx_on_dpaa_pool_unsegmented(struct rte_mbuf *mbuf,
+ struct dpaa_bp_info *bp_info,
+ struct qm_fd *fd_arr)
+{
+ struct rte_mbuf *mi = NULL;
+
+ if (RTE_MBUF_DIRECT(mbuf)) {
+ if (rte_mbuf_refcnt_read(mbuf) > 1) {
+ /* In case of direct mbuf and mbuf being cloned,
+ * BMAN should _not_ release buffer.
+ */
+ DPAA_MBUF_TO_CONTIG_FD(mbuf, fd_arr, 0xff);
+ /* Buffer should be released by EAL */
+ rte_mbuf_refcnt_update(mbuf, -1);
+ } else {
+ /* In case of direct mbuf and no cloning, mbuf can be
+ * released by BMAN.
+ */
+ DPAA_MBUF_TO_CONTIG_FD(mbuf, fd_arr, bp_info->bpid);
+ }
+ } else {
+ /* This is data-containing core mbuf: 'mi' */
+ mi = rte_mbuf_from_indirect(mbuf);
+ if (rte_mbuf_refcnt_read(mi) > 1) {
+ /* In case of indirect mbuf, and mbuf being cloned,
+ * BMAN should _not_ release it and let EAL release
+ * it through pktmbuf_free below.
+ */
+ DPAA_MBUF_TO_CONTIG_FD(mbuf, fd_arr, 0xff);
+ } else {
+ /* In case of indirect mbuf, and no cloning, core mbuf
+ * should be released by BMAN.
+ * Increase refcnt of core mbuf so that when
+ * pktmbuf_free is called and mbuf is released, EAL
+ * doesn't try to release core mbuf which would have
+ * been released by BMAN.
+ */
+ rte_mbuf_refcnt_update(mi, 1);
+ DPAA_MBUF_TO_CONTIG_FD(mbuf, fd_arr, bp_info->bpid);
+ }
+ rte_pktmbuf_free(mbuf);
+ }
+
+ if (mbuf->ol_flags & DPAA_TX_CKSUM_OFFLOAD_MASK) {
+ if (mbuf->data_off < (DEFAULT_TX_ICEOF +
+ sizeof(struct dpaa_eth_parse_results_t))) {
+ DPAA_DP_LOG(DEBUG, "Checksum offload Err: "
+ "Not enough Headroom "
+ "space for correct Checksum offload."
+ "So Calculating checksum in Software.");
+ dpaa_checksum(mbuf);
+ } else {
+ dpaa_checksum_offload(mbuf, fd_arr, mbuf->buf_addr);
+ }
+ }
+}
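+
+/*
+ * Summary of the ownership rules applied above (illustrative):
+ *	direct mbuf,   refcnt == 1	-> FD bpid = pool bpid, BMAN frees it
+ *	direct mbuf,   refcnt > 1	-> FD bpid = 0xff, refcnt decremented
+ *	indirect mbuf, refcnt(mi) == 1	-> FD bpid = pool bpid, mi refcnt bumped
+ *	indirect mbuf, refcnt(mi) > 1	-> FD bpid = 0xff, HW must not free mi
+ */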
+
+/* Handle all mbufs on dpaa BMAN managed pool */
+static inline uint16_t
+tx_on_dpaa_pool(struct rte_mbuf *mbuf,
+ struct dpaa_bp_info *bp_info,
+ struct qm_fd *fd_arr)
+{
+ DPAA_DP_LOG(DEBUG, "BMAN offloaded buffer, mbuf: %p", mbuf);
+
+ if (mbuf->nb_segs == 1) {
+ /* Case for non-segmented buffers */
+ tx_on_dpaa_pool_unsegmented(mbuf, bp_info, fd_arr);
+ } else if (mbuf->nb_segs > 1 &&
+ mbuf->nb_segs <= DPAA_SGT_MAX_ENTRIES) {
+ if (dpaa_eth_mbuf_to_sg_fd(mbuf, fd_arr, bp_info->bpid)) {
+ DPAA_PMD_DEBUG("Unable to create Scatter Gather FD");
+ return 1;
+ }
+ } else {
+ DPAA_PMD_DEBUG("Number of Segments not supported");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Handle all mbufs on an external pool (non-dpaa) */
+static inline uint16_t
+tx_on_external_pool(struct qman_fq *txq, struct rte_mbuf *mbuf,
+ struct qm_fd *fd_arr)
+{
+ struct dpaa_if *dpaa_intf = txq->dpaa_intf;
+ struct rte_mbuf *dmable_mbuf;
+
+ DPAA_DP_LOG(DEBUG, "Non-BMAN offloaded buffer."
+ "Allocating an offloaded buffer");
+ dmable_mbuf = dpaa_get_dmable_mbuf(mbuf, dpaa_intf);
+ if (!dmable_mbuf) {
+ DPAA_DP_LOG(DEBUG, "no dpaa buffers.");
+ return 1;
+ }
+
+ DPAA_MBUF_TO_CONTIG_FD(dmable_mbuf, fd_arr, dpaa_intf->bp_info->bpid);
+
+ return 0;
+}
+
+uint16_t
+dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
+{
+ struct rte_mbuf *mbuf, *mi = NULL;
+ struct rte_mempool *mp;
+ struct dpaa_bp_info *bp_info;
+ struct qm_fd fd_arr[MAX_TX_RING_SLOTS];
+ uint32_t frames_to_send, loop, i = 0;
+ uint16_t state;
+ int ret;
+
+ ret = rte_dpaa_portal_init((void *)0);
+ if (ret) {
+ DPAA_PMD_ERR("Failure in affining portal");
+ return 0;
+ }
+
+ DPAA_DP_LOG(DEBUG, "Transmitting %d buffers on queue: %p", nb_bufs, q);
+
+ while (nb_bufs) {
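+ /* Send in bursts of at most MAX_TX_RING_SLOTS frames;
+ * (nb_bufs >> 3) being non-zero means at least 8 frames are
+ * pending (MAX_TX_RING_SLOTS is assumed to be 8 here).
+ */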
+ frames_to_send = (nb_bufs >> 3) ? MAX_TX_RING_SLOTS : nb_bufs;
+ for (loop = 0; loop < frames_to_send; loop++, i++) {
+ mbuf = bufs[i];
+ if (RTE_MBUF_DIRECT(mbuf)) {
+ mp = mbuf->pool;
+ } else {
+ mi = rte_mbuf_from_indirect(mbuf);
+ mp = mi->pool;
+ }
+
+ bp_info = DPAA_MEMPOOL_TO_POOL_INFO(mp);
+ if (likely(mp->ops_index == bp_info->dpaa_ops_index)) {
+ state = tx_on_dpaa_pool(mbuf, bp_info,
+ &fd_arr[loop]);
+ if (unlikely(state)) {
+ /* Set frames_to_send & nb_bufs so
+ * that packets are transmitted till
+ * previous frame.
+ */
+ frames_to_send = loop;
+ nb_bufs = loop;
+ goto send_pkts;
+ }
+ } else {
+ state = tx_on_external_pool(q, mbuf,
+ &fd_arr[loop]);
+ if (unlikely(state)) {
+ /* Set frames_to_send & nb_bufs so
+ * that packets are transmitted till
+ * previous frame.
+ */
+ frames_to_send = loop;
+ nb_bufs = loop;
+ goto send_pkts;
+ }
+ }
+ }
+
+send_pkts:
+ loop = 0;
+ while (loop < frames_to_send) {
+ loop += qman_enqueue_multi(q, &fd_arr[loop],
+ frames_to_send - loop);
+ }
+ nb_bufs -= frames_to_send;
+ }
+
+ DPAA_DP_LOG(DEBUG, "Transmitted %d buffers on queue: %p", i, q);
+
+ return i;
+}
+
+uint16_t dpaa_eth_tx_drop_all(void *q __rte_unused,
+ struct rte_mbuf **bufs __rte_unused,
+ uint16_t nb_bufs __rte_unused)
+{
+ DPAA_DP_LOG(DEBUG, "Drop all packets");
+
+ /* Drop all incoming packets. No need to free packets here
+ * because the rte_eth f/w frees up the packets through tx_buffer
+ * callback in case this function returns count less than nb_bufs
+ */
+ return 0;
+}
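+
+/*
+ * Usage note (assumption, not enforced here): dpaa_eth_tx_drop_all() is meant
+ * to be installed as eth_dev->tx_pkt_burst while the port is stopped, so that
+ * packets handed to the PMD in that state are silently dropped.
+ */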
diff --git a/drivers/net/dpaa/dpaa_rxtx.h b/drivers/net/dpaa/dpaa_rxtx.h
new file mode 100644
index 00000000..2ffc4ffe
--- /dev/null
+++ b/drivers/net/dpaa/dpaa_rxtx.h
@@ -0,0 +1,297 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright 2017 NXP.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Freescale Semiconductor, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DPDK_RXTX_H__
+#define __DPDK_RXTX_H__
+
+/* internal offset from where IC is copied to packet buffer*/
+#define DEFAULT_ICIOF 32
+/* IC transfer size */
+#define DEFAULT_ICSZ 48
+
+/* IC offsets from buffer header address */
+#define DEFAULT_RX_ICEOF 16
+#define DEFAULT_TX_ICEOF 16
+
+/*
+ * Values for the L3R field of the FM Parse Results
+ */
+/* L3 Type field: First IP Present IPv4 */
+#define DPAA_L3_PARSE_RESULT_IPV4 0x80
+/* L3 Type field: First IP Present IPv6 */
+#define DPAA_L3_PARSE_RESULT_IPV6 0x40
+/* Values for the L4R field of the FM Parse Results
+ * See $8.8.4.7.20 - L4 HXS - L4 Results from DPAA-Rev2 Reference Manual.
+ */
+/* L4 Type field: UDP */
+#define DPAA_L4_PARSE_RESULT_UDP 0x40
+/* L4 Type field: TCP */
+#define DPAA_L4_PARSE_RESULT_TCP 0x20
+
+#define DPAA_SGT_MAX_ENTRIES 16 /* maximum number of entries in SG Table */
+
+#define DPAA_MAX_DEQUEUE_NUM_FRAMES 63
+ /**< Maximum number of frames to be dequeued in a single Rx call */
+
+/* FD structure masks and offset */
+#define DPAA_FD_FORMAT_MASK 0xE0000000
+#define DPAA_FD_OFFSET_MASK 0x1FF00000
+#define DPAA_FD_LENGTH_MASK 0xFFFFF
+#define DPAA_FD_FORMAT_SHIFT 29
+#define DPAA_FD_OFFSET_SHIFT 20
+
+/* Parsing mask (Little Endian) - 0x00E044ED00800000
+ * Classification Plan ID 0x00
+ * L4R 0xE0 -
+ * 0x20 - TCP
+ * 0x40 - UDP
+ * 0x80 - SCTP
+ * L3R 0xEDC4 (in Big Endian) -
+ * 0x8000 - IPv4
+ * 0x4000 - IPv6
+ * 0x8140 - IPv4 Ext + Frag
+ * 0x8040 - IPv4 Frag
+ * 0x8100 - IPv4 Ext
+ * 0x4140 - IPv6 Ext + Frag
+ * 0x4040 - IPv6 Frag
+ * 0x4100 - IPv6 Ext
+ * L2R 0x8000 (in Big Endian) -
+ * 0x8000 - Ethernet type
+ * ShimR & Logical Port ID 0x0000
+ */
+#define DPAA_PARSE_MASK 0x00E044ED00800000
+#define DPAA_PARSE_VLAN_MASK 0x0000000000700000
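+
+/*
+ * Illustrative example (assumed usage, see dpaa_eth_packet_info() in
+ * dpaa_rxtx.c): the Rx path masks the 64-bit parse word from the FMan
+ * annotation and compares it with the DPAA_PKT_TYPE_* values below, e.g.
+ *
+ *	switch (parse_word & DPAA_PARSE_MASK) {
+ *	case DPAA_PKT_TYPE_IPV4_UDP:
+ *		mbuf->packet_type = RTE_PTYPE_L2_ETHER |
+ *				    RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP;
+ *		break;
+ *	...
+ *	}
+ */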
+
+/* Parsed values (Little Endian) */
+#define DPAA_PKT_TYPE_NONE 0x0000000000000000
+#define DPAA_PKT_TYPE_ETHER 0x0000000000800000
+#define DPAA_PKT_TYPE_IPV4 \
+ (0x0000008000000000 | DPAA_PKT_TYPE_ETHER)
+#define DPAA_PKT_TYPE_IPV6 \
+ (0x0000004000000000 | DPAA_PKT_TYPE_ETHER)
+#define DPAA_PKT_TYPE_GRE \
+ (0x0000002000000000 | DPAA_PKT_TYPE_ETHER)
+#define DPAA_PKT_TYPE_IPV4_FRAG \
+ (0x0000400000000000 | DPAA_PKT_TYPE_IPV4)
+#define DPAA_PKT_TYPE_IPV6_FRAG \
+ (0x0000400000000000 | DPAA_PKT_TYPE_IPV6)
+#define DPAA_PKT_TYPE_IPV4_EXT \
+ (0x0000000100000000 | DPAA_PKT_TYPE_IPV4)
+#define DPAA_PKT_TYPE_IPV6_EXT \
+ (0x0000000100000000 | DPAA_PKT_TYPE_IPV6)
+#define DPAA_PKT_TYPE_IPV4_TCP \
+ (0x0020000000000000 | DPAA_PKT_TYPE_IPV4)
+#define DPAA_PKT_TYPE_IPV6_TCP \
+ (0x0020000000000000 | DPAA_PKT_TYPE_IPV6)
+#define DPAA_PKT_TYPE_IPV4_UDP \
+ (0x0040000000000000 | DPAA_PKT_TYPE_IPV4)
+#define DPAA_PKT_TYPE_IPV6_UDP \
+ (0x0040000000000000 | DPAA_PKT_TYPE_IPV6)
+#define DPAA_PKT_TYPE_IPV4_SCTP \
+ (0x0080000000000000 | DPAA_PKT_TYPE_IPV4)
+#define DPAA_PKT_TYPE_IPV6_SCTP \
+ (0x0080000000000000 | DPAA_PKT_TYPE_IPV6)
+#define DPAA_PKT_TYPE_IPV4_FRAG_TCP \
+ (0x0020000000000000 | DPAA_PKT_TYPE_IPV4_FRAG)
+#define DPAA_PKT_TYPE_IPV6_FRAG_TCP \
+ (0x0020000000000000 | DPAA_PKT_TYPE_IPV6_FRAG)
+#define DPAA_PKT_TYPE_IPV4_FRAG_UDP \
+ (0x0040000000000000 | DPAA_PKT_TYPE_IPV4_FRAG)
+#define DPAA_PKT_TYPE_IPV6_FRAG_UDP \
+ (0x0040000000000000 | DPAA_PKT_TYPE_IPV6_FRAG)
+#define DPAA_PKT_TYPE_IPV4_FRAG_SCTP \
+ (0x0080000000000000 | DPAA_PKT_TYPE_IPV4_FRAG)
+#define DPAA_PKT_TYPE_IPV6_FRAG_SCTP \
+ (0x0080000000000000 | DPAA_PKT_TYPE_IPV6_FRAG)
+#define DPAA_PKT_TYPE_IPV4_EXT_UDP \
+ (0x0040000000000000 | DPAA_PKT_TYPE_IPV4_EXT)
+#define DPAA_PKT_TYPE_IPV6_EXT_UDP \
+ (0x0040000000000000 | DPAA_PKT_TYPE_IPV6_EXT)
+#define DPAA_PKT_TYPE_IPV4_EXT_TCP \
+ (0x0020000000000000 | DPAA_PKT_TYPE_IPV4_EXT)
+#define DPAA_PKT_TYPE_IPV6_EXT_TCP \
+ (0x0020000000000000 | DPAA_PKT_TYPE_IPV6_EXT)
+#define DPAA_PKT_TYPE_TUNNEL_4_4 \
+ (0x0000000800000000 | DPAA_PKT_TYPE_IPV4)
+#define DPAA_PKT_TYPE_TUNNEL_6_6 \
+ (0x0000000400000000 | DPAA_PKT_TYPE_IPV6)
+#define DPAA_PKT_TYPE_TUNNEL_4_6 \
+ (0x0000000400000000 | DPAA_PKT_TYPE_IPV4)
+#define DPAA_PKT_TYPE_TUNNEL_6_4 \
+ (0x0000000800000000 | DPAA_PKT_TYPE_IPV6)
+#define DPAA_PKT_TYPE_TUNNEL_4_4_UDP \
+ (0x0040000000000000 | DPAA_PKT_TYPE_TUNNEL_4_4)
+#define DPAA_PKT_TYPE_TUNNEL_6_6_UDP \
+ (0x0040000000000000 | DPAA_PKT_TYPE_TUNNEL_6_6)
+#define DPAA_PKT_TYPE_TUNNEL_4_6_UDP \
+ (0x0040000000000000 | DPAA_PKT_TYPE_TUNNEL_4_6)
+#define DPAA_PKT_TYPE_TUNNEL_6_4_UDP \
+ (0x0040000000000000 | DPAA_PKT_TYPE_TUNNEL_6_4)
+#define DPAA_PKT_TYPE_TUNNEL_4_4_TCP \
+ (0x0020000000000000 | DPAA_PKT_TYPE_TUNNEL_4_4)
+#define DPAA_PKT_TYPE_TUNNEL_6_6_TCP \
+ (0x0020000000000000 | DPAA_PKT_TYPE_TUNNEL_6_6)
+#define DPAA_PKT_TYPE_TUNNEL_4_6_TCP \
+ (0x0020000000000000 | DPAA_PKT_TYPE_TUNNEL_4_6)
+#define DPAA_PKT_TYPE_TUNNEL_6_4_TCP \
+ (0x0020000000000000 | DPAA_PKT_TYPE_TUNNEL_6_4)
+#define DPAA_PKT_L3_LEN_SHIFT 7
+
+/**
+ * FMan parse result array
+ */
+struct dpaa_eth_parse_results_t {
+ uint8_t lpid; /**< Logical port id */
+ uint8_t shimr; /**< Shim header result */
+ union {
+ uint16_t l2r; /**< Layer 2 result */
+ struct {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ uint16_t ethernet:1;
+ uint16_t vlan:1;
+ uint16_t llc_snap:1;
+ uint16_t mpls:1;
+ uint16_t ppoe_ppp:1;
+ uint16_t unused_1:3;
+ uint16_t unknown_eth_proto:1;
+ uint16_t eth_frame_type:2;
+ uint16_t l2r_err:5;
+ /*00-unicast, 01-multicast, 11-broadcast*/
+#else
+ uint16_t l2r_err:5;
+ uint16_t eth_frame_type:2;
+ uint16_t unknown_eth_proto:1;
+ uint16_t unused_1:3;
+ uint16_t ppoe_ppp:1;
+ uint16_t mpls:1;
+ uint16_t llc_snap:1;
+ uint16_t vlan:1;
+ uint16_t ethernet:1;
+#endif
+ } __attribute__((__packed__));
+ } __attribute__((__packed__));
+ union {
+ uint16_t l3r; /**< Layer 3 result */
+ struct {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ uint16_t first_ipv4:1;
+ uint16_t first_ipv6:1;
+ uint16_t gre:1;
+ uint16_t min_enc:1;
+ uint16_t last_ipv4:1;
+ uint16_t last_ipv6:1;
+ uint16_t first_info_err:1;/*0 info, 1 error*/
+ uint16_t first_ip_err_code:5;
+ uint16_t last_info_err:1; /*0 info, 1 error*/
+ uint16_t last_ip_err_code:3;
+#else
+ uint16_t last_ip_err_code:3;
+ uint16_t last_info_err:1; /*0 info, 1 error*/
+ uint16_t first_ip_err_code:5;
+ uint16_t first_info_err:1;/*0 info, 1 error*/
+ uint16_t last_ipv6:1;
+ uint16_t last_ipv4:1;
+ uint16_t min_enc:1;
+ uint16_t gre:1;
+ uint16_t first_ipv6:1;
+ uint16_t first_ipv4:1;
+#endif
+ } __attribute__((__packed__));
+ } __attribute__((__packed__));
+ union {
+ uint8_t l4r; /**< Layer 4 result */
+ struct{
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ uint8_t l4_type:3;
+ uint8_t l4_info_err:1;
+ uint8_t l4_result:4;
+ /* if type IPSec: 1 ESP, 2 AH */
+#else
+ uint8_t l4_result:4;
+ /* if type IPSec: 1 ESP, 2 AH */
+ uint8_t l4_info_err:1;
+ uint8_t l4_type:3;
+#endif
+ } __attribute__((__packed__));
+ } __attribute__((__packed__));
+ uint8_t cplan; /**< Classification plan id */
+ uint16_t nxthdr; /**< Next Header */
+ uint16_t cksum; /**< Checksum */
+ uint32_t lcv; /**< LCV */
+ uint8_t shim_off[3]; /**< Shim offset */
+ uint8_t eth_off; /**< ETH offset */
+ uint8_t llc_snap_off; /**< LLC_SNAP offset */
+ uint8_t vlan_off[2]; /**< VLAN offset */
+ uint8_t etype_off; /**< ETYPE offset */
+ uint8_t pppoe_off; /**< PPP offset */
+ uint8_t mpls_off[2]; /**< MPLS offset */
+ uint8_t ip_off[2]; /**< IP offset */
+ uint8_t gre_off; /**< GRE offset */
+ uint8_t l4_off; /**< Layer 4 offset */
+ uint8_t nxthdr_off; /**< Parser end point */
+} __attribute__ ((__packed__));
+
+/* The structure is the Prepended Data to the Frame which is used by FMAN */
+struct annotations_t {
+ uint8_t reserved[DEFAULT_RX_ICEOF];
+ struct dpaa_eth_parse_results_t parse; /**< Parse results */
+ uint64_t reserved1;
+ uint64_t hash; /**< Hash Result */
+};
+
+#define GET_ANNOTATIONS(_buf) \
+ (struct annotations_t *)(_buf)
+
+#define GET_RX_PRS(_buf) \
+ (struct dpaa_eth_parse_results_t *)((uint8_t *)(_buf) + \
+ DEFAULT_RX_ICEOF)
+
+#define GET_TX_PRS(_buf) \
+ (struct dpaa_eth_parse_results_t *)((uint8_t *)(_buf) + \
+ DEFAULT_TX_ICEOF)
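+
+/*
+ * Example (sketch): the Tx checksum-offload path locates the parse results
+ * inside the packet headroom as
+ *
+ *	struct dpaa_eth_parse_results_t *prs = GET_TX_PRS(mbuf->buf_addr);
+ *
+ * and then fills prs->l3r, prs->l4r and the offsets before enqueuing the FD.
+ */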
+
+uint16_t dpaa_eth_queue_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs);
+
+uint16_t dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs);
+
+uint16_t dpaa_eth_tx_drop_all(void *q __rte_unused,
+ struct rte_mbuf **bufs __rte_unused,
+ uint16_t nb_bufs __rte_unused);
+
+struct rte_mbuf *dpaa_eth_sg_to_mbuf(struct qm_fd *fd, uint32_t ifid);
+
+int dpaa_eth_mbuf_to_sg_fd(struct rte_mbuf *mbuf,
+ struct qm_fd *fd,
+ uint32_t bpid);
+
+#endif
diff --git a/drivers/net/dpaa/rte_pmd_dpaa_version.map b/drivers/net/dpaa/rte_pmd_dpaa_version.map
new file mode 100644
index 00000000..a70bd197
--- /dev/null
+++ b/drivers/net/dpaa/rte_pmd_dpaa_version.map
@@ -0,0 +1,4 @@
+DPDK_17.11 {
+
+ local: *;
+};
diff --git a/drivers/net/dpaa2/Makefile b/drivers/net/dpaa2/Makefile
index 32cd819b..ee9b2cce 100644
--- a/drivers/net/dpaa2/Makefile
+++ b/drivers/net/dpaa2/Makefile
@@ -63,8 +63,11 @@ SRCS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += base/dpaa2_hw_dpni.c
SRCS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += dpaa2_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += dpaa2_ethdev.c
SRCS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += mc/dpni.c
+SRCS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += mc/dpkg.c
LDLIBS += -lrte_bus_fslmc
LDLIBS += -lrte_mempool_dpaa2
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
index 4c82aa87..e3ab90ac 100644
--- a/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
+++ b/drivers/net/dpaa2/base/dpaa2_hw_dpni.c
@@ -49,14 +49,14 @@
#include "../dpaa2_ethdev.h"
-static void
+static int
dpaa2_distset_to_dpkg_profile_cfg(
- uint32_t req_dist_set,
+ uint64_t req_dist_set,
struct dpkg_profile_cfg *kg_cfg);
int
dpaa2_setup_flow_dist(struct rte_eth_dev *eth_dev,
- uint32_t req_dist_set)
+ uint64_t req_dist_set)
{
struct dpaa2_dev_priv *priv = eth_dev->data->dev_private;
struct fsl_mc_io *dpni = priv->hw;
@@ -68,20 +68,26 @@ dpaa2_setup_flow_dist(struct rte_eth_dev *eth_dev,
p_params = rte_malloc(
NULL, DIST_PARAM_IOVA_SIZE, RTE_CACHE_LINE_SIZE);
if (!p_params) {
- RTE_LOG(ERR, PMD, "Memory unavaialble\n");
+ PMD_INIT_LOG(ERR, "Memory unavailable");
return -ENOMEM;
}
memset(p_params, 0, DIST_PARAM_IOVA_SIZE);
memset(&tc_cfg, 0, sizeof(struct dpni_rx_tc_dist_cfg));
- dpaa2_distset_to_dpkg_profile_cfg(req_dist_set, &kg_cfg);
+ ret = dpaa2_distset_to_dpkg_profile_cfg(req_dist_set, &kg_cfg);
+ if (ret) {
+ PMD_INIT_LOG(ERR, "given rss_hf (%lx) not supported",
+ req_dist_set);
+ rte_free(p_params);
+ return ret;
+ }
tc_cfg.key_cfg_iova = (uint64_t)(DPAA2_VADDR_TO_IOVA(p_params));
tc_cfg.dist_size = eth_dev->data->nb_rx_queues;
tc_cfg.dist_mode = DPNI_DIST_MODE_HASH;
- ret = dpni_prepare_key_cfg(&kg_cfg, p_params);
+ ret = dpkg_prepare_key_cfg(&kg_cfg, p_params);
if (ret) {
- RTE_LOG(ERR, PMD, "Unable to prepare extract parameters\n");
+ PMD_INIT_LOG(ERR, "Unable to prepare extract parameters");
rte_free(p_params);
return ret;
}
@@ -90,9 +96,9 @@ dpaa2_setup_flow_dist(struct rte_eth_dev *eth_dev,
&tc_cfg);
rte_free(p_params);
if (ret) {
- RTE_LOG(ERR, PMD,
- "Setting distribution for Rx failed with err: %d\n",
- ret);
+ PMD_INIT_LOG(ERR,
+ "Setting distribution for Rx failed with err: %d",
+ ret);
return ret;
}
@@ -113,19 +119,19 @@ int dpaa2_remove_flow_dist(
p_params = rte_malloc(
NULL, DIST_PARAM_IOVA_SIZE, RTE_CACHE_LINE_SIZE);
if (!p_params) {
- RTE_LOG(ERR, PMD, "Memory unavaialble\n");
+ PMD_INIT_LOG(ERR, "Memory unavailable");
return -ENOMEM;
}
memset(p_params, 0, DIST_PARAM_IOVA_SIZE);
memset(&tc_cfg, 0, sizeof(struct dpni_rx_tc_dist_cfg));
-
+ kg_cfg.num_extracts = 0;
tc_cfg.key_cfg_iova = (uint64_t)(DPAA2_VADDR_TO_IOVA(p_params));
tc_cfg.dist_size = 0;
tc_cfg.dist_mode = DPNI_DIST_MODE_NONE;
- ret = dpni_prepare_key_cfg(&kg_cfg, p_params);
+ ret = dpkg_prepare_key_cfg(&kg_cfg, p_params);
if (ret) {
- RTE_LOG(ERR, PMD, "Unable to prepare extract parameters\n");
+ PMD_INIT_LOG(ERR, "Unable to prepare extract parameters");
rte_free(p_params);
return ret;
}
@@ -133,18 +139,16 @@ int dpaa2_remove_flow_dist(
ret = dpni_set_rx_tc_dist(dpni, CMD_PRI_LOW, priv->token, tc_index,
&tc_cfg);
rte_free(p_params);
- if (ret) {
- RTE_LOG(ERR, PMD,
- "Setting distribution for Rx failed with err: %d\n",
- ret);
- return ret;
- }
+ if (ret)
+ PMD_INIT_LOG(ERR,
+ "Setting distribution for Rx failed with err:%d",
+ ret);
return ret;
}
-static void
+static int
dpaa2_distset_to_dpkg_profile_cfg(
- uint32_t req_dist_set,
+ uint64_t req_dist_set,
struct dpkg_profile_cfg *kg_cfg)
{
uint32_t loop = 0, i = 0, dist_field = 0;
@@ -278,14 +282,17 @@ dpaa2_distset_to_dpkg_profile_cfg(
break;
default:
- PMD_DRV_LOG(WARNING, "Bad flow distribution"
- " option %x\n", dist_field);
+ PMD_INIT_LOG(WARNING,
+ "Unsupported flow dist option %x",
+ dist_field);
+ return -EINVAL;
}
}
req_dist_set = req_dist_set >> 1;
loop++;
}
kg_cfg->num_extracts = i;
+ return 0;
}
int
@@ -337,6 +344,7 @@ dpaa2_attach_bp_list(struct dpaa2_dev_priv *priv,
bpool_cfg.pools[0].backup_pool = 0;
bpool_cfg.pools[0].buffer_size = RTE_ALIGN_CEIL(bp_list->buf_pool.size,
DPAA2_PACKET_LAYOUT_ALIGN);
+ bpool_cfg.pools[0].priority_mask = 0;
retcode = dpni_set_pools(dpni, CMD_PRI_LOW, priv->token, &bpool_cfg);
if (retcode != 0) {
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c
index 429b3a08..202f84f0 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.c
+++ b/drivers/net/dpaa2/dpaa2_ethdev.c
@@ -52,8 +52,32 @@
#include <mc/fsl_dpmng.h>
#include "dpaa2_ethdev.h"
+struct rte_dpaa2_xstats_name_off {
+ char name[RTE_ETH_XSTATS_NAME_SIZE];
+ uint8_t page_id; /* dpni statistics page id */
+ uint8_t stats_id; /* stats id in the given page */
+};
+
+static const struct rte_dpaa2_xstats_name_off dpaa2_xstats_strings[] = {
+ {"ingress_multicast_frames", 0, 2},
+ {"ingress_multicast_bytes", 0, 3},
+ {"ingress_broadcast_frames", 0, 4},
+ {"ingress_broadcast_bytes", 0, 5},
+ {"egress_multicast_frames", 1, 2},
+ {"egress_multicast_bytes", 1, 3},
+ {"egress_broadcast_frames", 1, 4},
+ {"egress_broadcast_bytes", 1, 5},
+ {"ingress_filtered_frames", 2, 0},
+ {"ingress_discarded_frames", 2, 1},
+ {"ingress_nobuffer_discards", 2, 2},
+ {"egress_discarded_frames", 2, 3},
+ {"egress_confirmed_frames", 2, 4},
+};
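+
+/* Each entry above is a (name, page_id, stats_id) tuple: page_id selects the
+ * dpni_get_statistics() page to query and stats_id indexes raw.counter[] in
+ * the union dpni_statistics returned for that page.
+ */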
+
static struct rte_dpaa2_driver rte_dpaa2_pmd;
static int dpaa2_dev_uninit(struct rte_eth_dev *eth_dev);
+static int dpaa2_dev_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete);
static int dpaa2_dev_set_link_up(struct rte_eth_dev *dev);
static int dpaa2_dev_set_link_down(struct rte_eth_dev *dev);
static int dpaa2_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
@@ -138,7 +162,7 @@ dpaa2_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
return ret;
}
-static void
+static int
dpaa2_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
struct dpaa2_dev_priv *priv = dev->data->dev_private;
@@ -158,6 +182,14 @@ dpaa2_vlan_offload_set(struct rte_eth_dev *dev, int mask)
RTE_LOG(ERR, PMD, "Unable to set vlan filter = %d\n",
ret);
}
+
+ if (mask & ETH_VLAN_EXTEND_MASK) {
+ if (dev->data->dev_conf.rxmode.hw_vlan_extend)
+ RTE_LOG(INFO, PMD,
+ "VLAN extend offload not supported\n");
+ }
+
+ return 0;
}
static int
@@ -304,8 +336,10 @@ fail:
static int
dpaa2_eth_dev_configure(struct rte_eth_dev *dev)
{
- struct rte_eth_dev_data *data = dev->data;
- struct rte_eth_conf *eth_conf = &data->dev_conf;
+ struct dpaa2_dev_priv *priv = dev->data->dev_private;
+ struct fsl_mc_io *dpni = priv->hw;
+ struct rte_eth_conf *eth_conf = &dev->data->dev_conf;
+ int rx_ip_csum_offload = false;
int ret;
PMD_INIT_FUNC_TRACE();
@@ -324,18 +358,7 @@ dpaa2_eth_dev_configure(struct rte_eth_dev *dev)
}
}
- /* Check for correct configuration */
- if (eth_conf->rxmode.mq_mode != ETH_MQ_RX_RSS &&
- data->nb_rx_queues > 1) {
- PMD_INIT_LOG(ERR, "Distribution is not enabled, "
- "but Rx queues more than 1\n");
- return -1;
- }
-
if (eth_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) {
- /* Return in case number of Rx queues is 1 */
- if (data->nb_rx_queues == 1)
- return 0;
ret = dpaa2_setup_flow_dist(dev,
eth_conf->rx_adv_conf.rss_conf.rss_hf);
if (ret) {
@@ -344,6 +367,41 @@ dpaa2_eth_dev_configure(struct rte_eth_dev *dev)
return ret;
}
}
+
+ if (eth_conf->rxmode.hw_ip_checksum)
+ rx_ip_csum_offload = true;
+
+ ret = dpni_set_offload(dpni, CMD_PRI_LOW, priv->token,
+ DPNI_OFF_RX_L3_CSUM, rx_ip_csum_offload);
+ if (ret) {
+ PMD_INIT_LOG(ERR, "Error to set RX l3 csum:Error = %d\n", ret);
+ return ret;
+ }
+
+ ret = dpni_set_offload(dpni, CMD_PRI_LOW, priv->token,
+ DPNI_OFF_RX_L4_CSUM, rx_ip_csum_offload);
+ if (ret) {
+ PMD_INIT_LOG(ERR, "Error to get RX l4 csum:Error = %d\n", ret);
+ return ret;
+ }
+
+ ret = dpni_set_offload(dpni, CMD_PRI_LOW, priv->token,
+ DPNI_OFF_TX_L3_CSUM, true);
+ if (ret) {
+ PMD_INIT_LOG(ERR, "Error to set TX l3 csum:Error = %d\n", ret);
+ return ret;
+ }
+
+ ret = dpni_set_offload(dpni, CMD_PRI_LOW, priv->token,
+ DPNI_OFF_TX_L4_CSUM, true);
+ if (ret) {
+ PMD_INIT_LOG(ERR, "Error to get TX l4 csum:Error = %d\n", ret);
+ return ret;
+ }
+
+ /* update the current status */
+ dpaa2_dev_link_update(dev, 0);
+
return 0;
}
@@ -370,8 +428,8 @@ dpaa2_dev_rx_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_FUNC_TRACE();
- PMD_INIT_LOG(DEBUG, "dev =%p, queue =%d, pool = %p, conf =%p",
- dev, rx_queue_id, mb_pool, rx_conf);
+ PMD_DRV_LOG(DEBUG, "dev =%p, queue =%d, pool = %p, conf =%p",
+ dev, rx_queue_id, mb_pool, rx_conf);
if (!priv->bp_list || priv->bp_list->mp != mb_pool) {
bpid = mempool_to_bpid(mb_pool);
@@ -419,8 +477,9 @@ dpaa2_dev_rx_queue_setup(struct rte_eth_dev *dev,
/*enabling per rx queue congestion control */
taildrop.threshold = CONG_THRESHOLD_RX_Q;
taildrop.units = DPNI_CONGESTION_UNIT_BYTES;
- PMD_INIT_LOG(DEBUG, "Enabling Early Drop on queue = %d",
- rx_queue_id);
+ taildrop.oal = CONG_RX_OAL;
+ PMD_DRV_LOG(DEBUG, "Enabling Early Drop on queue = %d",
+ rx_queue_id);
ret = dpni_set_taildrop(dpni, CMD_PRI_LOW, priv->token,
DPNI_CP_QUEUE, DPNI_QUEUE_RX,
dpaa2_q->tc_index, flow_id, &taildrop);
@@ -455,8 +514,10 @@ dpaa2_dev_tx_queue_setup(struct rte_eth_dev *dev,
PMD_INIT_FUNC_TRACE();
/* Return if queue already configured */
- if (dpaa2_q->flow_id != 0xffff)
+ if (dpaa2_q->flow_id != 0xffff) {
+ dev->data->tx_queues[tx_queue_id] = dpaa2_q;
return 0;
+ }
memset(&tx_conf_cfg, 0, sizeof(struct dpni_queue));
memset(&tx_flow_cfg, 0, sizeof(struct dpni_queue));
@@ -555,9 +616,87 @@ dpaa2_supported_ptypes_get(struct rte_eth_dev *dev)
return NULL;
}
+/**
+ * DPAA2 link interrupt handler
+ *
+ * @param param
+ * The address of parameter (struct rte_eth_dev *) registered before.
+ *
+ * @return
+ * void
+ */
+static void
+dpaa2_interrupt_handler(void *param)
+{
+ struct rte_eth_dev *dev = param;
+ struct dpaa2_dev_priv *priv = dev->data->dev_private;
+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
+ int ret;
+ int irq_index = DPNI_IRQ_INDEX;
+ unsigned int status = 0, clear = 0;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (dpni == NULL) {
+ RTE_LOG(ERR, PMD, "dpni is NULL");
+ return;
+ }
+
+ ret = dpni_get_irq_status(dpni, CMD_PRI_LOW, priv->token,
+ irq_index, &status);
+ if (unlikely(ret)) {
+ RTE_LOG(ERR, PMD, "Can't get irq status (err %d)", ret);
+ clear = 0xffffffff;
+ goto out;
+ }
+
+ if (status & DPNI_IRQ_EVENT_LINK_CHANGED) {
+ clear = DPNI_IRQ_EVENT_LINK_CHANGED;
+ dpaa2_dev_link_update(dev, 0);
+ /* calling all the apps registered for link status event */
+ _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
+ NULL, NULL);
+ }
+out:
+ ret = dpni_clear_irq_status(dpni, CMD_PRI_LOW, priv->token,
+ irq_index, clear);
+ if (unlikely(ret))
+ RTE_LOG(ERR, PMD, "Can't clear irq status (err %d)", ret);
+}
+
+static int
+dpaa2_eth_setup_irqs(struct rte_eth_dev *dev, int enable)
+{
+ int err = 0;
+ struct dpaa2_dev_priv *priv = dev->data->dev_private;
+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
+ int irq_index = DPNI_IRQ_INDEX;
+ unsigned int mask = DPNI_IRQ_EVENT_LINK_CHANGED;
+
+ PMD_INIT_FUNC_TRACE();
+
+ err = dpni_set_irq_mask(dpni, CMD_PRI_LOW, priv->token,
+ irq_index, mask);
+ if (err < 0) {
+ PMD_INIT_LOG(ERR, "Error: dpni_set_irq_mask():%d (%s)", err,
+ strerror(-err));
+ return err;
+ }
+
+ err = dpni_set_irq_enable(dpni, CMD_PRI_LOW, priv->token,
+ irq_index, enable);
+ if (err < 0)
+ PMD_INIT_LOG(ERR, "Error: dpni_set_irq_enable():%d (%s)", err,
+ strerror(-err));
+
+ return err;
+}
+
static int
dpaa2_dev_start(struct rte_eth_dev *dev)
{
+ struct rte_device *rdev = dev->device;
+ struct rte_dpaa2_device *dpaa2_dev;
struct rte_eth_dev_data *data = dev->data;
struct dpaa2_dev_priv *priv = data->dev_private;
struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
@@ -567,6 +706,10 @@ dpaa2_dev_start(struct rte_eth_dev *dev)
struct dpni_queue_id qid;
struct dpaa2_queue *dpaa2_q;
int ret, i;
+ struct rte_intr_handle *intr_handle;
+
+ dpaa2_dev = container_of(rdev, struct rte_dpaa2_device, device);
+ intr_handle = &dpaa2_dev->intr_handle;
PMD_INIT_FUNC_TRACE();
@@ -577,7 +720,7 @@ dpaa2_dev_start(struct rte_eth_dev *dev)
return ret;
}
- /* Power up the phy. Needed to make the link go Up */
+ /* Power up the phy. Needed to make the link go UP */
dpaa2_dev_set_link_up(dev);
ret = dpni_get_qdid(dpni, CMD_PRI_LOW, priv->token,
@@ -601,34 +744,6 @@ dpaa2_dev_start(struct rte_eth_dev *dev)
dpaa2_q->fqid = qid.fqid;
}
- ret = dpni_set_offload(dpni, CMD_PRI_LOW, priv->token,
- DPNI_OFF_RX_L3_CSUM, true);
- if (ret) {
- PMD_INIT_LOG(ERR, "Error to set RX l3 csum:Error = %d\n", ret);
- return ret;
- }
-
- ret = dpni_set_offload(dpni, CMD_PRI_LOW, priv->token,
- DPNI_OFF_RX_L4_CSUM, true);
- if (ret) {
- PMD_INIT_LOG(ERR, "Error to get RX l4 csum:Error = %d\n", ret);
- return ret;
- }
-
- ret = dpni_set_offload(dpni, CMD_PRI_LOW, priv->token,
- DPNI_OFF_TX_L3_CSUM, true);
- if (ret) {
- PMD_INIT_LOG(ERR, "Error to set TX l3 csum:Error = %d\n", ret);
- return ret;
- }
-
- ret = dpni_set_offload(dpni, CMD_PRI_LOW, priv->token,
- DPNI_OFF_TX_L4_CSUM, true);
- if (ret) {
- PMD_INIT_LOG(ERR, "Error to get TX l4 csum:Error = %d\n", ret);
- return ret;
- }
-
/*checksum errors, send them to normal path and set it in annotation */
err_cfg.errors = DPNI_ERROR_L3CE | DPNI_ERROR_L4CE;
@@ -643,8 +758,33 @@ dpaa2_dev_start(struct rte_eth_dev *dev)
return ret;
}
/* VLAN Offload Settings */
- if (priv->max_vlan_filters)
- dpaa2_vlan_offload_set(dev, ETH_VLAN_FILTER_MASK);
+ if (priv->max_vlan_filters) {
+ ret = dpaa2_vlan_offload_set(dev, ETH_VLAN_FILTER_MASK);
+ if (ret) {
+ PMD_INIT_LOG(ERR, "Error to dpaa2_vlan_offload_set:"
+ "code = %d\n", ret);
+ return ret;
+ }
+ }
+
+
+ /* if the interrupts were configured on this device */
+ if (intr_handle && (intr_handle->fd) &&
+ (dev->data->dev_conf.intr_conf.lsc != 0)) {
+ /* Registering LSC interrupt handler */
+ rte_intr_callback_register(intr_handle,
+ dpaa2_interrupt_handler,
+ (void *)dev);
+
+ /* enable vfio intr/eventfd mapping
+ * Interrupt index 0 is required, so we can not use
+ * rte_intr_enable.
+ */
+ rte_dpaa2_intr_enable(intr_handle, DPNI_IRQ_INDEX);
+
+ /* enable dpni_irqs */
+ dpaa2_eth_setup_irqs(dev, 1);
+ }
return 0;
}
@@ -660,9 +800,25 @@ dpaa2_dev_stop(struct rte_eth_dev *dev)
struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
int ret;
struct rte_eth_link link;
+ struct rte_intr_handle *intr_handle = dev->intr_handle;
PMD_INIT_FUNC_TRACE();
+ /* reset interrupt callback */
+ if (intr_handle && (intr_handle->fd) &&
+ (dev->data->dev_conf.intr_conf.lsc != 0)) {
+ /*disable dpni irqs */
+ dpaa2_eth_setup_irqs(dev, 0);
+
+ /* disable vfio intr before callback unregister */
+ rte_dpaa2_intr_disable(intr_handle, DPNI_IRQ_INDEX);
+
+ /* Unregistering LSC interrupt handler */
+ rte_intr_callback_unregister(intr_handle,
+ dpaa2_interrupt_handler,
+ (void *)dev);
+ }
+
dpaa2_dev_set_link_down(dev);
ret = dpni_disable(dpni, CMD_PRI_LOW, priv->token);
@@ -838,7 +994,7 @@ dpaa2_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
PMD_DRV_LOG(ERR, "setting the max frame length failed");
return -1;
}
- PMD_DRV_LOG(INFO, "MTU is configured %d for the device\n", mtu);
+ PMD_DRV_LOG(INFO, "MTU is configured %d for the device", mtu);
return 0;
}
@@ -916,7 +1072,7 @@ dpaa2_dev_set_mac_addr(struct rte_eth_dev *dev,
"error: Setting the MAC ADDR failed %d\n", ret);
}
static
-void dpaa2_dev_stats_get(struct rte_eth_dev *dev,
+int dpaa2_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats)
{
struct dpaa2_dev_priv *priv = dev->data->dev_private;
@@ -931,17 +1087,17 @@ void dpaa2_dev_stats_get(struct rte_eth_dev *dev,
if (!dpni) {
RTE_LOG(ERR, PMD, "dpni is NULL\n");
- return;
+ return -EINVAL;
}
if (!stats) {
RTE_LOG(ERR, PMD, "stats is NULL\n");
- return;
+ return -EINVAL;
}
/*Get Counters from page_0*/
retcode = dpni_get_statistics(dpni, CMD_PRI_LOW, priv->token,
- page0, &value);
+ page0, 0, &value);
if (retcode)
goto err;
@@ -950,7 +1106,7 @@ void dpaa2_dev_stats_get(struct rte_eth_dev *dev,
/*Get Counters from page_1*/
retcode = dpni_get_statistics(dpni, CMD_PRI_LOW, priv->token,
- page1, &value);
+ page1, 0, &value);
if (retcode)
goto err;
@@ -959,7 +1115,7 @@ void dpaa2_dev_stats_get(struct rte_eth_dev *dev,
/*Get Counters from page_2*/
retcode = dpni_get_statistics(dpni, CMD_PRI_LOW, priv->token,
- page2, &value);
+ page2, 0, &value);
if (retcode)
goto err;
@@ -971,15 +1127,158 @@ void dpaa2_dev_stats_get(struct rte_eth_dev *dev,
stats->oerrors = value.page_2.egress_discarded_frames;
stats->imissed = value.page_2.ingress_nobuffer_discards;
- return;
+ return 0;
err:
RTE_LOG(ERR, PMD, "Operation not completed:Error Code = %d\n", retcode);
- return;
+ return retcode;
};
-static
-void dpaa2_dev_stats_reset(struct rte_eth_dev *dev)
+static int
+dpaa2_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
+ unsigned int n)
+{
+ struct dpaa2_dev_priv *priv = dev->data->dev_private;
+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
+ int32_t retcode;
+ union dpni_statistics value[3] = {};
+ unsigned int i = 0, num = RTE_DIM(dpaa2_xstats_strings);
+
+ if (xstats == NULL)
+ return 0;
+
+ if (n < num)
+ return num;
+
+ /* Get Counters from page_0*/
+ retcode = dpni_get_statistics(dpni, CMD_PRI_LOW, priv->token,
+ 0, 0, &value[0]);
+ if (retcode)
+ goto err;
+
+ /* Get Counters from page_1*/
+ retcode = dpni_get_statistics(dpni, CMD_PRI_LOW, priv->token,
+ 1, 0, &value[1]);
+ if (retcode)
+ goto err;
+
+ /* Get Counters from page_2*/
+ retcode = dpni_get_statistics(dpni, CMD_PRI_LOW, priv->token,
+ 2, 0, &value[2]);
+ if (retcode)
+ goto err;
+
+ for (i = 0; i < num; i++) {
+ xstats[i].id = i;
+ xstats[i].value = value[dpaa2_xstats_strings[i].page_id].
+ raw.counter[dpaa2_xstats_strings[i].stats_id];
+ }
+ return i;
+err:
+ RTE_LOG(ERR, PMD, "Error in obtaining extended stats (%d)\n", retcode);
+ return retcode;
+}
+
+static int
+dpaa2_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
+ struct rte_eth_xstat_name *xstats_names,
+ __rte_unused unsigned int limit)
+{
+ unsigned int i, stat_cnt = RTE_DIM(dpaa2_xstats_strings);
+
+ if (xstats_names != NULL)
+ for (i = 0; i < stat_cnt; i++)
+ snprintf(xstats_names[i].name,
+ sizeof(xstats_names[i].name),
+ "%s",
+ dpaa2_xstats_strings[i].name);
+
+ return stat_cnt;
+}
+
+static int
+dpaa2_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids,
+ uint64_t *values, unsigned int n)
+{
+ unsigned int i, stat_cnt = RTE_DIM(dpaa2_xstats_strings);
+ uint64_t values_copy[stat_cnt];
+
+ if (!ids) {
+ struct dpaa2_dev_priv *priv = dev->data->dev_private;
+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
+ int32_t retcode;
+ union dpni_statistics value[3] = {};
+
+ if (n < stat_cnt)
+ return stat_cnt;
+
+ if (!values)
+ return 0;
+
+ /* Get Counters from page_0*/
+ retcode = dpni_get_statistics(dpni, CMD_PRI_LOW, priv->token,
+ 0, 0, &value[0]);
+ if (retcode)
+ return 0;
+
+ /* Get Counters from page_1*/
+ retcode = dpni_get_statistics(dpni, CMD_PRI_LOW, priv->token,
+ 1, 0, &value[1]);
+ if (retcode)
+ return 0;
+
+ /* Get Counters from page_2*/
+ retcode = dpni_get_statistics(dpni, CMD_PRI_LOW, priv->token,
+ 2, 0, &value[2]);
+ if (retcode)
+ return 0;
+
+ for (i = 0; i < stat_cnt; i++) {
+ values[i] = value[dpaa2_xstats_strings[i].page_id].
+ raw.counter[dpaa2_xstats_strings[i].stats_id];
+ }
+ return stat_cnt;
+ }
+
+ dpaa2_xstats_get_by_id(dev, NULL, values_copy, stat_cnt);
+
+ for (i = 0; i < n; i++) {
+ if (ids[i] >= stat_cnt) {
+ PMD_INIT_LOG(ERR, "id value isn't valid");
+ return -1;
+ }
+ values[i] = values_copy[ids[i]];
+ }
+ return n;
+}
+
+static int
+dpaa2_xstats_get_names_by_id(
+ struct rte_eth_dev *dev,
+ struct rte_eth_xstat_name *xstats_names,
+ const uint64_t *ids,
+ unsigned int limit)
+{
+ unsigned int i, stat_cnt = RTE_DIM(dpaa2_xstats_strings);
+ struct rte_eth_xstat_name xstats_names_copy[stat_cnt];
+
+ if (!ids)
+ return dpaa2_xstats_get_names(dev, xstats_names, limit);
+
+ dpaa2_xstats_get_names(dev, xstats_names_copy, limit);
+
+ for (i = 0; i < limit; i++) {
+ if (ids[i] >= stat_cnt) {
+ PMD_INIT_LOG(ERR, "id value isn't valid");
+ return -1;
+ }
+ strcpy(xstats_names[i].name, xstats_names_copy[ids[i]].name);
+ }
+ return limit;
+}
+
+static void
+dpaa2_dev_stats_reset(struct rte_eth_dev *dev)
{
struct dpaa2_dev_priv *priv = dev->data->dev_private;
struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
@@ -1014,8 +1313,6 @@ dpaa2_dev_link_update(struct rte_eth_dev *dev,
struct rte_eth_link link, old;
struct dpni_link_state state = {0};
- PMD_INIT_FUNC_TRACE();
-
if (dpni == NULL) {
RTE_LOG(ERR, PMD, "dpni is NULL\n");
return 0;
@@ -1048,7 +1345,7 @@ dpaa2_dev_link_update(struct rte_eth_dev *dev,
if (link.link_status)
PMD_DRV_LOG(INFO, "Port %d Link is Up\n", dev->data->port_id);
else
- PMD_DRV_LOG(INFO, "Port %d Link is Down\n", dev->data->port_id);
+ PMD_DRV_LOG(INFO, "Port %d Link is Down", dev->data->port_id);
return 0;
}
@@ -1063,8 +1360,7 @@ dpaa2_dev_set_link_up(struct rte_eth_dev *dev)
struct dpaa2_dev_priv *priv;
struct fsl_mc_io *dpni;
int en = 0;
-
- PMD_INIT_FUNC_TRACE();
+ struct dpni_link_state state = {0};
priv = dev->data->dev_private;
dpni = (struct fsl_mc_io *)priv->hw;
@@ -1090,11 +1386,21 @@ dpaa2_dev_set_link_up(struct rte_eth_dev *dev)
return -EINVAL;
}
}
+ ret = dpni_get_link_state(dpni, CMD_PRI_LOW, priv->token, &state);
+ if (ret < 0) {
+ RTE_LOG(ERR, PMD, "error: dpni_get_link_state %d\n", ret);
+ return -1;
+ }
+
/* changing tx burst function to start enqueues */
dev->tx_pkt_burst = dpaa2_dev_tx;
- dev->data->dev_link.link_status = 1;
+ dev->data->dev_link.link_status = state.up;
- PMD_DRV_LOG(INFO, "Port %d Link UP successful", dev->data->port_id);
+ if (state.up)
+ PMD_DRV_LOG(INFO, "Port %d Link is set as UP",
+ dev->data->port_id);
+ else
+ PMD_DRV_LOG(INFO, "Port %d Link is DOWN", dev->data->port_id);
return ret;
}
@@ -1299,6 +1605,108 @@ dpaa2_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
return ret;
}
+static int
+dpaa2_dev_rss_hash_update(struct rte_eth_dev *dev,
+ struct rte_eth_rss_conf *rss_conf)
+{
+ struct rte_eth_dev_data *data = dev->data;
+ struct rte_eth_conf *eth_conf = &data->dev_conf;
+ int ret;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (rss_conf->rss_hf) {
+ ret = dpaa2_setup_flow_dist(dev, rss_conf->rss_hf);
+ if (ret) {
+ PMD_INIT_LOG(ERR, "unable to set flow dist");
+ return ret;
+ }
+ } else {
+ ret = dpaa2_remove_flow_dist(dev, 0);
+ if (ret) {
+ PMD_INIT_LOG(ERR, "unable to remove flow dist");
+ return ret;
+ }
+ }
+ eth_conf->rx_adv_conf.rss_conf.rss_hf = rss_conf->rss_hf;
+ return 0;
+}
+
+static int
+dpaa2_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
+ struct rte_eth_rss_conf *rss_conf)
+{
+ struct rte_eth_dev_data *data = dev->data;
+ struct rte_eth_conf *eth_conf = &data->dev_conf;
+
+ /* dpaa2 does not support rss_key, so length should be 0*/
+ rss_conf->rss_key_len = 0;
+ rss_conf->rss_hf = eth_conf->rx_adv_conf.rss_conf.rss_hf;
+ return 0;
+}
+
+int dpaa2_eth_eventq_attach(const struct rte_eth_dev *dev,
+ int eth_rx_queue_id,
+ uint16_t dpcon_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
+{
+ struct dpaa2_dev_priv *eth_priv = dev->data->dev_private;
+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)eth_priv->hw;
+ struct dpaa2_queue *dpaa2_ethq = eth_priv->rx_vq[eth_rx_queue_id];
+ uint8_t flow_id = dpaa2_ethq->flow_id;
+ struct dpni_queue cfg;
+ uint8_t options;
+ int ret;
+
+ if (queue_conf->ev.sched_type == RTE_SCHED_TYPE_PARALLEL)
+ dpaa2_ethq->cb = dpaa2_dev_process_parallel_event;
+ else
+ return -EINVAL;
+
+ memset(&cfg, 0, sizeof(struct dpni_queue));
+ options = DPNI_QUEUE_OPT_DEST;
+ cfg.destination.type = DPNI_DEST_DPCON;
+ cfg.destination.id = dpcon_id;
+ cfg.destination.priority = queue_conf->ev.priority;
+
+ options |= DPNI_QUEUE_OPT_USER_CTX;
+ cfg.user_context = (uint64_t)(dpaa2_ethq);
+
+ ret = dpni_set_queue(dpni, CMD_PRI_LOW, eth_priv->token, DPNI_QUEUE_RX,
+ dpaa2_ethq->tc_index, flow_id, options, &cfg);
+ if (ret) {
+ RTE_LOG(ERR, PMD, "Error in dpni_set_queue: ret: %d\n", ret);
+ return ret;
+ }
+
+ memcpy(&dpaa2_ethq->ev, &queue_conf->ev, sizeof(struct rte_event));
+
+ return 0;
+}
+
+int dpaa2_eth_eventq_detach(const struct rte_eth_dev *dev,
+ int eth_rx_queue_id)
+{
+ struct dpaa2_dev_priv *eth_priv = dev->data->dev_private;
+ struct fsl_mc_io *dpni = (struct fsl_mc_io *)eth_priv->hw;
+ struct dpaa2_queue *dpaa2_ethq = eth_priv->rx_vq[eth_rx_queue_id];
+ uint8_t flow_id = dpaa2_ethq->flow_id;
+ struct dpni_queue cfg;
+ uint8_t options;
+ int ret;
+
+ memset(&cfg, 0, sizeof(struct dpni_queue));
+ options = DPNI_QUEUE_OPT_DEST;
+ cfg.destination.type = DPNI_DEST_NONE;
+
+ ret = dpni_set_queue(dpni, CMD_PRI_LOW, eth_priv->token, DPNI_QUEUE_RX,
+ dpaa2_ethq->tc_index, flow_id, options, &cfg);
+ if (ret)
+ RTE_LOG(ERR, PMD, "Error in dpni_set_queue: ret: %d\n", ret);
+
+ return ret;
+}
+
static struct eth_dev_ops dpaa2_ethdev_ops = {
.dev_configure = dpaa2_eth_dev_configure,
.dev_start = dpaa2_dev_start,
@@ -1312,7 +1720,12 @@ static struct eth_dev_ops dpaa2_ethdev_ops = {
.dev_set_link_down = dpaa2_dev_set_link_down,
.link_update = dpaa2_dev_link_update,
.stats_get = dpaa2_dev_stats_get,
+ .xstats_get = dpaa2_dev_xstats_get,
+ .xstats_get_by_id = dpaa2_xstats_get_by_id,
+ .xstats_get_names_by_id = dpaa2_xstats_get_names_by_id,
+ .xstats_get_names = dpaa2_xstats_get_names,
.stats_reset = dpaa2_dev_stats_reset,
+ .xstats_reset = dpaa2_dev_stats_reset,
.fw_version_get = dpaa2_fw_version_get,
.dev_infos_get = dpaa2_dev_info_get,
.dev_supported_ptypes_get = dpaa2_supported_ptypes_get,
@@ -1328,6 +1741,8 @@ static struct eth_dev_ops dpaa2_ethdev_ops = {
.mac_addr_add = dpaa2_dev_add_mac_addr,
.mac_addr_remove = dpaa2_dev_remove_mac_addr,
.mac_addr_set = dpaa2_dev_set_mac_addr,
+ .rss_hash_update = dpaa2_dev_rss_hash_update,
+ .rss_hash_conf_get = dpaa2_dev_rss_hash_conf_get,
};
static int
@@ -1384,22 +1799,19 @@ dpaa2_dev_init(struct rte_eth_dev *eth_dev)
goto init_err;
}
- priv->num_tc = attr.num_tcs;
+ priv->num_rx_tc = attr.num_rx_tcs;
- /* Resetting the "num_rx_vqueues" to equal number of queues in first TC
+ /* Resetting the "num_rx_queues" to equal number of queues in first TC
* as only one TC is supported on Rx Side. Once Multiple TCs will be
* in use for Rx processing then this will be changed or removed.
*/
priv->nb_rx_queues = attr.num_queues;
- /* TODO:Using hard coded value for number of TX queues due to dependency
- * in MC.
- */
- priv->nb_tx_queues = 8;
+ /* Using number of TX queues as number of TX TCs */
+ priv->nb_tx_queues = attr.num_tx_tcs;
- PMD_INIT_LOG(DEBUG, "num TC - RX %d", priv->num_tc);
- PMD_INIT_LOG(DEBUG, "nb_tx_queues %d", priv->nb_tx_queues);
- PMD_INIT_LOG(DEBUG, "nb_rx_queues %d", priv->nb_rx_queues);
+ PMD_DRV_LOG(DEBUG, "RX-TC= %d, nb_rx_queues= %d, nb_tx_queues=%d",
+ priv->num_rx_tc, priv->nb_rx_queues, priv->nb_tx_queues);
priv->hw = dpni_dev;
priv->hw_id = hw_id;
@@ -1460,11 +1872,13 @@ dpaa2_dev_init(struct rte_eth_dev *eth_dev)
}
eth_dev->dev_ops = &dpaa2_ethdev_ops;
+ eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
eth_dev->rx_pkt_burst = dpaa2_dev_prefetch_rx;
eth_dev->tx_pkt_burst = dpaa2_dev_tx;
rte_fslmc_vfio_dmamap();
+ RTE_LOG(INFO, PMD, "%s: netdev created\n", eth_dev->data->name);
return 0;
init_err:
dpaa2_dev_uninit(eth_dev);
@@ -1525,6 +1939,7 @@ dpaa2_dev_uninit(struct rte_eth_dev *eth_dev)
eth_dev->rx_pkt_burst = NULL;
eth_dev->tx_pkt_burst = NULL;
+ RTE_LOG(INFO, PMD, "%s: netdev created\n", eth_dev->data->name);
return 0;
}
@@ -1588,7 +2003,7 @@ rte_dpaa2_remove(struct rte_dpaa2_device *dpaa2_dev)
}
static struct rte_dpaa2_driver rte_dpaa2_pmd = {
- .drv_type = DPAA2_MC_DPNI_DEVID,
+ .drv_type = DPAA2_ETH,
.probe = rte_dpaa2_probe,
.remove = rte_dpaa2_remove,
};
diff --git a/drivers/net/dpaa2/dpaa2_ethdev.h b/drivers/net/dpaa2/dpaa2_ethdev.h
index a2902da2..b8e94aa3 100644
--- a/drivers/net/dpaa2/dpaa2_ethdev.h
+++ b/drivers/net/dpaa2/dpaa2_ethdev.h
@@ -34,6 +34,8 @@
#ifndef _DPAA2_ETHDEV_H
#define _DPAA2_ETHDEV_H
+#include <rte_event_eth_rx_adapter.h>
+
#include <mc/fsl_dpni.h>
#include <mc/fsl_mc_sys.h>
@@ -61,6 +63,7 @@
* currently considering 32 KB packets
*/
#define CONG_THRESHOLD_RX_Q (64 * 1024)
+#define CONG_RX_OAL 128
/* Size of the input SMMU mapped memory required by MC */
#define DIST_PARAM_IOVA_SIZE 256
@@ -87,20 +90,33 @@ struct dpaa2_dev_priv {
uint32_t options;
uint8_t max_mac_filters;
uint8_t max_vlan_filters;
- uint8_t num_tc;
+ uint8_t num_rx_tc;
uint8_t flags; /*dpaa2 config flags */
};
int dpaa2_setup_flow_dist(struct rte_eth_dev *eth_dev,
- uint32_t req_dist_set);
+ uint64_t req_dist_set);
int dpaa2_remove_flow_dist(struct rte_eth_dev *eth_dev,
uint8_t tc_index);
int dpaa2_attach_bp_list(struct dpaa2_dev_priv *priv, void *blist);
+int dpaa2_eth_eventq_attach(const struct rte_eth_dev *dev,
+ int eth_rx_queue_id,
+ uint16_t dpcon_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *queue_conf);
+
+int dpaa2_eth_eventq_detach(const struct rte_eth_dev *dev,
+ int eth_rx_queue_id);
+
uint16_t dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs,
uint16_t nb_pkts);
+void dpaa2_dev_process_parallel_event(struct qbman_swp *swp,
+ const struct qbman_fd *fd,
+ const struct qbman_result *dq,
+ struct dpaa2_queue *rxq,
+ struct rte_event *ev);
uint16_t dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts);
uint16_t dummy_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts);
#endif /* _DPAA2_ETHDEV_H */
diff --git a/drivers/net/dpaa2/dpaa2_rxtx.c b/drivers/net/dpaa2/dpaa2_rxtx.c
index 3c057a39..8ecd238d 100644
--- a/drivers/net/dpaa2/dpaa2_rxtx.c
+++ b/drivers/net/dpaa2/dpaa2_rxtx.c
@@ -122,7 +122,7 @@ dpaa2_dev_rx_offload(uint64_t hw_annot_addr, struct rte_mbuf *mbuf)
if (BIT_ISSET_AT_POS(annotation->word3,
L2_VLAN_1_PRESENT | L2_VLAN_N_PRESENT))
- mbuf->ol_flags |= PKT_RX_VLAN_PKT;
+ mbuf->ol_flags |= PKT_RX_VLAN;
if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L3CE))
mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
@@ -350,7 +350,6 @@ eth_copy_mbuf_to_fd(struct rte_mbuf *mbuf,
if (rte_dpaa2_mbuf_alloc_bulk(
rte_dpaa2_bpid_info[bpid].bp_list->mp, &mb, 1)) {
PMD_TX_LOG(WARNING, "Unable to allocated DPAA2 buffer");
- rte_pktmbuf_free(mbuf);
return -1;
}
m = (struct rte_mbuf *)mb;
@@ -382,8 +381,6 @@ eth_copy_mbuf_to_fd(struct rte_mbuf *mbuf,
rte_dpaa2_bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size,
DPAA2_GET_FD_OFFSET(fd),
DPAA2_GET_FD_LEN(fd));
- /*free the original packet */
- rte_pktmbuf_free(mbuf);
return 0;
}
@@ -422,7 +419,7 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
qbman_pull_desc_set_storage(&pulldesc, dq_storage,
(dma_addr_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
- while (!qbman_check_command_complete(swp,
+ while (!qbman_check_command_complete(
get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
;
clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
@@ -445,7 +442,7 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
* Also seems like the SWP is shared between the Ethernet Driver
* and the SEC driver.
*/
- while (!qbman_check_command_complete(swp, dq_storage))
+ while (!qbman_check_command_complete(dq_storage))
;
if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id))
clear_swp_active_dqs(q_storage->active_dpio_id);
@@ -453,7 +450,7 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
/* Loop until the dq_storage is updated with
* new token by QBMAN
*/
- while (!qbman_result_has_new_result(swp, dq_storage))
+ while (!qbman_check_new_result(dq_storage))
;
rte_prefetch0((void *)((uint64_t)(dq_storage + 1)));
/* Check whether Last Pull command is Expired and
@@ -486,7 +483,7 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
}
if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
- while (!qbman_check_command_complete(swp,
+ while (!qbman_check_command_complete(
get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
;
clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
@@ -517,6 +514,26 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
return num_rx;
}
+void __attribute__((hot))
+dpaa2_dev_process_parallel_event(struct qbman_swp *swp,
+ const struct qbman_fd *fd,
+ const struct qbman_result *dq,
+ struct dpaa2_queue *rxq,
+ struct rte_event *ev)
+{
+ ev->mbuf = eth_fd_to_mbuf(fd);
+
+ ev->flow_id = rxq->ev.flow_id;
+ ev->sub_event_type = rxq->ev.sub_event_type;
+ ev->event_type = RTE_EVENT_TYPE_ETHDEV;
+ ev->op = RTE_EVENT_OP_NEW;
+ ev->sched_type = rxq->ev.sched_type;
+ ev->queue_id = rxq->ev.queue_id;
+ ev->priority = rxq->ev.priority;
+
+ qbman_swp_dqrr_consume(swp, dq);
+}
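+
+/*
+ * Note: this callback is installed on an Rx queue by dpaa2_eth_eventq_attach()
+ * (see dpaa2_ethdev.c) when the event queue uses RTE_SCHED_TYPE_PARALLEL; the
+ * event fields are prefilled from the queue's stored rte_event template.
+ */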
+
/*
* Callback to handle sending packets through WRIOP based interface
*/
@@ -560,7 +577,7 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
while (nb_pkts) {
/*Check if the queue is congested*/
retry_count = 0;
- if (qbman_result_SCN_state_in_mem(dpaa2_q->cscn)) {
+ while (qbman_result_SCN_state(dpaa2_q->cscn)) {
retry_count++;
/* Retry for some time before giving up */
if (retry_count > CONG_RETRY_COUNT)
@@ -580,39 +597,35 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
mp = mi->pool;
}
/* Not a hw_pkt pool allocated frame */
- if (!mp) {
+ if (unlikely(!mp || !priv->bp_list)) {
PMD_TX_LOG(ERR, "err: no bpool attached");
- goto skip_tx;
+ goto send_n_return;
}
+
if (mp->ops_index != priv->bp_list->dpaa2_ops_index) {
PMD_TX_LOG(ERR, "non hw offload bufffer ");
/* alloc should be from the default buffer pool
* attached to this interface
*/
- if (priv->bp_list) {
- bpid = priv->bp_list->buf_pool.bpid;
- } else {
- PMD_TX_LOG(ERR,
- "err: no bpool attached");
- num_tx = 0;
- goto skip_tx;
- }
+ bpid = priv->bp_list->buf_pool.bpid;
+
if (unlikely((*bufs)->nb_segs > 1)) {
PMD_TX_LOG(ERR, "S/G support not added"
" for non hw offload buffer");
- goto skip_tx;
+ goto send_n_return;
}
if (eth_copy_mbuf_to_fd(*bufs,
&fd_arr[loop], bpid)) {
- bufs++;
- continue;
+ goto send_n_return;
}
+ /* free the original packet */
+ rte_pktmbuf_free(*bufs);
} else {
bpid = mempool_to_bpid(mp);
if (unlikely((*bufs)->nb_segs > 1)) {
if (eth_mbuf_to_sg_fd(*bufs,
&fd_arr[loop], bpid))
- goto skip_tx;
+ goto send_n_return;
} else {
eth_mbuf_to_fd(*bufs,
&fd_arr[loop], bpid);
@@ -622,7 +635,7 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
}
loop = 0;
while (loop < frames_to_send) {
- loop += qbman_swp_send_multiple(swp, &eqdesc,
+ loop += qbman_swp_enqueue_multiple(swp, &eqdesc,
&fd_arr[loop], frames_to_send - loop);
}
@@ -630,6 +643,20 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
dpaa2_q->tx_pkts += frames_to_send;
nb_pkts -= frames_to_send;
}
+ return num_tx;
+
+send_n_return:
+ /* send any already prepared fd */
+ if (loop) {
+ unsigned int i = 0;
+
+ while (i < loop) {
+ i += qbman_swp_enqueue_multiple(swp, &eqdesc,
+ &fd_arr[i], loop - i);
+ }
+ num_tx += loop;
+ dpaa2_q->tx_pkts += loop;
+ }
skip_tx:
return num_tx;
}
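
With the send_n_return path above, dpaa2_dev_tx() may legitimately report fewer transmitted frames than requested, for example under congestion or when a frame cannot be converted to an FD. A minimal caller-side sketch of handling such a partial burst follows; the port/queue ids and the retry limit are placeholders, not values from this patch.

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Retry a partial transmit a few times, then drop what was never enqueued. */
static void
send_burst(uint16_t port_id, uint16_t queue_id,
	   struct rte_mbuf **pkts, uint16_t n)
{
	uint16_t sent = 0, retries = 0;

	while (sent < n && retries++ < 3)
		sent += rte_eth_tx_burst(port_id, queue_id,
					 pkts + sent, n - sent);

	/* frames that were never enqueued are still owned by the caller */
	while (sent < n)
		rte_pktmbuf_free(pkts[sent++]);
}
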
diff --git a/drivers/net/dpaa2/mc/dpkg.c b/drivers/net/dpaa2/mc/dpkg.c
new file mode 100644
index 00000000..3f98907f
--- /dev/null
+++ b/drivers/net/dpaa2/mc/dpkg.c
@@ -0,0 +1,107 @@
+/*-
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * BSD LICENSE
+ *
+ * Copyright 2017 NXP
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the above-listed copyright holders nor the
+ * names of any contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <fsl_mc_sys.h>
+#include <fsl_mc_cmd.h>
+#include <fsl_dpkg.h>
+
+/**
+ * dpkg_prepare_key_cfg() - prepare extract parameters
+ * @cfg: defining a full Key Generation profile (rule)
+ * @key_cfg_buf: Zeroed 256 bytes of memory before mapping it to DMA
+ *
+ * This function has to be called before the following functions:
+ * - dpni_set_rx_tc_dist()
+ * - dpni_set_qos_table()
+ * - dpkg_prepare_key_cfg()
+ */
+int
+dpkg_prepare_key_cfg(const struct dpkg_profile_cfg *cfg, uint8_t *key_cfg_buf)
+{
+ int i, j;
+ struct dpni_ext_set_rx_tc_dist *dpni_ext;
+ struct dpni_dist_extract *extr;
+
+ if (cfg->num_extracts > DPKG_MAX_NUM_OF_EXTRACTS)
+ return -EINVAL;
+
+ dpni_ext = (struct dpni_ext_set_rx_tc_dist *)key_cfg_buf;
+ dpni_ext->num_extracts = cfg->num_extracts;
+
+ for (i = 0; i < cfg->num_extracts; i++) {
+ extr = &dpni_ext->extracts[i];
+
+ switch (cfg->extracts[i].type) {
+ case DPKG_EXTRACT_FROM_HDR:
+ extr->prot = cfg->extracts[i].extract.from_hdr.prot;
+ dpkg_set_field(extr->efh_type, EFH_TYPE,
+ cfg->extracts[i].extract.from_hdr.type);
+ extr->size = cfg->extracts[i].extract.from_hdr.size;
+ extr->offset = cfg->extracts[i].extract.from_hdr.offset;
+ extr->field = cpu_to_le32(
+ cfg->extracts[i].extract.from_hdr.field);
+ extr->hdr_index =
+ cfg->extracts[i].extract.from_hdr.hdr_index;
+ break;
+ case DPKG_EXTRACT_FROM_DATA:
+ extr->size = cfg->extracts[i].extract.from_data.size;
+ extr->offset =
+ cfg->extracts[i].extract.from_data.offset;
+ break;
+ case DPKG_EXTRACT_FROM_PARSE:
+ extr->size = cfg->extracts[i].extract.from_parse.size;
+ extr->offset =
+ cfg->extracts[i].extract.from_parse.offset;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ extr->num_of_byte_masks = cfg->extracts[i].num_of_byte_masks;
+ dpkg_set_field(extr->extract_type, EXTRACT_TYPE,
+ cfg->extracts[i].type);
+
+ for (j = 0; j < DPKG_NUM_OF_MASKS; j++) {
+ extr->masks[j].mask = cfg->extracts[i].masks[j].mask;
+ extr->masks[j].offset =
+ cfg->extracts[i].masks[j].offset;
+ }
+ }
+
+ return 0;
+}
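
A minimal sketch of how the new helper is meant to be used: build a dpkg_profile_cfg extracting the IPv4 source and destination addresses and serialise it into a zeroed, DMA-able 256-byte buffer. The NET_PROT_IP, DPKG_FULL_FIELD and NH_FLD_IP_* identifiers are assumed to come from fsl_dpkg.h/fsl_net.h; allocating and mapping the buffer is out of scope here.

#include <stdint.h>
#include <string.h>
#include <fsl_dpkg.h>
#include <fsl_net.h>

/* Fill a key-generation profile hashing on IPv4 src/dst and serialise it. */
static int
prepare_ip_pair_key(uint8_t *key_cfg_buf)
{
	struct dpkg_profile_cfg kg_cfg;

	memset(&kg_cfg, 0, sizeof(kg_cfg));
	kg_cfg.num_extracts = 2;

	kg_cfg.extracts[0].type = DPKG_EXTRACT_FROM_HDR;
	kg_cfg.extracts[0].extract.from_hdr.prot = NET_PROT_IP;
	kg_cfg.extracts[0].extract.from_hdr.type = DPKG_FULL_FIELD;
	kg_cfg.extracts[0].extract.from_hdr.field = NH_FLD_IP_SRC;

	kg_cfg.extracts[1].type = DPKG_EXTRACT_FROM_HDR;
	kg_cfg.extracts[1].extract.from_hdr.prot = NET_PROT_IP;
	kg_cfg.extracts[1].extract.from_hdr.type = DPKG_FULL_FIELD;
	kg_cfg.extracts[1].extract.from_hdr.field = NH_FLD_IP_DST;

	return dpkg_prepare_key_cfg(&kg_cfg, key_cfg_buf);
}
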
diff --git a/drivers/net/dpaa2/mc/dpni.c b/drivers/net/dpaa2/mc/dpni.c
index c2d39691..6f671fe0 100644
--- a/drivers/net/dpaa2/mc/dpni.c
+++ b/drivers/net/dpaa2/mc/dpni.c
@@ -42,100 +42,39 @@
#include <fsl_dpni.h>
#include <fsl_dpni_cmd.h>
-int dpni_prepare_key_cfg(const struct dpkg_profile_cfg *cfg,
- uint8_t *key_cfg_buf)
-{
- int i, j;
- int offset = 0;
- int param = 1;
- uint64_t *params = (uint64_t *)key_cfg_buf;
-
- if (!key_cfg_buf || !cfg)
- return -EINVAL;
-
- params[0] |= mc_enc(0, 8, cfg->num_extracts);
- params[0] = cpu_to_le64(params[0]);
-
- if (cfg->num_extracts >= DPKG_MAX_NUM_OF_EXTRACTS)
- return -EINVAL;
-
- for (i = 0; i < cfg->num_extracts; i++) {
- switch (cfg->extracts[i].type) {
- case DPKG_EXTRACT_FROM_HDR:
- params[param] |= mc_enc(0, 8,
- cfg->extracts[i].extract.from_hdr.prot);
- params[param] |= mc_enc(8, 4,
- cfg->extracts[i].extract.from_hdr.type);
- params[param] |= mc_enc(16, 8,
- cfg->extracts[i].extract.from_hdr.size);
- params[param] |= mc_enc(24, 8,
- cfg->extracts[i].extract.
- from_hdr.offset);
- params[param] |= mc_enc(32, 32,
- cfg->extracts[i].extract.
- from_hdr.field);
- params[param] = cpu_to_le64(params[param]);
- param++;
- params[param] |= mc_enc(0, 8,
- cfg->extracts[i].extract.
- from_hdr.hdr_index);
- break;
- case DPKG_EXTRACT_FROM_DATA:
- params[param] |= mc_enc(16, 8,
- cfg->extracts[i].extract.
- from_data.size);
- params[param] |= mc_enc(24, 8,
- cfg->extracts[i].extract.
- from_data.offset);
- params[param] = cpu_to_le64(params[param]);
- param++;
- break;
- case DPKG_EXTRACT_FROM_PARSE:
- params[param] |= mc_enc(16, 8,
- cfg->extracts[i].extract.
- from_parse.size);
- params[param] |= mc_enc(24, 8,
- cfg->extracts[i].extract.
- from_parse.offset);
- params[param] = cpu_to_le64(params[param]);
- param++;
- break;
- default:
- return -EINVAL;
- }
- params[param] |= mc_enc(
- 24, 8, cfg->extracts[i].num_of_byte_masks);
- params[param] |= mc_enc(32, 4, cfg->extracts[i].type);
- params[param] = cpu_to_le64(params[param]);
- param++;
- for (offset = 0, j = 0;
- j < DPKG_NUM_OF_MASKS;
- offset += 16, j++) {
- params[param] |= mc_enc(
- (offset), 8, cfg->extracts[i].masks[j].mask);
- params[param] |= mc_enc(
- (offset + 8), 8,
- cfg->extracts[i].masks[j].offset);
- }
- params[param] = cpu_to_le64(params[param]);
- param++;
- }
- return 0;
-}
-
+/**
+ * dpni_open() - Open a control session for the specified object
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @dpni_id: DPNI unique ID
+ * @token: Returned token; use in subsequent API calls
+ *
+ * This function can be used to open a control session for an
+ * already created object; an object may have been declared in
+ * the DPL or by calling the dpni_create() function.
+ * This function returns a unique authentication token,
+ * associated with the specific object ID and the specific MC
+ * portal; this token must be used in all subsequent commands for
+ * this specific object.
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_open(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
int dpni_id,
uint16_t *token)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_open *cmd_params;
+
int err;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_OPEN,
cmd_flags,
0);
- DPNI_CMD_OPEN(cmd, dpni_id);
+ cmd_params = (struct dpni_cmd_open *)cmd.params;
+ cmd_params->dpni_id = cpu_to_le32(dpni_id);
/* send command to mc*/
err = mc_send_command(mc_io, &cmd);
@@ -143,11 +82,22 @@ int dpni_open(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- *token = MC_CMD_HDR_READ_TOKEN(cmd.header);
+ *token = mc_cmd_hdr_read_token(&cmd);
return 0;
}
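
A minimal sketch of the open/use/close lifecycle built on these calls; the fsl_mc_io portal and the dpni_id are assumed to be provided by the MC bus layer, and CMD_PRI_LOW is the cmd_flags value this driver normally passes.

#include <fsl_dpni.h>

/* Open a DPNI, query whether it is enabled, then close the session. */
static int
dpni_session_example(struct fsl_mc_io *mc_io, int dpni_id)
{
	uint16_t token;
	int en = 0;
	int err;

	err = dpni_open(mc_io, CMD_PRI_LOW, dpni_id, &token);
	if (err)
		return err;

	err = dpni_is_enabled(mc_io, CMD_PRI_LOW, token, &en);

	/* always release the control session */
	dpni_close(mc_io, CMD_PRI_LOW, token);
	return err ? err : en;
}
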
+/**
+ * dpni_close() - Close the control session of the object
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ *
+ * After this function is called, no further operations are
+ * allowed on the object without opening a new control session.
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_close(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token)
@@ -163,12 +113,35 @@ int dpni_close(struct fsl_mc_io *mc_io,
return mc_send_command(mc_io, &cmd);
}
-int dpni_create(struct fsl_mc_io *mc_io,
- uint16_t dprc_token,
- uint32_t cmd_flags,
- const struct dpni_cfg *cfg,
- uint32_t *obj_id)
+/**
+ * dpni_create() - Create the DPNI object
+ * @mc_io: Pointer to MC portal's I/O object
+ * @dprc_token: Parent container token; '0' for default container
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @cfg: Configuration structure
+ * @obj_id: Returned object id
+ *
+ * Create the DPNI object, allocate required resources and
+ * perform required initialization.
+ *
+ * The object can be created either by declaring it in the
+ * DPL file, or by calling this function.
+ *
+ * The function accepts an authentication token of a parent
+ * container that this object should be assigned to. The token
+ * can be '0' so the object will be assigned to the default container.
+ * The newly created object can be opened with the returned
+ * object id and using the container's associated tokens and MC portals.
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpni_create(struct fsl_mc_io *mc_io,
+ uint16_t dprc_token,
+ uint32_t cmd_flags,
+ const struct dpni_cfg *cfg,
+ uint32_t *obj_id)
{
+ struct dpni_cmd_create *cmd_params;
struct mc_command cmd = { 0 };
int err;
@@ -176,7 +149,14 @@ int dpni_create(struct fsl_mc_io *mc_io,
cmd.header = mc_encode_cmd_header(DPNI_CMDID_CREATE,
cmd_flags,
dprc_token);
- DPNI_CMD_CREATE(cmd, cfg);
+ cmd_params = (struct dpni_cmd_create *)cmd.params;
+ cmd_params->options = cpu_to_le32(cfg->options);
+ cmd_params->num_queues = cfg->num_queues;
+ cmd_params->num_tcs = cfg->num_tcs;
+ cmd_params->mac_filter_entries = cfg->mac_filter_entries;
+ cmd_params->vlan_filter_entries = cfg->vlan_filter_entries;
+ cmd_params->qos_entries = cfg->qos_entries;
+ cmd_params->fs_entries = cpu_to_le16(cfg->fs_entries);
/* send command to mc*/
err = mc_send_command(mc_io, &cmd);
@@ -184,16 +164,32 @@ int dpni_create(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- CMD_CREATE_RSP_GET_OBJ_ID_PARAM0(cmd, *obj_id);
+ *obj_id = mc_cmd_read_object_id(&cmd);
return 0;
}
-int dpni_destroy(struct fsl_mc_io *mc_io,
- uint16_t dprc_token,
- uint32_t cmd_flags,
- uint32_t object_id)
+/**
+ * dpni_destroy() - Destroy the DPNI object and release all its resources.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @dprc_token: Parent container token; '0' for default container
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @object_id: The object id; it must be a valid id within the container that
+ * created this object;
+ *
+ * The function accepts the authentication token of the parent container that
+ * created the object (not the one that currently owns the object). The object
+ * is searched within parent using the provided 'object_id'.
+ * All tokens to the object must be closed before calling destroy.
+ *
+ * Return: '0' on Success; error code otherwise.
+ */
+int dpni_destroy(struct fsl_mc_io *mc_io,
+ uint16_t dprc_token,
+ uint32_t cmd_flags,
+ uint32_t object_id)
{
+ struct dpni_cmd_destroy *cmd_params;
struct mc_command cmd = { 0 };
/* prepare command */
@@ -201,28 +197,63 @@ int dpni_destroy(struct fsl_mc_io *mc_io,
cmd_flags,
dprc_token);
/* set object id to destroy */
- CMD_DESTROY_SET_OBJ_ID_PARAM0(cmd, object_id);
+ cmd_params = (struct dpni_cmd_destroy *)cmd.params;
+ cmd_params->dpsw_id = cpu_to_le32(object_id);
+
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_set_pools() - Set buffer pools configuration
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @cfg: Buffer pools configuration
+ *
+ * Mandatory for DPNI operation.
+ * warning: Allowed only when DPNI is disabled
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_set_pools(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
const struct dpni_pools_cfg *cfg)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_pools *cmd_params;
+ int i;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_POOLS,
cmd_flags,
token);
- DPNI_CMD_SET_POOLS(cmd, cfg);
+ cmd_params = (struct dpni_cmd_set_pools *)cmd.params;
+ cmd_params->num_dpbp = cfg->num_dpbp;
+ for (i = 0; i < DPNI_MAX_DPBP; i++) {
+ cmd_params->pool[i].dpbp_id =
+ cpu_to_le16(cfg->pools[i].dpbp_id);
+ cmd_params->pool[i].priority_mask =
+ cfg->pools[i].priority_mask;
+ cmd_params->buffer_size[i] =
+ cpu_to_le16(cfg->pools[i].buffer_size);
+ cmd_params->backup_pool_mask |=
+ DPNI_BACKUP_POOL(cfg->pools[i].backup_pool, i);
+ }
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
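
Since attaching buffer pools is mandatory and only allowed while the DPNI is disabled, the call is normally issued during device configuration. A minimal sketch, assuming a single DPBP whose id and buffer size were obtained from the mempool layer:

#include <string.h>
#include <fsl_dpni.h>

/* Attach one buffer pool to a (still disabled) DPNI. */
static int
attach_single_bpool(struct fsl_mc_io *mc_io, uint16_t token,
		    uint16_t dpbp_id, uint16_t buf_size)
{
	struct dpni_pools_cfg pools;

	memset(&pools, 0, sizeof(pools));
	pools.num_dpbp = 1;
	pools.pools[0].dpbp_id = dpbp_id;
	pools.pools[0].buffer_size = buf_size;
	pools.pools[0].backup_pool = 0;

	return dpni_set_pools(mc_io, CMD_PRI_LOW, token, &pools);
}
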
+/**
+ * dpni_enable() - Enable the DPNI, allow sending and receiving frames.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_enable(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token)
@@ -238,6 +269,14 @@ int dpni_enable(struct fsl_mc_io *mc_io,
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_disable() - Disable the DPNI, stop sending and receiving frames.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_disable(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token)
@@ -253,15 +292,27 @@ int dpni_disable(struct fsl_mc_io *mc_io,
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_is_enabled() - Check if the DPNI is enabled.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @en: Returns '1' if object is enabled; '0' otherwise
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_is_enabled(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
int *en)
{
struct mc_command cmd = { 0 };
+ struct dpni_rsp_is_enabled *rsp_params;
int err;
+
/* prepare command */
- cmd.header = mc_encode_cmd_header(DPNI_CMDID_IS_ENABLED, cmd_flags,
+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_IS_ENABLED,
+ cmd_flags,
token);
/* send command to mc*/
@@ -270,11 +321,20 @@ int dpni_is_enabled(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- DPNI_RSP_IS_ENABLED(cmd, *en);
+ rsp_params = (struct dpni_rsp_is_enabled *)cmd.params;
+ *en = dpni_get_field(rsp_params->enabled, ENABLE);
return 0;
}
+/**
+ * dpni_reset() - Reset the DPNI, returns the object to initial state.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_reset(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token)
@@ -290,12 +350,256 @@ int dpni_reset(struct fsl_mc_io *mc_io,
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_set_irq_enable() - Set overall interrupt state.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @irq_index: The interrupt index to configure
+ * @en: Interrupt state: - enable = 1, disable = 0
+ *
+ * Allows GPP software to control when interrupts are generated.
+ * Each interrupt can have up to 32 causes. The enable/disable control
+ * affects the overall interrupt state: if the interrupt is disabled, no
+ * cause will trigger an interrupt.
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpni_set_irq_enable(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint8_t en)
+{
+ struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_irq_enable *cmd_params;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_IRQ_ENABLE,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpni_cmd_set_irq_enable *)cmd.params;
+ dpni_set_field(cmd_params->enable, ENABLE, en);
+ cmd_params->irq_index = irq_index;
+
+ /* send command to mc*/
+ return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_irq_enable() - Get overall interrupt state
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @irq_index: The interrupt index to configure
+ * @en: Returned interrupt state - enable = 1, disable = 0
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpni_get_irq_enable(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint8_t *en)
+{
+ struct mc_command cmd = { 0 };
+ struct dpni_cmd_get_irq_enable *cmd_params;
+ struct dpni_rsp_get_irq_enable *rsp_params;
+
+ int err;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_IRQ_ENABLE,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpni_cmd_get_irq_enable *)cmd.params;
+ cmd_params->irq_index = irq_index;
+
+ /* send command to mc*/
+ err = mc_send_command(mc_io, &cmd);
+ if (err)
+ return err;
+
+ /* retrieve response parameters */
+ rsp_params = (struct dpni_rsp_get_irq_enable *)cmd.params;
+ *en = dpni_get_field(rsp_params->enabled, ENABLE);
+
+ return 0;
+}
+
+/**
+ * dpni_set_irq_mask() - Set interrupt mask.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @irq_index: The interrupt index to configure
+ * @mask: Event mask to trigger interrupt;
+ * each bit:
+ * 0 = ignore event
+ * 1 = consider event for asserting IRQ
+ *
+ * Every interrupt can have up to 32 causes and the interrupt model supports
+ * masking/unmasking each cause independently
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpni_set_irq_mask(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint32_t mask)
+{
+ struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_irq_mask *cmd_params;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_IRQ_MASK,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpni_cmd_set_irq_mask *)cmd.params;
+ cmd_params->mask = cpu_to_le32(mask);
+ cmd_params->irq_index = irq_index;
+
+ /* send command to mc*/
+ return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_irq_mask() - Get interrupt mask.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @irq_index: The interrupt index to configure
+ * @mask: Returned event mask to trigger interrupt
+ *
+ * Every interrupt can have up to 32 causes and the interrupt model supports
+ * masking/unmasking each cause independently
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpni_get_irq_mask(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint32_t *mask)
+{
+ struct mc_command cmd = { 0 };
+ struct dpni_cmd_get_irq_mask *cmd_params;
+ struct dpni_rsp_get_irq_mask *rsp_params;
+ int err;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_IRQ_MASK,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpni_cmd_get_irq_mask *)cmd.params;
+ cmd_params->irq_index = irq_index;
+
+ /* send command to mc*/
+ err = mc_send_command(mc_io, &cmd);
+ if (err)
+ return err;
+
+ /* retrieve response parameters */
+ rsp_params = (struct dpni_rsp_get_irq_mask *)cmd.params;
+ *mask = le32_to_cpu(rsp_params->mask);
+
+ return 0;
+}
+
+/**
+ * dpni_get_irq_status() - Get the current status of any pending interrupts.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @irq_index: The interrupt index to configure
+ * @status: Returned interrupts status - one bit per cause:
+ * 0 = no interrupt pending
+ * 1 = interrupt pending
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpni_get_irq_status(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint32_t *status)
+{
+ struct mc_command cmd = { 0 };
+ struct dpni_cmd_get_irq_status *cmd_params;
+ struct dpni_rsp_get_irq_status *rsp_params;
+ int err;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_IRQ_STATUS,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpni_cmd_get_irq_status *)cmd.params;
+ cmd_params->status = cpu_to_le32(*status);
+ cmd_params->irq_index = irq_index;
+
+ /* send command to mc*/
+ err = mc_send_command(mc_io, &cmd);
+ if (err)
+ return err;
+
+ /* retrieve response parameters */
+ rsp_params = (struct dpni_rsp_get_irq_status *)cmd.params;
+ *status = le32_to_cpu(rsp_params->status);
+
+ return 0;
+}
+
+/**
+ * dpni_clear_irq_status() - Clear a pending interrupt's status
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @irq_index: The interrupt index to configure
+ * @status: bits to clear (W1C) - one bit per cause:
+ * 0 = don't change
+ * 1 = clear status bit
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpni_clear_irq_status(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint32_t status)
+{
+ struct mc_command cmd = { 0 };
+ struct dpni_cmd_clear_irq_status *cmd_params;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLEAR_IRQ_STATUS,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpni_cmd_clear_irq_status *)cmd.params;
+ cmd_params->irq_index = irq_index;
+ cmd_params->status = cpu_to_le32(status);
+
+ /* send command to mc*/
+ return mc_send_command(mc_io, &cmd);
+}
+
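
The four IRQ calls above are normally used together: unmask the causes of interest, enable the interrupt, and on each event read and then clear the pending status. A minimal sketch of that sequence; the all-ones cause mask is a placeholder.

#include <fsl_dpni.h>

/* Enable IRQ 0, then service whatever causes are pending (status is W1C). */
static int
service_dpni_irq(struct fsl_mc_io *mc_io, uint16_t token)
{
	const uint8_t irq_index = 0;
	uint32_t status = 0;
	int err;

	err = dpni_set_irq_mask(mc_io, CMD_PRI_LOW, token, irq_index,
				0xFFFFFFFF);
	if (!err)
		err = dpni_set_irq_enable(mc_io, CMD_PRI_LOW, token,
					  irq_index, 1);
	if (!err)
		err = dpni_get_irq_status(mc_io, CMD_PRI_LOW, token,
					  irq_index, &status);
	if (!err && status)
		err = dpni_clear_irq_status(mc_io, CMD_PRI_LOW, token,
					    irq_index, status);
	return err;
}
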
+/**
+ * dpni_get_attributes() - Retrieve DPNI attributes.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @attr: Object's attributes
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_attributes(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
struct dpni_attr *attr)
{
struct mc_command cmd = { 0 };
+ struct dpni_rsp_get_attr *rsp_params;
+
int err;
/* prepare command */
@@ -309,28 +613,65 @@ int dpni_get_attributes(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_ATTR(cmd, attr);
+ rsp_params = (struct dpni_rsp_get_attr *)cmd.params;
+ attr->options = le32_to_cpu(rsp_params->options);
+ attr->num_queues = rsp_params->num_queues;
+ attr->num_rx_tcs = rsp_params->num_rx_tcs;
+ attr->num_tx_tcs = rsp_params->num_tx_tcs;
+ attr->mac_filter_entries = rsp_params->mac_filter_entries;
+ attr->vlan_filter_entries = rsp_params->vlan_filter_entries;
+ attr->qos_entries = rsp_params->qos_entries;
+ attr->fs_entries = le16_to_cpu(rsp_params->fs_entries);
+ attr->qos_key_size = rsp_params->qos_key_size;
+ attr->fs_key_size = rsp_params->fs_key_size;
+ attr->wriop_version = le16_to_cpu(rsp_params->wriop_version);
return 0;
}
+/**
+ * dpni_set_errors_behavior() - Set errors behavior
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @cfg: Errors configuration
+ *
+ * This function may be called numerous times with different
+ * error masks
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_set_errors_behavior(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
- struct dpni_error_cfg *cfg)
+ struct dpni_error_cfg *cfg)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_errors_behavior *cmd_params;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_ERRORS_BEHAVIOR,
cmd_flags,
token);
- DPNI_CMD_SET_ERRORS_BEHAVIOR(cmd, cfg);
+ cmd_params = (struct dpni_cmd_set_errors_behavior *)cmd.params;
+ cmd_params->errors = cpu_to_le32(cfg->errors);
+ dpni_set_field(cmd_params->flags, ERROR_ACTION, cfg->error_action);
+ dpni_set_field(cmd_params->flags, FRAME_ANN, cfg->set_frame_annotation);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_get_buffer_layout() - Retrieve buffer layout attributes.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @qtype: Type of queue to retrieve configuration for
+ * @layout: Returns buffer layout attributes
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_buffer_layout(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
@@ -338,13 +679,16 @@ int dpni_get_buffer_layout(struct fsl_mc_io *mc_io,
struct dpni_buffer_layout *layout)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_get_buffer_layout *cmd_params;
+ struct dpni_rsp_get_buffer_layout *rsp_params;
int err;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_BUFFER_LAYOUT,
cmd_flags,
token);
- DPNI_CMD_GET_BUFFER_LAYOUT(cmd, qtype);
+ cmd_params = (struct dpni_cmd_get_buffer_layout *)cmd.params;
+ cmd_params->qtype = qtype;
/* send command to mc*/
err = mc_send_command(mc_io, &cmd);
@@ -352,29 +696,72 @@ int dpni_get_buffer_layout(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_BUFFER_LAYOUT(cmd, layout);
+ rsp_params = (struct dpni_rsp_get_buffer_layout *)cmd.params;
+ layout->pass_timestamp = dpni_get_field(rsp_params->flags, PASS_TS);
+ layout->pass_parser_result = dpni_get_field(rsp_params->flags, PASS_PR);
+ layout->pass_frame_status = dpni_get_field(rsp_params->flags, PASS_FS);
+ layout->private_data_size = le16_to_cpu(rsp_params->private_data_size);
+ layout->data_align = le16_to_cpu(rsp_params->data_align);
+ layout->data_head_room = le16_to_cpu(rsp_params->head_room);
+ layout->data_tail_room = le16_to_cpu(rsp_params->tail_room);
return 0;
}
+/**
+ * dpni_set_buffer_layout() - Set buffer layout configuration.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @qtype: Type of queue this configuration applies to
+ * @layout: Buffer layout configuration
+ *
+ * Return: '0' on Success; Error code otherwise.
+ *
+ * @warning Allowed only when DPNI is disabled
+ */
int dpni_set_buffer_layout(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
- uint16_t token,
- enum dpni_queue_type qtype,
- const struct dpni_buffer_layout *layout)
+ uint16_t token,
+ enum dpni_queue_type qtype,
+ const struct dpni_buffer_layout *layout)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_buffer_layout *cmd_params;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_BUFFER_LAYOUT,
cmd_flags,
token);
- DPNI_CMD_SET_BUFFER_LAYOUT(cmd, qtype, layout);
+ cmd_params = (struct dpni_cmd_set_buffer_layout *)cmd.params;
+ cmd_params->qtype = qtype;
+ cmd_params->options = cpu_to_le16(layout->options);
+ dpni_set_field(cmd_params->flags, PASS_TS, layout->pass_timestamp);
+ dpni_set_field(cmd_params->flags, PASS_PR, layout->pass_parser_result);
+ dpni_set_field(cmd_params->flags, PASS_FS, layout->pass_frame_status);
+ cmd_params->private_data_size = cpu_to_le16(layout->private_data_size);
+ cmd_params->data_align = cpu_to_le16(layout->data_align);
+ cmd_params->head_room = cpu_to_le16(layout->data_head_room);
+ cmd_params->tail_room = cpu_to_le16(layout->data_tail_room);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
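
Buffer layout is likewise a disabled-only setting; the Rx side typically asks for frame status and parse results plus the driver's headroom. A minimal sketch, assuming the DPNI_BUF_LAYOUT_OPT_* option flags from fsl_dpni.h and a placeholder headroom value:

#include <string.h>
#include <fsl_dpni.h>

/* Configure the Rx buffer layout while the DPNI is disabled. */
static int
setup_rx_layout(struct fsl_mc_io *mc_io, uint16_t token)
{
	struct dpni_buffer_layout layout;

	memset(&layout, 0, sizeof(layout));
	layout.options = DPNI_BUF_LAYOUT_OPT_FRAME_STATUS |
			 DPNI_BUF_LAYOUT_OPT_PARSER_RESULT |
			 DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM;
	layout.pass_frame_status = 1;
	layout.pass_parser_result = 1;
	layout.data_head_room = 128;	/* placeholder, in bytes */

	return dpni_set_buffer_layout(mc_io, CMD_PRI_LOW, token,
				      DPNI_QUEUE_RX, &layout);
}
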
+/**
+ * dpni_set_offload() - Set DPNI offload configuration.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @type: Type of DPNI offload
+ * @config: Offload configuration.
+ * For checksum offloads, a non-zero value enables the offload
+ *
+ * Return: '0' on Success; Error code otherwise.
+ *
+ * @warning Allowed only when DPNI is disabled
+ */
int dpni_set_offload(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
@@ -382,17 +769,32 @@ int dpni_set_offload(struct fsl_mc_io *mc_io,
uint32_t config)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_offload *cmd_params;
- /* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_OFFLOAD,
cmd_flags,
token);
- DPNI_CMD_SET_OFFLOAD(cmd, type, config);
+ cmd_params = (struct dpni_cmd_set_offload *)cmd.params;
+ cmd_params->dpni_offload = type;
+ cmd_params->config = cpu_to_le32(config);
- /* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
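
A minimal sketch of enabling Rx checksum validation through this call, assuming the DPNI_OFF_RX_L3_CSUM and DPNI_OFF_RX_L4_CSUM offload identifiers from fsl_dpni.h:

#include <fsl_dpni.h>

/* Turn on Rx L3 and L4 checksum validation (DPNI must be disabled). */
static int
enable_rx_csum(struct fsl_mc_io *mc_io, uint16_t token)
{
	int err;

	err = dpni_set_offload(mc_io, CMD_PRI_LOW, token,
			       DPNI_OFF_RX_L3_CSUM, 1);
	if (!err)
		err = dpni_set_offload(mc_io, CMD_PRI_LOW, token,
				       DPNI_OFF_RX_L4_CSUM, 1);
	return err;
}
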
+/**
+ * dpni_get_offload() - Get DPNI offload configuration.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @type: Type of DPNI offload
+ * @config: Offload configuration.
+ * For checksum offloads, a value of 1 indicates that the
+ * offload is enabled.
+ *
+ * Return: '0' on Success; Error code otherwise.
+ *
+ * @warning Allowed only when DPNI is disabled
+ */
int dpni_get_offload(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
@@ -400,13 +802,16 @@ int dpni_get_offload(struct fsl_mc_io *mc_io,
uint32_t *config)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_get_offload *cmd_params;
+ struct dpni_rsp_get_offload *rsp_params;
int err;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_OFFLOAD,
cmd_flags,
token);
- DPNI_CMD_GET_OFFLOAD(cmd, type);
+ cmd_params = (struct dpni_cmd_get_offload *)cmd.params;
+ cmd_params->dpni_offload = type;
/* send command to mc*/
err = mc_send_command(mc_io, &cmd);
@@ -414,11 +819,24 @@ int dpni_get_offload(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_OFFLOAD(cmd, *config);
+ rsp_params = (struct dpni_rsp_get_offload *)cmd.params;
+ *config = le32_to_cpu(rsp_params->config);
return 0;
}
+/**
+ * dpni_get_qdid() - Get the Queuing Destination ID (QDID) that should be used
+ * for enqueue operations
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @qtype: Type of queue to receive QDID for
+ * @qdid: Returned virtual QDID value that should be used as an argument
+ * in all enqueue operations
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_qdid(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
@@ -426,13 +844,16 @@ int dpni_get_qdid(struct fsl_mc_io *mc_io,
uint16_t *qdid)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_get_qdid *cmd_params;
+ struct dpni_rsp_get_qdid *rsp_params;
int err;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_QDID,
cmd_flags,
token);
- DPNI_CMD_GET_QDID(cmd, qtype);
+ cmd_params = (struct dpni_cmd_get_qdid *)cmd.params;
+ cmd_params->qtype = qtype;
/* send command to mc*/
err = mc_send_command(mc_io, &cmd);
@@ -440,34 +861,92 @@ int dpni_get_qdid(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_QDID(cmd, *qdid);
+ rsp_params = (struct dpni_rsp_get_qdid *)cmd.params;
+ *qdid = le16_to_cpu(rsp_params->qdid);
return 0;
}
+/**
+ * dpni_get_tx_data_offset() - Get the Tx data offset (from start of buffer)
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @data_offset: Tx data offset (from start of buffer)
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpni_get_tx_data_offset(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint16_t *data_offset)
+{
+ struct mc_command cmd = { 0 };
+ struct dpni_rsp_get_tx_data_offset *rsp_params;
+ int err;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_TX_DATA_OFFSET,
+ cmd_flags,
+ token);
+
+ /* send command to mc*/
+ err = mc_send_command(mc_io, &cmd);
+ if (err)
+ return err;
+
+ /* retrieve response parameters */
+ rsp_params = (struct dpni_rsp_get_tx_data_offset *)cmd.params;
+ *data_offset = le16_to_cpu(rsp_params->data_offset);
+
+ return 0;
+}
+
+/**
+ * dpni_set_link_cfg() - set the link configuration.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @cfg: Link configuration
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_set_link_cfg(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
const struct dpni_link_cfg *cfg)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_link_cfg *cmd_params;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_LINK_CFG,
cmd_flags,
token);
- DPNI_CMD_SET_LINK_CFG(cmd, cfg);
+ cmd_params = (struct dpni_cmd_set_link_cfg *)cmd.params;
+ cmd_params->rate = cpu_to_le32(cfg->rate);
+ cmd_params->options = cpu_to_le64(cfg->options);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_get_link_state() - Return the link state (either up or down)
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @state: Returned link state;
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_link_state(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
struct dpni_link_state *state)
{
struct mc_command cmd = { 0 };
+ struct dpni_rsp_get_link_state *rsp_params;
int err;
/* prepare command */
@@ -481,34 +960,60 @@ int dpni_get_link_state(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_LINK_STATE(cmd, state);
+ rsp_params = (struct dpni_rsp_get_link_state *)cmd.params;
+ state->up = dpni_get_field(rsp_params->flags, LINK_STATE);
+ state->rate = le32_to_cpu(rsp_params->rate);
+ state->options = le64_to_cpu(rsp_params->options);
return 0;
}
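
A minimal sketch of the link polling usually done from an ethdev link_update callback, translating the MC response into an up/down flag and a rate:

#include <string.h>
#include <fsl_dpni.h>

/* Return 1 if the link is up, 0 if down, negative error code on failure. */
static int
poll_link(struct fsl_mc_io *mc_io, uint16_t token, uint32_t *rate_mbps)
{
	struct dpni_link_state state;
	int err;

	memset(&state, 0, sizeof(state));
	err = dpni_get_link_state(mc_io, CMD_PRI_LOW, token, &state);
	if (err)
		return err;

	if (rate_mbps)
		*rate_mbps = state.rate;
	return state.up ? 1 : 0;
}
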
+/**
+ * dpni_set_max_frame_length() - Set the maximum received frame length.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @max_frame_length: Maximum received frame length (in bytes);
+ * frame is discarded if its length exceeds this value
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_set_max_frame_length(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
uint16_t max_frame_length)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_max_frame_length *cmd_params;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_MAX_FRAME_LENGTH,
cmd_flags,
token);
- DPNI_CMD_SET_MAX_FRAME_LENGTH(cmd, max_frame_length);
+ cmd_params = (struct dpni_cmd_set_max_frame_length *)cmd.params;
+ cmd_params->max_frame_length = cpu_to_le16(max_frame_length);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_get_max_frame_length() - Get the maximum received frame length.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @max_frame_length: Maximum received frame length (in bytes);
+ * frame is discarded if its length exceeds this value
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_max_frame_length(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
uint16_t *max_frame_length)
{
struct mc_command cmd = { 0 };
+ struct dpni_rsp_get_max_frame_length *rsp_params;
int err;
/* prepare command */
@@ -522,34 +1027,56 @@ int dpni_get_max_frame_length(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_MAX_FRAME_LENGTH(cmd, *max_frame_length);
+ rsp_params = (struct dpni_rsp_get_max_frame_length *)cmd.params;
+ *max_frame_length = le16_to_cpu(rsp_params->max_frame_length);
return 0;
}
+/**
+ * dpni_set_multicast_promisc() - Enable/disable multicast promiscuous mode
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @en: Set to '1' to enable; '0' to disable
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_set_multicast_promisc(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
int en)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_multicast_promisc *cmd_params;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_MCAST_PROMISC,
cmd_flags,
token);
- DPNI_CMD_SET_MULTICAST_PROMISC(cmd, en);
+ cmd_params = (struct dpni_cmd_set_multicast_promisc *)cmd.params;
+ dpni_set_field(cmd_params->enable, ENABLE, en);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_get_multicast_promisc() - Get multicast promiscuous mode
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @en: Returns '1' if enabled; '0' otherwise
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_multicast_promisc(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
int *en)
{
struct mc_command cmd = { 0 };
+ struct dpni_rsp_get_multicast_promisc *rsp_params;
int err;
/* prepare command */
@@ -563,34 +1090,56 @@ int dpni_get_multicast_promisc(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_MULTICAST_PROMISC(cmd, *en);
+ rsp_params = (struct dpni_rsp_get_multicast_promisc *)cmd.params;
+ *en = dpni_get_field(rsp_params->enabled, ENABLE);
return 0;
}
+/**
+ * dpni_set_unicast_promisc() - Enable/disable unicast promiscuous mode
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @en: Set to '1' to enable; '0' to disable
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_set_unicast_promisc(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
int en)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_unicast_promisc *cmd_params;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_UNICAST_PROMISC,
cmd_flags,
token);
- DPNI_CMD_SET_UNICAST_PROMISC(cmd, en);
+ cmd_params = (struct dpni_cmd_set_unicast_promisc *)cmd.params;
+ dpni_set_field(cmd_params->enable, ENABLE, en);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_get_unicast_promisc() - Get unicast promiscuous mode
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @en: Returns '1' if enabled; '0' otherwise
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_unicast_promisc(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
int *en)
{
struct mc_command cmd = { 0 };
+ struct dpni_rsp_get_unicast_promisc *rsp_params;
int err;
/* prepare command */
@@ -604,35 +1153,59 @@ int dpni_get_unicast_promisc(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_UNICAST_PROMISC(cmd, *en);
+ rsp_params = (struct dpni_rsp_get_unicast_promisc *)cmd.params;
+ *en = dpni_get_field(rsp_params->enabled, ENABLE);
return 0;
}
+/**
+ * dpni_set_primary_mac_addr() - Set the primary MAC address
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @mac_addr: MAC address to set as primary address
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_set_primary_mac_addr(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
const uint8_t mac_addr[6])
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_primary_mac_addr *cmd_params;
+ int i;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_PRIM_MAC,
cmd_flags,
token);
- DPNI_CMD_SET_PRIMARY_MAC_ADDR(cmd, mac_addr);
+ cmd_params = (struct dpni_cmd_set_primary_mac_addr *)cmd.params;
+ for (i = 0; i < 6; i++)
+ cmd_params->mac_addr[i] = mac_addr[5 - i];
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_get_primary_mac_addr() - Get the primary MAC address
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @mac_addr: Returned MAC address
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_primary_mac_addr(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
uint8_t mac_addr[6])
{
struct mc_command cmd = { 0 };
- int err;
+ struct dpni_rsp_get_primary_mac_addr *rsp_params;
+ int i, err;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_PRIM_MAC,
@@ -645,45 +1218,85 @@ int dpni_get_primary_mac_addr(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_PRIMARY_MAC_ADDR(cmd, mac_addr);
+ rsp_params = (struct dpni_rsp_get_primary_mac_addr *)cmd.params;
+ for (i = 0; i < 6; i++)
+ mac_addr[5 - i] = rsp_params->mac_addr[i];
return 0;
}
+/**
+ * dpni_add_mac_addr() - Add MAC address filter
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @mac_addr: MAC address to add
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_add_mac_addr(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
const uint8_t mac_addr[6])
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_add_mac_addr *cmd_params;
+ int i;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_ADD_MAC_ADDR,
cmd_flags,
token);
- DPNI_CMD_ADD_MAC_ADDR(cmd, mac_addr);
+ cmd_params = (struct dpni_cmd_add_mac_addr *)cmd.params;
+ for (i = 0; i < 6; i++)
+ cmd_params->mac_addr[i] = mac_addr[5 - i];
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_remove_mac_addr() - Remove MAC address filter
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @mac_addr: MAC address to remove
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_remove_mac_addr(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
const uint8_t mac_addr[6])
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_remove_mac_addr *cmd_params;
+ int i;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_REMOVE_MAC_ADDR,
cmd_flags,
token);
- DPNI_CMD_REMOVE_MAC_ADDR(cmd, mac_addr);
+ cmd_params = (struct dpni_cmd_remove_mac_addr *)cmd.params;
+ for (i = 0; i < 6; i++)
+ cmd_params->mac_addr[i] = mac_addr[5 - i];
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_clear_mac_filters() - Clear all unicast and/or multicast MAC filters
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @unicast: Set to '1' to clear unicast addresses
+ * @multicast: Set to '1' to clear multicast addresses
+ *
+ * The primary MAC address is not cleared by this operation.
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_clear_mac_filters(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
@@ -691,24 +1304,40 @@ int dpni_clear_mac_filters(struct fsl_mc_io *mc_io,
int multicast)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_clear_mac_filters *cmd_params;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLR_MAC_FILTERS,
cmd_flags,
token);
- DPNI_CMD_CLEAR_MAC_FILTERS(cmd, unicast, multicast);
+ cmd_params = (struct dpni_cmd_clear_mac_filters *)cmd.params;
+ dpni_set_field(cmd_params->flags, UNICAST_FILTERS, unicast);
+ dpni_set_field(cmd_params->flags, MULTICAST_FILTERS, multicast);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_get_port_mac_addr() - Retrieve MAC address associated to the physical
+ * port the DPNI is attached to
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @mac_addr: MAC address of the physical port, if any, otherwise 0
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_port_mac_addr(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
uint8_t mac_addr[6])
{
struct mc_command cmd = { 0 };
- int err;
+ struct dpni_rsp_get_port_mac_addr *rsp_params;
+ int i, err;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_PORT_MAC_ADDR,
@@ -721,62 +1350,105 @@ int dpni_get_port_mac_addr(struct fsl_mc_io *mc_io,
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_PORT_MAC_ADDR(cmd, mac_addr);
+ rsp_params = (struct dpni_rsp_get_port_mac_addr *)cmd.params;
+ for (i = 0; i < 6; i++)
+ mac_addr[5 - i] = rsp_params->mac_addr[i];
return 0;
}
+/**
+ * dpni_enable_vlan_filter() - Enable/disable VLAN filtering mode
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @en: Set to '1' to enable; '0' to disable
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_enable_vlan_filter(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
- uint16_t token,
- int en)
+ uint16_t token,
+ int en)
{
+ struct dpni_cmd_enable_vlan_filter *cmd_params;
struct mc_command cmd = { 0 };
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_ENABLE_VLAN_FILTER,
cmd_flags,
token);
- DPNI_CMD_ENABLE_VLAN_FILTER(cmd, en);
+ cmd_params = (struct dpni_cmd_enable_vlan_filter *)cmd.params;
+ dpni_set_field(cmd_params->en, ENABLE, en);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_add_vlan_id() - Add VLAN ID filter
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @vlan_id: VLAN ID to add
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_add_vlan_id(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
uint16_t vlan_id)
{
+ struct dpni_cmd_vlan_id *cmd_params;
struct mc_command cmd = { 0 };
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_ADD_VLAN_ID,
cmd_flags,
token);
- DPNI_CMD_ADD_VLAN_ID(cmd, vlan_id);
+ cmd_params = (struct dpni_cmd_vlan_id *)cmd.params;
+ cmd_params->vlan_id = cpu_to_le16(vlan_id);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_remove_vlan_id() - Remove VLAN ID filter
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @vlan_id: VLAN ID to remove
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_remove_vlan_id(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
uint16_t vlan_id)
{
+ struct dpni_cmd_vlan_id *cmd_params;
struct mc_command cmd = { 0 };
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_REMOVE_VLAN_ID,
cmd_flags,
token);
- DPNI_CMD_REMOVE_VLAN_ID(cmd, vlan_id);
+ cmd_params = (struct dpni_cmd_vlan_id *)cmd.params;
+ cmd_params->vlan_id = cpu_to_le16(vlan_id);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_clear_vlan_filters() - Clear all VLAN filters
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_clear_vlan_filters(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token)
@@ -792,6 +1464,19 @@ int dpni_clear_vlan_filters(struct fsl_mc_io *mc_io,
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_set_rx_tc_dist() - Set Rx traffic class distribution configuration
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @tc_id: Traffic class selection (0-7)
+ * @cfg: Traffic class distribution configuration
+ *
+ * warning: if 'dist_mode != DPNI_DIST_MODE_NONE', call dpkg_prepare_key_cfg()
+ * first to prepare the key_cfg_iova parameter
+ *
+ * Return: '0' on Success; error code otherwise.
+ */
int dpni_set_rx_tc_dist(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
@@ -799,87 +1484,185 @@ int dpni_set_rx_tc_dist(struct fsl_mc_io *mc_io,
const struct dpni_rx_tc_dist_cfg *cfg)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_rx_tc_dist *cmd_params;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_RX_TC_DIST,
cmd_flags,
token);
- DPNI_CMD_SET_RX_TC_DIST(cmd, tc_id, cfg);
+ cmd_params = (struct dpni_cmd_set_rx_tc_dist *)cmd.params;
+ cmd_params->dist_size = cpu_to_le16(cfg->dist_size);
+ cmd_params->tc_id = tc_id;
+ cmd_params->default_flow_id = cpu_to_le16(cfg->fs_cfg.default_flow_id);
+ cmd_params->key_cfg_iova = cpu_to_le64(cfg->key_cfg_iova);
+ dpni_set_field(cmd_params->flags,
+ DIST_MODE,
+ cfg->dist_mode);
+ dpni_set_field(cmd_params->flags,
+ MISS_ACTION,
+ cfg->fs_cfg.miss_action);
+ dpni_set_field(cmd_params->keep_hash_key,
+ KEEP_HASH_KEY,
+ cfg->fs_cfg.keep_hash_key);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
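
This ties back to dpkg_prepare_key_cfg(): the serialised key lives in DMA-able memory and its IOVA is passed through key_cfg_iova. A minimal sketch, assuming a buffer already filled by something like the prepare_ip_pair_key() sketch above and already translated to an IOVA:

#include <string.h>
#include <fsl_dpni.h>

/* Enable hash distribution over 'dist_size' queues of traffic class 0. */
static int
enable_rx_hash(struct fsl_mc_io *mc_io, uint16_t token,
	       uint16_t dist_size, uint64_t key_cfg_iova)
{
	struct dpni_rx_tc_dist_cfg tc_cfg;

	memset(&tc_cfg, 0, sizeof(tc_cfg));
	tc_cfg.dist_size = dist_size;
	tc_cfg.dist_mode = DPNI_DIST_MODE_HASH;
	tc_cfg.key_cfg_iova = key_cfg_iova;

	return dpni_set_rx_tc_dist(mc_io, CMD_PRI_LOW, token, 0, &tc_cfg);
}
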
-int dpni_set_tx_confirmation_mode(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- enum dpni_confirmation_mode mode)
+/**
+ * dpni_set_tx_confirmation_mode() - Set Tx confirmation mode
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @mode: Tx confirmation mode
+ *
+ * This function is useful only when 'DPNI_OPT_TX_CONF_DISABLED' is not
+ * selected at DPNI creation.
+ * Calling this function with 'mode' set to DPNI_CONF_DISABLE disables all
+ * transmit confirmation (including the private confirmation queues), regardless
+ * of previous settings. Note that in this case, Tx error frames are still
+ * enqueued to the general transmit errors queue.
+ * Calling this function with 'mode' set to DPNI_CONF_SINGLE switches all
+ * Tx confirmations to a shared Tx conf queue; the 'index' field of the
+ * dpni_get_queue command is then ignored.
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpni_set_tx_confirmation_mode(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ enum dpni_confirmation_mode mode)
{
+ struct dpni_tx_confirmation_mode *cmd_params;
struct mc_command cmd = { 0 };
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TX_CONFIRMATION_MODE,
cmd_flags,
token);
- DPNI_CMD_SET_TX_CONFIRMATION_MODE(cmd, mode);
+ cmd_params = (struct dpni_tx_confirmation_mode *)cmd.params;
+ cmd_params->confirmation_mode = mode;
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
-int dpni_set_congestion_notification(
- struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- enum dpni_queue_type qtype,
- uint8_t tc_id,
+/**
+ * dpni_set_congestion_notification() - Set traffic class congestion
+ * notification configuration
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @qtype: Type of queue - Rx, Tx and Tx confirm types are supported
+ * @tc_id: Traffic class selection (0-7)
+ * @cfg: congestion notification configuration
+ *
+ * Return: '0' on Success; error code otherwise.
+ */
+int dpni_set_congestion_notification(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ enum dpni_queue_type qtype,
+ uint8_t tc_id,
const struct dpni_congestion_notification_cfg *cfg)
{
+ struct dpni_cmd_set_congestion_notification *cmd_params;
struct mc_command cmd = { 0 };
/* prepare command */
cmd.header = mc_encode_cmd_header(
- DPNI_CMDID_SET_CONGESTION_NOTIFICATION,
- cmd_flags,
- token);
- DPNI_CMD_SET_CONGESTION_NOTIFICATION(cmd, qtype, tc_id, cfg);
+ DPNI_CMDID_SET_CONGESTION_NOTIFICATION,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpni_cmd_set_congestion_notification *)cmd.params;
+ cmd_params->qtype = qtype;
+ cmd_params->tc = tc_id;
+ cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
+ cmd_params->notification_mode = cpu_to_le16(cfg->notification_mode);
+ cmd_params->dest_priority = cfg->dest_cfg.priority;
+ cmd_params->message_iova = cpu_to_le64(cfg->message_iova);
+ cmd_params->message_ctx = cpu_to_le64(cfg->message_ctx);
+ cmd_params->threshold_entry = cpu_to_le32(cfg->threshold_entry);
+ cmd_params->threshold_exit = cpu_to_le32(cfg->threshold_exit);
+ dpni_set_field(cmd_params->type_units,
+ DEST_TYPE,
+ cfg->dest_cfg.dest_type);
+ dpni_set_field(cmd_params->type_units,
+ CONG_UNITS,
+ cfg->units);
/* send command to mc*/
return mc_send_command(mc_io, &cmd);
}
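
The Tx path shown earlier polls a congestion state change notification (CSCN) record in memory; message_iova is where the MC writes that record when the queue crosses the configured thresholds. A minimal sketch in frame units, assuming the DPNI_CONG_OPT_WRITE_MEM_ON_* and DPNI_CONGESTION_UNIT_FRAMES identifiers from fsl_dpni.h and placeholder threshold values:

#include <string.h>
#include <fsl_dpni.h>

/* Ask the MC to write a CSCN record to 'cscn_iova' for Tx traffic class 'tc'. */
static int
setup_tx_congestion(struct fsl_mc_io *mc_io, uint16_t token,
		    uint8_t tc, uint64_t cscn_iova)
{
	struct dpni_congestion_notification_cfg cfg;

	memset(&cfg, 0, sizeof(cfg));
	cfg.units = DPNI_CONGESTION_UNIT_FRAMES;
	cfg.threshold_entry = 512;
	cfg.threshold_exit = 480;
	cfg.message_iova = cscn_iova;
	cfg.message_ctx = 0;
	cfg.dest_cfg.dest_type = DPNI_DEST_NONE;
	cfg.notification_mode = DPNI_CONG_OPT_WRITE_MEM_ON_ENTER |
				DPNI_CONG_OPT_WRITE_MEM_ON_EXIT;

	return dpni_set_congestion_notification(mc_io, CMD_PRI_LOW, token,
						DPNI_QUEUE_TX, tc, &cfg);
}
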
-int dpni_get_congestion_notification(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
+/**
+ * dpni_get_congestion_notification() - Get traffic class congestion
+ * notification configuration
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @qtype: Type of queue - Rx, Tx and Tx confirm types are supported
+ * @tc_id: Traffic class selection (0-7)
+ * @cfg: congestion notification configuration
+ *
+ * Return: '0' on Success; error code otherwise.
+ */
+int dpni_get_congestion_notification(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
enum dpni_queue_type qtype,
- uint8_t tc_id,
+ uint8_t tc_id,
struct dpni_congestion_notification_cfg *cfg)
{
+ struct dpni_rsp_get_congestion_notification *rsp_params;
+ struct dpni_cmd_get_congestion_notification *cmd_params;
struct mc_command cmd = { 0 };
int err;
/* prepare command */
cmd.header = mc_encode_cmd_header(
- DPNI_CMDID_GET_CONGESTION_NOTIFICATION,
- cmd_flags,
- token);
- DPNI_CMD_GET_CONGESTION_NOTIFICATION(cmd, qtype, tc_id);
+ DPNI_CMDID_GET_CONGESTION_NOTIFICATION,
+ cmd_flags,
+ token);
+ cmd_params = (struct dpni_cmd_get_congestion_notification *)cmd.params;
+ cmd_params->qtype = qtype;
+ cmd_params->tc = tc_id;
/* send command to mc*/
err = mc_send_command(mc_io, &cmd);
if (err)
return err;
- DPNI_RSP_GET_CONGESTION_NOTIFICATION(cmd, cfg);
+ rsp_params = (struct dpni_rsp_get_congestion_notification *)cmd.params;
+ cfg->units = dpni_get_field(rsp_params->type_units, CONG_UNITS);
+ cfg->threshold_entry = le32_to_cpu(rsp_params->threshold_entry);
+ cfg->threshold_exit = le32_to_cpu(rsp_params->threshold_exit);
+ cfg->message_ctx = le64_to_cpu(rsp_params->message_ctx);
+ cfg->message_iova = le64_to_cpu(rsp_params->message_iova);
+ cfg->notification_mode = le16_to_cpu(rsp_params->notification_mode);
+ cfg->dest_cfg.dest_id = le32_to_cpu(rsp_params->dest_id);
+ cfg->dest_cfg.priority = rsp_params->dest_priority;
+ cfg->dest_cfg.dest_type = dpni_get_field(rsp_params->type_units,
+ DEST_TYPE);
return 0;
}
+/**
+ * dpni_get_api_version() - Get Data Path Network Interface API version
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @major_ver: Major version of data path network interface API
+ * @minor_ver: Minor version of data path network interface API
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_api_version(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
- uint16_t *major_ver,
- uint16_t *minor_ver)
+ uint16_t *major_ver,
+ uint16_t *minor_ver)
{
+ struct dpni_rsp_get_api_version *rsp_params;
struct mc_command cmd = { 0 };
int err;
@@ -891,87 +1674,177 @@ int dpni_get_api_version(struct fsl_mc_io *mc_io,
if (err)
return err;
- DPNI_RSP_GET_API_VERSION(cmd, *major_ver, *minor_ver);
+ rsp_params = (struct dpni_rsp_get_api_version *)cmd.params;
+ *major_ver = le16_to_cpu(rsp_params->major);
+ *minor_ver = le16_to_cpu(rsp_params->minor);
return 0;
}
+/**
+ * dpni_set_queue() - Set queue parameters
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @qtype: Type of queue - all queue types are supported, although
+ * the command is ignored for Tx
+ * @tc: Traffic class, in range 0 to NUM_TCS - 1
+ * @index: Selects the specific queue out of the set allocated for the
+ * same TC. Value must be in range 0 to NUM_QUEUES - 1
+ * @options: A combination of DPNI_QUEUE_OPT_ values that control what
+ * configuration options are set on the queue
+ * @queue: Queue structure
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_set_queue(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
- uint16_t token,
+ uint16_t token,
enum dpni_queue_type qtype,
- uint8_t tc,
- uint8_t index,
+ uint8_t tc,
+ uint8_t index,
uint8_t options,
- const struct dpni_queue *queue)
+ const struct dpni_queue *queue)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_queue *cmd_params;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_QUEUE,
cmd_flags,
token);
- DPNI_CMD_SET_QUEUE(cmd, qtype, tc, index, options, queue);
-
- /* send command to mc*/
+ cmd_params = (struct dpni_cmd_set_queue *)cmd.params;
+ cmd_params->qtype = qtype;
+ cmd_params->tc = tc;
+ cmd_params->index = index;
+ cmd_params->options = options;
+ cmd_params->dest_id = cpu_to_le32(queue->destination.id);
+ cmd_params->dest_prio = queue->destination.priority;
+ dpni_set_field(cmd_params->flags, DEST_TYPE, queue->destination.type);
+ dpni_set_field(cmd_params->flags, STASH_CTRL, queue->flc.stash_control);
+ dpni_set_field(cmd_params->flags, HOLD_ACTIVE,
+ queue->destination.hold_active);
+ cmd_params->flc = cpu_to_le64(queue->flc.value);
+ cmd_params->user_context = cpu_to_le64(queue->user_context);
+
+ /* send command to mc */
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_get_queue() - Get queue parameters
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @qtype: Type of queue - all queue types are supported
+ * @tc: Traffic class, in range 0 to NUM_TCS - 1
+ * @index: Selects the specific queue out of the set allocated for the
+ * same TC. Value must be in range 0 to NUM_QUEUES - 1
+ * @queue: Queue configuration structure
+ * @qid: Queue identification
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_queue(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
- uint16_t token,
+ uint16_t token,
enum dpni_queue_type qtype,
- uint8_t tc,
- uint8_t index,
+ uint8_t tc,
+ uint8_t index,
struct dpni_queue *queue,
struct dpni_queue_id *qid)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_get_queue *cmd_params;
+ struct dpni_rsp_get_queue *rsp_params;
int err;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_QUEUE,
cmd_flags,
token);
- DPNI_CMD_GET_QUEUE(cmd, qtype, tc, index);
+ cmd_params = (struct dpni_cmd_get_queue *)cmd.params;
+ cmd_params->qtype = qtype;
+ cmd_params->tc = tc;
+ cmd_params->index = index;
- /* send command to mc*/
+ /* send command to mc */
err = mc_send_command(mc_io, &cmd);
if (err)
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_QUEUE(cmd, queue, qid);
+ rsp_params = (struct dpni_rsp_get_queue *)cmd.params;
+ queue->destination.id = le32_to_cpu(rsp_params->dest_id);
+ queue->destination.priority = rsp_params->dest_prio;
+ queue->destination.type = dpni_get_field(rsp_params->flags,
+ DEST_TYPE);
+ queue->flc.stash_control = dpni_get_field(rsp_params->flags,
+ STASH_CTRL);
+ queue->destination.hold_active = dpni_get_field(rsp_params->flags,
+ HOLD_ACTIVE);
+ queue->flc.value = le64_to_cpu(rsp_params->flc);
+ queue->user_context = le64_to_cpu(rsp_params->user_context);
+ qid->fqid = le32_to_cpu(rsp_params->fqid);
+ qid->qdbin = le16_to_cpu(rsp_params->qdbin);
return 0;
}
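A hedged caller sketch (mc_io/token assumed to describe an opened DPNI, command flags 0, helper name hypothetical): look up the frame-queue ID of Rx queue 0 on TC 0, which can then be used for volatile dequeue through the DPIO run-time API.

static int get_rx0_fqid(struct fsl_mc_io *mc_io, uint16_t token,
			uint32_t *fqid)
{
	struct dpni_queue q = { 0 };
	struct dpni_queue_id qid = { 0 };
	int err;

	err = dpni_get_queue(mc_io, 0, token, DPNI_QUEUE_RX,
			     0 /* tc */, 0 /* index */, &q, &qid);
	if (!err)
		*fqid = qid.fqid;
	return err;
}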
+/**
+ * dpni_get_statistics() - Get DPNI statistics
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @page: Selects the statistics page to retrieve, see
+ * DPNI_GET_STATISTICS output. Pages are numbered 0 to 3.
+ * @param: Custom parameter for some pages used to select
+ * a certain statistic source, for example the TC.
+ * @stat: Structure containing the statistics
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_statistics(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
uint8_t page,
+ uint8_t param,
union dpni_statistics *stat)
{
struct mc_command cmd = { 0 };
- int err;
+ struct dpni_cmd_get_statistics *cmd_params;
+ struct dpni_rsp_get_statistics *rsp_params;
+ int i, err;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_STATISTICS,
cmd_flags,
token);
- DPNI_CMD_GET_STATISTICS(cmd, page);
+ cmd_params = (struct dpni_cmd_get_statistics *)cmd.params;
+ cmd_params->page_number = page;
+ cmd_params->param = param;
- /* send command to mc*/
+ /* send command to mc */
err = mc_send_command(mc_io, &cmd);
if (err)
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_STATISTICS(cmd, stat);
+ rsp_params = (struct dpni_rsp_get_statistics *)cmd.params;
+ for (i = 0; i < DPNI_STATISTICS_CNT; i++)
+ stat->raw.counter[i] = le64_to_cpu(rsp_params->counter[i]);
return 0;
}
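Illustrative only (opened DPNI assumed, helper name hypothetical): the new 'param' argument is what selects the TC when reading the per-TC page 3 counters added to union dpni_statistics elsewhere in this patch.

static int read_tc_ceetm_frames(struct fsl_mc_io *mc_io, uint16_t token,
				uint8_t tc, uint64_t *deq_frames)
{
	union dpni_statistics page3;
	int err;

	/* page 3 holds the per-TC CEETM counters; 'param' selects the TC */
	err = dpni_get_statistics(mc_io, 0, token, 3 /* page */, tc, &page3);
	if (!err)
		*deq_frames = page3.page_3.ceetm_dequeue_frames;
	return err;
}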
+/**
+ * dpni_reset_statistics() - Clears DPNI statistics
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_reset_statistics(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token)
@@ -987,52 +1860,117 @@ int dpni_reset_statistics(struct fsl_mc_io *mc_io,
return mc_send_command(mc_io, &cmd);
}
+/**
+ * dpni_set_taildrop() - Set taildrop per queue or TC
+ *
+ * Setting a per-TC taildrop (cg_point = DPNI_CP_GROUP) will reset any current
+ * congestion notification or early drop (WRED) configuration previously applied
+ * to the same TC.
+ *
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @cg_point: Congestion point. DPNI_CP_QUEUE is only supported in
+ * combination with DPNI_QUEUE_RX.
+ * @qtype: Queue type, can be DPNI_QUEUE_RX or DPNI_QUEUE_TX.
+ * @tc: Traffic class to apply this taildrop to
+ * @index: Index of the queue if the DPNI supports multiple queues for
+ * traffic distribution.
+ * Ignored if cg_point is not DPNI_CP_QUEUE.
+ * @taildrop: Taildrop structure
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_set_taildrop(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
enum dpni_congestion_point cg_point,
- enum dpni_queue_type q_type,
+ enum dpni_queue_type qtype,
uint8_t tc,
- uint8_t q_index,
+ uint8_t index,
struct dpni_taildrop *taildrop)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_set_taildrop *cmd_params;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_TAILDROP,
cmd_flags,
token);
- DPNI_CMD_SET_TAILDROP(cmd, cg_point, q_type, tc, q_index, taildrop);
-
- /* send command to mc*/
+ cmd_params = (struct dpni_cmd_set_taildrop *)cmd.params;
+ cmd_params->congestion_point = cg_point;
+ cmd_params->qtype = qtype;
+ cmd_params->tc = tc;
+ cmd_params->index = index;
+ cmd_params->units = taildrop->units;
+ cmd_params->threshold = cpu_to_le32(taildrop->threshold);
+ dpni_set_field(cmd_params->enable_oal_lo, ENABLE, taildrop->enable);
+ dpni_set_field(cmd_params->enable_oal_lo, OAL_LO, taildrop->oal);
+ dpni_set_field(cmd_params->oal_hi,
+ OAL_HI,
+ taildrop->oal >> DPNI_OAL_LO_SIZE);
+
+ /* send command to mc */
return mc_send_command(mc_io, &cmd);
}
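A minimal configuration sketch, assuming an opened DPNI (mc_io/token) and default command flags; the helper name is hypothetical. It arms a 64 KiB byte-based taildrop on Rx queue 0 of TC 0 using the struct dpni_taildrop fields introduced in this patch.

static int arm_rx0_taildrop(struct fsl_mc_io *mc_io, uint16_t token)
{
	struct dpni_taildrop td = {
		.enable = 1,
		.units = DPNI_CONGESTION_UNIT_BYTES,
		.threshold = 64 * 1024,	/* must be > 0 when enabled */
		.oal = 0,		/* no per-frame overhead accounting */
	};

	return dpni_set_taildrop(mc_io, 0, token, DPNI_CP_QUEUE,
				 DPNI_QUEUE_RX, 0 /* tc */, 0 /* index */,
				 &td);
}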
+/**
+ * dpni_get_taildrop() - Get taildrop information
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPNI object
+ * @cg_point: Congestion point
+ * @qtype: Queue type on which the taildrop is configured.
+ * Only Rx queues are supported for now
+ * @tc: Traffic class to apply this taildrop to
+ * @index: Index of the queue if the DPNI supports multiple queues for
+ * traffic distribution. Ignored if cg_point is not DPNI_CP_QUEUE.
+ * @taildrop: Taildrop structure
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
int dpni_get_taildrop(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
- uint16_t token,
- enum dpni_congestion_point cg_point,
- enum dpni_queue_type q_type,
- uint8_t tc,
- uint8_t q_index,
- struct dpni_taildrop *taildrop)
+ uint16_t token,
+ enum dpni_congestion_point cg_point,
+ enum dpni_queue_type qtype,
+ uint8_t tc,
+ uint8_t index,
+ struct dpni_taildrop *taildrop)
{
struct mc_command cmd = { 0 };
+ struct dpni_cmd_get_taildrop *cmd_params;
+ struct dpni_rsp_get_taildrop *rsp_params;
+ uint8_t oal_lo, oal_hi;
int err;
/* prepare command */
cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_TAILDROP,
cmd_flags,
token);
- DPNI_CMD_GET_TAILDROP(cmd, cg_point, q_type, tc, q_index);
+ cmd_params = (struct dpni_cmd_get_taildrop *)cmd.params;
+ cmd_params->congestion_point = cg_point;
+ cmd_params->qtype = qtype;
+ cmd_params->tc = tc;
+ cmd_params->index = index;
- /* send command to mc*/
+ /* send command to mc */
err = mc_send_command(mc_io, &cmd);
if (err)
return err;
/* retrieve response parameters */
- DPNI_RSP_GET_TAILDROP(cmd, taildrop);
+ rsp_params = (struct dpni_rsp_get_taildrop *)cmd.params;
+ taildrop->enable = dpni_get_field(rsp_params->enable_oal_lo, ENABLE);
+ taildrop->units = rsp_params->units;
+ taildrop->threshold = le32_to_cpu(rsp_params->threshold);
+ oal_lo = dpni_get_field(rsp_params->enable_oal_lo, OAL_LO);
+ oal_hi = dpni_get_field(rsp_params->oal_hi, OAL_HI);
+ taildrop->oal = oal_hi << DPNI_OAL_LO_SIZE | oal_lo;
+
+ /* Sign-extend the top 4 bits: 'oal' is a 12-bit 2's complement value */
+ if (taildrop->oal >= 0x0800)
+ taildrop->oal |= 0xF000;
return 0;
}
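The OAL handling above is plain 12-bit sign extension; an equivalent standalone helper (illustrative, not part of the patch) makes the arithmetic explicit:

static inline int16_t oal_sign_extend(uint16_t raw12)
{
	/* bit 11 is the sign bit of the 12-bit field */
	if (raw12 & 0x0800)
		raw12 |= 0xF000;
	return (int16_t)raw12;
}
/* e.g. oal_sign_extend(0x0FFF) == -1, oal_sign_extend(0x07FF) == 2047 */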
diff --git a/drivers/net/dpaa2/mc/fsl_dpkg.h b/drivers/net/dpaa2/mc/fsl_dpkg.h
index 2391e401..7f46bafb 100644
--- a/drivers/net/dpaa2/mc/fsl_dpkg.h
+++ b/drivers/net/dpaa2/mc/fsl_dpkg.h
@@ -5,7 +5,7 @@
* BSD LICENSE
*
* Copyright 2013-2015 Freescale Semiconductor Inc.
- * Copyright 2016 NXP.
+ * Copyright 2016-2017 NXP.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -93,6 +93,15 @@ struct dpkg_mask {
uint8_t offset;
};
+/* Macros for accessing command fields smaller than 1 byte */
+#define DPKG_MASK(field) \
+ GENMASK(DPKG_##field##_SHIFT + DPKG_##field##_SIZE - 1, \
+ DPKG_##field##_SHIFT)
+#define dpkg_set_field(var, field, val) \
+ ((var) |= (((val) << DPKG_##field##_SHIFT) & DPKG_MASK(field)))
+#define dpkg_get_field(var, field) \
+ (((var) & DPKG_MASK(field)) >> DPKG_##field##_SHIFT)
+
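These helpers mirror the dpni_set_field()/dpni_get_field() pattern used in dpni.c. A tiny worked fragment with the DPKG_EXTRACT_TYPE field defined further below (SHIFT 0, SIZE 4):

	uint8_t extract_type = 0;

	dpkg_set_field(extract_type, EXTRACT_TYPE, 0x2); /* low nibble <- 0x2 */
	/* dpkg_get_field(extract_type, EXTRACT_TYPE) now evaluates to 0x2 */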
/**
* struct dpkg_extract - A structure for defining a single extraction
* @type: Determines how the union below is interpreted:
@@ -136,12 +145,12 @@ struct dpkg_extract {
*/
struct {
- enum net_prot prot;
+ enum net_prot prot;
enum dpkg_extract_from_hdr_type type;
- uint32_t field;
- uint8_t size;
- uint8_t offset;
- uint8_t hdr_index;
+ uint32_t field;
+ uint8_t size;
+ uint8_t offset;
+ uint8_t hdr_index;
} from_hdr;
/**
* struct from_data
@@ -166,8 +175,8 @@ struct dpkg_extract {
} from_parse;
} extract;
- uint8_t num_of_byte_masks;
- struct dpkg_mask masks[DPKG_NUM_OF_MASKS];
+ uint8_t num_of_byte_masks;
+ struct dpkg_mask masks[DPKG_NUM_OF_MASKS];
};
/**
@@ -181,4 +190,48 @@ struct dpkg_profile_cfg {
struct dpkg_extract extracts[DPKG_MAX_NUM_OF_EXTRACTS];
};
+/* dpni_set_rx_tc_dist extension (structure of the DMA-able memory at
+ * key_cfg_iova)
+ */
+struct dpni_mask_cfg {
+ uint8_t mask;
+ uint8_t offset;
+};
+
+#define DPKG_EFH_TYPE_SHIFT 0
+#define DPKG_EFH_TYPE_SIZE 4
+#define DPKG_EXTRACT_TYPE_SHIFT 0
+#define DPKG_EXTRACT_TYPE_SIZE 4
+
+struct dpni_dist_extract {
+ /* word 0 */
+ uint8_t prot;
+ /* EFH type stored in the 4 least significant bits */
+ uint8_t efh_type;
+ uint8_t size;
+ uint8_t offset;
+ uint32_t field;
+ /* word 1 */
+ uint8_t hdr_index;
+ uint8_t constant;
+ uint8_t num_of_repeats;
+ uint8_t num_of_byte_masks;
+ /* Extraction type is stored in the 4 LSBs */
+ uint8_t extract_type;
+ uint8_t pad[3];
+ /* word 2 */
+ struct dpni_mask_cfg masks[4];
+};
+
+struct dpni_ext_set_rx_tc_dist {
+ /* extension word 0 */
+ uint8_t num_extracts;
+ uint8_t pad[7];
+ /* words 1..25 */
+ struct dpni_dist_extract extracts[10];
+};
+
+int dpkg_prepare_key_cfg(const struct dpkg_profile_cfg *cfg,
+ uint8_t *key_cfg_buf);
+
#endif /* __FSL_DPKG_H_ */
diff --git a/drivers/net/dpaa2/mc/fsl_dpni.h b/drivers/net/dpaa2/mc/fsl_dpni.h
index 64db70dc..5227ea15 100644
--- a/drivers/net/dpaa2/mc/fsl_dpni.h
+++ b/drivers/net/dpaa2/mc/fsl_dpni.h
@@ -5,7 +5,7 @@
* BSD LICENSE
*
* Copyright 2013-2016 Freescale Semiconductor Inc.
- * Copyright 2016 NXP.
+ * Copyright 2016-2017 NXP.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -110,47 +110,19 @@ struct fsl_mc_io;
*/
#define DPNI_OPT_NO_FS 0x000020
-/**
- * dpni_open() - Open a control session for the specified object
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @dpni_id: DPNI unique ID
- * @token: Returned token; use in subsequent API calls
- *
- * This function can be used to open a control session for an
- * already created object; an object may have been declared in
- * the DPL or by calling the dpni_create() function.
- * This function returns a unique authentication token,
- * associated with the specific object ID and the specific MC
- * portal; this token must be used in all subsequent commands for
- * this specific object.
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_open(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- int dpni_id,
- uint16_t *token);
+int dpni_open(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ int dpni_id,
+ uint16_t *token);
-/**
- * dpni_close() - Close the control session of the object
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- *
- * After this function is called, no further operations are
- * allowed on the object without opening a new control session.
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_close(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token);
+int dpni_close(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token);
/**
* struct dpni_cfg - Structure representing DPNI configuration
- * @mac_addr: Primary MAC address
- * @adv: Advanced parameters; default is all zeros;
+ * @mac_addr: Primary MAC address
+ * @adv: Advanced parameters; default is all zeros;
* use this structure to change default settings
*/
struct dpni_cfg {
@@ -217,141 +189,111 @@ struct dpni_cfg {
uint8_t qos_entries;
};
-/**
- * dpni_create() - Create the DPNI object
- * @mc_io: Pointer to MC portal's I/O object
- * @dprc_token: Parent container token; '0' for default container
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @cfg: Configuration structure
- * @obj_id: returned object id
- *
- * Create the DPNI object, allocate required resources and
- * perform required initialization.
- *
- * The object can be created either by declaring it in the
- * DPL file, or by calling this function.
- *
- * The function accepts an authentication token of a parent
- * container that this object should be assigned to. The token
- * can be '0' so the object will be assigned to the default container.
- * The newly created object can be opened with the returned
- * object id and using the container's associated tokens and MC portals.
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_create(struct fsl_mc_io *mc_io,
- uint16_t dprc_token,
- uint32_t cmd_flags,
- const struct dpni_cfg *cfg,
- uint32_t *obj_id);
+int dpni_create(struct fsl_mc_io *mc_io,
+ uint16_t dprc_token,
+ uint32_t cmd_flags,
+ const struct dpni_cfg *cfg,
+ uint32_t *obj_id);
-/**
- * dpni_destroy() - Destroy the DPNI object and release all its resources.
- * @mc_io: Pointer to MC portal's I/O object
- * @dprc_token: Parent container token; '0' for default container
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @object_id: The object id; it must be a valid id within the container that
- * created this object;
- *
- * The function accepts the authentication token of the parent container that
- * created the object (not the one that currently owns the object). The object
- * is searched within parent using the provided 'object_id'.
- * All tokens to the object must be closed before calling destroy.
- *
- * Return: '0' on Success; error code otherwise.
- */
-int dpni_destroy(struct fsl_mc_io *mc_io,
- uint16_t dprc_token,
- uint32_t cmd_flags,
- uint32_t object_id);
+int dpni_destroy(struct fsl_mc_io *mc_io,
+ uint16_t dprc_token,
+ uint32_t cmd_flags,
+ uint32_t object_id);
/**
* struct dpni_pools_cfg - Structure representing buffer pools configuration
- * @num_dpbp: Number of DPBPs
- * @pools: Array of buffer pools parameters; The number of valid entries
- * must match 'num_dpbp' value
+ * @num_dpbp: Number of DPBPs
+ * @pools: Array of buffer pools parameters; The number of valid entries
+ * must match 'num_dpbp' value
*/
struct dpni_pools_cfg {
- uint8_t num_dpbp;
+ uint8_t num_dpbp;
/**
* struct pools - Buffer pools parameters
* @dpbp_id: DPBP object ID
+ * @priority_mask: Priority mask that indicates the TCs that use this buffer.
+ * If set to 0x00, MC will assume the value 0xff.
* @buffer_size: Buffer size
* @backup_pool: Backup pool
*/
struct {
int dpbp_id;
+ uint8_t priority_mask;
uint16_t buffer_size;
int backup_pool;
} pools[DPNI_MAX_DPBP];
};
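For context, a possible way to fill the extended structure (sketch only; mc_io, token and dpbp_id are assumed to come from elsewhere, command flags 0, helper name hypothetical): one pool serving every TC, since a 0x00 mask is treated as 0xff. As the original comment notes, dpni_set_pools() is only allowed while the DPNI is disabled.

static int setup_single_pool(struct fsl_mc_io *mc_io, uint16_t token,
			     int dpbp_id)
{
	struct dpni_pools_cfg pools_cfg = { .num_dpbp = 1 };

	pools_cfg.pools[0].dpbp_id = dpbp_id;
	pools_cfg.pools[0].priority_mask = 0x00; /* 0x00 is treated as 0xff */
	pools_cfg.pools[0].buffer_size = 2048;
	pools_cfg.pools[0].backup_pool = 0;

	return dpni_set_pools(mc_io, 0, token, &pools_cfg);
}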
-/**
- * dpni_set_pools() - Set buffer pools configuration
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @cfg: Buffer pools configuration
- *
- * mandatory for DPNI operation
- * warning:Allowed only when DPNI is disabled
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_set_pools(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- const struct dpni_pools_cfg *cfg);
+int dpni_set_pools(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ const struct dpni_pools_cfg *cfg);
-/**
- * dpni_enable() - Enable the DPNI, allow sending and receiving frames.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_enable(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token);
+int dpni_enable(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token);
+
+int dpni_disable(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token);
+
+int dpni_is_enabled(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ int *en);
+
+int dpni_reset(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token);
/**
- * dpni_disable() - Disable the DPNI, stop sending and receiving frames.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- *
- * Return: '0' on Success; Error code otherwise.
+ * DPNI IRQ Index and Events
*/
-int dpni_disable(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token);
/**
- * dpni_is_enabled() - Check if the DPNI is enabled.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @en: Returns '1' if object is enabled; '0' otherwise
- *
- * Return: '0' on Success; Error code otherwise.
+ * IRQ index
*/
-int dpni_is_enabled(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- int *en);
-
+#define DPNI_IRQ_INDEX 0
/**
- * dpni_reset() - Reset the DPNI, returns the object to initial state.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- *
- * Return: '0' on Success; Error code otherwise.
+ * IRQ event - indicates a change in link state
*/
-int dpni_reset(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token);
+#define DPNI_IRQ_EVENT_LINK_CHANGED 0x00000001
+
+int dpni_set_irq_enable(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint8_t en);
+
+int dpni_get_irq_enable(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint8_t *en);
+
+int dpni_set_irq_mask(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint32_t mask);
+
+int dpni_get_irq_mask(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint32_t *mask);
+
+int dpni_get_irq_status(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint32_t *status);
+
+int dpni_clear_irq_status(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t irq_index,
+ uint32_t status);
/**
* struct dpni_attr - Structure representing DPNI attributes
@@ -363,7 +305,8 @@ int dpni_reset(struct fsl_mc_io *mc_io,
* DPNI_OPT_HAS_KEY_MASKING
* DPNI_OPT_NO_FS
* @num_queues: Number of Tx and Rx queues used for traffic distribution.
- * @num_tcs: Number of traffic classes (TCs), reserved for the DPNI.
+ * @num_rx_tcs: Number of RX traffic classes (TCs), reserved for the DPNI.
+ * @num_tx_tcs: Number of TX traffic classes (TCs), reserved for the DPNI.
* @mac_filter_entries: Number of entries in the MAC address filtering
* table.
* @vlan_filter_entries: Number of entries in the VLAN address filtering
@@ -390,7 +333,8 @@ int dpni_reset(struct fsl_mc_io *mc_io,
struct dpni_attr {
uint32_t options;
uint8_t num_queues;
- uint8_t num_tcs;
+ uint8_t num_rx_tcs;
+ uint8_t num_tx_tcs;
uint8_t mac_filter_entries;
uint8_t vlan_filter_entries;
uint8_t qos_entries;
@@ -400,19 +344,10 @@ struct dpni_attr {
uint16_t wriop_version;
};
-/**
- * dpni_get_attributes() - Retrieve DPNI attributes.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @attr: Object's attributes
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_get_attributes(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- struct dpni_attr *attr);
+int dpni_get_attributes(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ struct dpni_attr *attr);
/**
* DPNI errors
@@ -457,33 +392,22 @@ enum dpni_error_action {
/**
* struct dpni_error_cfg - Structure representing DPNI errors treatment
- * @errors: Errors mask; use 'DPNI_ERROR__<X>
- * @error_action: The desired action for the errors mask
- * @set_frame_annotation: Set to '1' to mark the errors in frame annotation
- * status (FAS); relevant only for the non-discard action
+ * @errors: Errors mask; use 'DPNI_ERROR__<X>
+ * @error_action: The desired action for the errors mask
+ * @set_frame_annotation: Set to '1' to mark the errors in frame
+ * annotation status (FAS); relevant only
+ * for the non-discard action
*/
struct dpni_error_cfg {
- uint32_t errors;
- enum dpni_error_action error_action;
- int set_frame_annotation;
+ uint32_t errors;
+ enum dpni_error_action error_action;
+ int set_frame_annotation;
};
-/**
- * dpni_set_errors_behavior() - Set errors behavior
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @cfg: Errors configuration
- *
- * this function may be called numerous times with different
- * error masks
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_set_errors_behavior(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- struct dpni_error_cfg *cfg);
+int dpni_set_errors_behavior(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ struct dpni_error_cfg *cfg);
/**
* DPNI buffer layout modification options
@@ -520,25 +444,26 @@ int dpni_set_errors_behavior(struct fsl_mc_io *mc_io,
/**
* struct dpni_buffer_layout - Structure representing DPNI buffer layout
- * @options: Flags representing the suggested modifications to the buffer
- * layout; Use any combination of 'DPNI_BUF_LAYOUT_OPT_<X>' flags
- * @pass_timestamp: Pass timestamp value
- * @pass_parser_result: Pass parser results
- * @pass_frame_status: Pass frame status
- * @private_data_size: Size kept for private data (in bytes)
- * @data_align: Data alignment
- * @data_head_room: Data head room
- * @data_tail_room: Data tail room
+ * @options: Flags representing the suggested modifications to the
+ * buffer layout;
+ * Use any combination of 'DPNI_BUF_LAYOUT_OPT_<X>' flags
+ * @pass_timestamp: Pass timestamp value
+ * @pass_parser_result: Pass parser results
+ * @pass_frame_status: Pass frame status
+ * @private_data_size: Size kept for private data (in bytes)
+ * @data_align: Data alignment
+ * @data_head_room: Data head room
+ * @data_tail_room: Data tail room
*/
struct dpni_buffer_layout {
- uint32_t options;
- int pass_timestamp;
- int pass_parser_result;
- int pass_frame_status;
- uint16_t private_data_size;
- uint16_t data_align;
- uint16_t data_head_room;
- uint16_t data_tail_room;
+ uint32_t options;
+ int pass_timestamp;
+ int pass_parser_result;
+ int pass_frame_status;
+ uint16_t private_data_size;
+ uint16_t data_align;
+ uint16_t data_head_room;
+ uint16_t data_tail_room;
};
/**
@@ -547,45 +472,24 @@ struct dpni_buffer_layout {
* @DPNI_QUEUE_TX: Tx queue
* @DPNI_QUEUE_TX_CONFIRM: Tx confirmation queue
* @DPNI_QUEUE_RX_ERR: Rx error queue
- */enum dpni_queue_type {
+ */
+enum dpni_queue_type {
DPNI_QUEUE_RX,
DPNI_QUEUE_TX,
DPNI_QUEUE_TX_CONFIRM,
DPNI_QUEUE_RX_ERR,
};
-/**
- * dpni_get_buffer_layout() - Retrieve buffer layout attributes.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @qtype: Type of queue to get the layout from
- * @layout: Returns buffer layout attributes
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_get_buffer_layout(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- enum dpni_queue_type qtype,
- struct dpni_buffer_layout *layout);
+int dpni_get_buffer_layout(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ enum dpni_queue_type qtype,
+ struct dpni_buffer_layout *layout);
-/**
- * dpni_set_buffer_layout() - Set buffer layout configuration.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @qtype: Type of queue to set layout on
- * @layout: Buffer layout configuration
- *
- * Return: '0' on Success; Error code otherwise.
- *
- * @warning Allowed only when DPNI is disabled
- */
-int dpni_set_buffer_layout(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- enum dpni_queue_type qtype,
+int dpni_set_buffer_layout(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ enum dpni_queue_type qtype,
const struct dpni_buffer_layout *layout);
/**
@@ -594,72 +498,39 @@ int dpni_set_buffer_layout(struct fsl_mc_io *mc_io,
* @DPNI_OFF_RX_L4_CSUM: Rx L4 checksum validation
* @DPNI_OFF_TX_L3_CSUM: Tx L3 checksum generation
* @DPNI_OFF_TX_L4_CSUM: Tx L4 checksum generation
+ * @DPNI_FLCTYPE_HASH: Flow context will be generated by WRIOP for AIOP or
+ * for CPU
*/
enum dpni_offload {
DPNI_OFF_RX_L3_CSUM,
DPNI_OFF_RX_L4_CSUM,
DPNI_OFF_TX_L3_CSUM,
DPNI_OFF_TX_L4_CSUM,
+ DPNI_FLCTYPE_HASH,
};
-/**
- * dpni_set_offload() - Set DPNI offload configuration.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @type: Type of DPNI offload
- * @config: Offload configuration.
- * For checksum offloads, non-zero value enables
- * the offload.
- *
- * Return: '0' on Success; Error code otherwise.
- *
- * @warning Allowed only when DPNI is disabled
- */
int dpni_set_offload(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
enum dpni_offload type,
uint32_t config);
-/**
- * dpni_get_offload() - Get DPNI offload configuration.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @type: Type of DPNI offload
- * @config: Offload configuration.
- * For checksum offloads, a value of 1 indicates that the
- * offload is enabled.
- *
- * Return: '0' on Success; Error code otherwise.
- *
- * @warning Allowed only when DPNI is disabled
- */
int dpni_get_offload(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
enum dpni_offload type,
uint32_t *config);
-/**
- * dpni_get_qdid() - Get the Queuing Destination ID (QDID) that should be used
- * for enqueue operations
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @qtype: Type of queue to get QDID for. For applications lookig to
- * transmit traffic this should be set to DPNI_QUEUE_TX
- * @qdid: Returned virtual QDID value that should be used as an argument
- * in all enqueue operations
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_get_qdid(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- enum dpni_queue_type qtype,
- uint16_t *qdid);
+int dpni_get_qdid(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ enum dpni_queue_type qtype,
+ uint16_t *qdid);
+
+int dpni_get_tx_data_offset(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint16_t *data_offset);
#define DPNI_STATISTICS_CNT 7
@@ -715,6 +586,23 @@ union dpni_statistics {
uint64_t egress_confirmed_frames;
} page_2;
/**
+ * struct page_3 - Page_3 statistics structure with values for the
+ * selected TC
+ * @ceetm_dequeue_bytes: Cumulative count of the number of bytes
+ * dequeued
+ * @ceetm_dequeue_frames: Cumulative count of the number of frames
+ * dequeued
+ * @ceetm_reject_bytes: Cumulative count of the number of bytes in all
+ * frames whose enqueue was rejected
+ * @ceetm_reject_frames: Cumulative count of all frame enqueues rejected
+ */
+ struct {
+ uint64_t ceetm_dequeue_bytes;
+ uint64_t ceetm_dequeue_frames;
+ uint64_t ceetm_reject_bytes;
+ uint64_t ceetm_reject_frames;
+ } page_3;
+ /**
* struct raw - raw statistics structure, used to index counters
*/
struct {
@@ -738,6 +626,10 @@ union dpni_statistics {
* Enable a-symmetric pause frames
*/
#define DPNI_LINK_OPT_ASYM_PAUSE 0x0000000000000008ULL
+/**
+ * Enable priority flow control pause frames
+ */
+#define DPNI_LINK_OPT_PFC_PAUSE 0x0000000000000010ULL
/**
* struct - Structure representing DPNI link configuration
@@ -749,278 +641,117 @@ struct dpni_link_cfg {
uint64_t options;
};
-/**
- * dpni_set_link_cfg() - set the link configuration.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @cfg: Link configuration
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_set_link_cfg(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- const struct dpni_link_cfg *cfg);
+int dpni_set_link_cfg(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ const struct dpni_link_cfg *cfg);
/**
* struct dpni_link_state - Structure representing DPNI link state
- * @rate: Rate
- * @options: Mask of available options; use 'DPNI_LINK_OPT_<X>' values
- * @up: Link state; '0' for down, '1' for up
+ * @rate: Rate
+ * @options: Mask of available options; use 'DPNI_LINK_OPT_<X>' values
+ * @up: Link state; '0' for down, '1' for up
*/
struct dpni_link_state {
- uint32_t rate;
- uint64_t options;
- int up;
+ uint32_t rate;
+ uint64_t options;
+ int up;
};
-/**
- * dpni_get_link_state() - Return the link state (either up or down)
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @state: Returned link state;
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_get_link_state(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- struct dpni_link_state *state);
-
-/**
- * dpni_set_max_frame_length() - Set the maximum received frame length.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @max_frame_length: Maximum received frame length (in
- * bytes); frame is discarded if its
- * length exceeds this value
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_set_max_frame_length(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- uint16_t max_frame_length);
-
-/**
- * dpni_get_max_frame_length() - Get the maximum received frame length.
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @max_frame_length: Maximum received frame length (in
- * bytes); frame is discarded if its
- * length exceeds this value
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_get_max_frame_length(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- uint16_t *max_frame_length);
-
-/**
- * dpni_set_multicast_promisc() - Enable/disable multicast promiscuous mode
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @en: Set to '1' to enable; '0' to disable
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_set_multicast_promisc(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- int en);
-
-/**
- * dpni_get_multicast_promisc() - Get multicast promiscuous mode
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @en: Returns '1' if enabled; '0' otherwise
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_get_multicast_promisc(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- int *en);
-
-/**
- * dpni_set_unicast_promisc() - Enable/disable unicast promiscuous mode
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @en: Set to '1' to enable; '0' to disable
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_set_unicast_promisc(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- int en);
-
-/**
- * dpni_get_unicast_promisc() - Get unicast promiscuous mode
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @en: Returns '1' if enabled; '0' otherwise
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_get_unicast_promisc(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- int *en);
-
-/**
- * dpni_set_primary_mac_addr() - Set the primary MAC address
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @mac_addr: MAC address to set as primary address
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_set_primary_mac_addr(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- const uint8_t mac_addr[6]);
-
-/**
- * dpni_get_primary_mac_addr() - Get the primary MAC address
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @mac_addr: Returned MAC address
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_get_primary_mac_addr(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- uint8_t mac_addr[6]);
-
-/**
- * dpni_add_mac_addr() - Add MAC address filter
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @mac_addr: MAC address to add
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_add_mac_addr(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- const uint8_t mac_addr[6]);
-
-/**
- * dpni_remove_mac_addr() - Remove MAC address filter
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @mac_addr: MAC address to remove
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_remove_mac_addr(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- const uint8_t mac_addr[6]);
-
-/**
- * dpni_clear_mac_filters() - Clear all unicast and/or multicast MAC filters
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @unicast: Set to '1' to clear unicast addresses
- * @multicast: Set to '1' to clear multicast addresses
- *
- * The primary MAC address is not cleared by this operation.
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_clear_mac_filters(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- int unicast,
- int multicast);
-
-/**
- * dpni_get_port_mac_addr() - Retrieve MAC address associated to the physical
- * port the DPNI is attached to
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @mac_addr: MAC address of the physical port, if any, otherwise 0
- *
- * The primary MAC address is not modified by this operation.
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_get_port_mac_addr(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- uint8_t mac_addr[6]);
+int dpni_get_link_state(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ struct dpni_link_state *state);
+
+int dpni_set_max_frame_length(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint16_t max_frame_length);
+
+int dpni_get_max_frame_length(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint16_t *max_frame_length);
+
+int dpni_set_mtu(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint16_t mtu);
+
+int dpni_get_mtu(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint16_t *mtu);
+
+int dpni_set_multicast_promisc(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ int en);
+
+int dpni_get_multicast_promisc(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ int *en);
+
+int dpni_set_unicast_promisc(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ int en);
+
+int dpni_get_unicast_promisc(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ int *en);
+
+int dpni_set_primary_mac_addr(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ const uint8_t mac_addr[6]);
+
+int dpni_get_primary_mac_addr(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t mac_addr[6]);
+
+int dpni_add_mac_addr(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ const uint8_t mac_addr[6]);
-/**
- * dpni_enable_vlan_filter() - Enable/disable VLAN filtering mode
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @en: Set to '1' to enable; '0' to disable
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_enable_vlan_filter(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- int en);
+int dpni_remove_mac_addr(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ const uint8_t mac_addr[6]);
+
+int dpni_clear_mac_filters(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ int unicast,
+ int multicast);
+
+int dpni_get_port_mac_addr(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t mac_addr[6]);
+
+int dpni_enable_vlan_filter(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ int en);
+
+int dpni_add_vlan_id(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint16_t vlan_id);
-/**
- * dpni_add_vlan_id() - Add VLAN ID filter
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @vlan_id: VLAN ID to add
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_add_vlan_id(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- uint16_t vlan_id);
+int dpni_remove_vlan_id(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint16_t vlan_id);
-/**
- * dpni_remove_vlan_id() - Remove VLAN ID filter
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @vlan_id: VLAN ID to remove
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_remove_vlan_id(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- uint16_t vlan_id);
-
-/**
- * dpni_clear_vlan_filters() - Clear all VLAN filters
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_clear_vlan_filters(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token);
+int dpni_clear_vlan_filters(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token);
/**
* enum dpni_dist_mode - DPNI distribution mode
@@ -1050,27 +781,16 @@ enum dpni_fs_miss_action {
/**
* struct dpni_fs_tbl_cfg - Flow Steering table configuration
- * @miss_action: Miss action selection
- * @default_flow_id: Used when 'miss_action = DPNI_FS_MISS_EXPLICIT_FLOWID'
+ * @miss_action: Miss action selection
+ * @default_flow_id: Used when 'miss_action = DPNI_FS_MISS_EXPLICIT_FLOWID'
*/
struct dpni_fs_tbl_cfg {
- enum dpni_fs_miss_action miss_action;
- uint16_t default_flow_id;
+ enum dpni_fs_miss_action miss_action;
+ uint16_t default_flow_id;
+ char keep_hash_key;
};
/**
- * dpni_prepare_key_cfg() - function prepare extract parameters
- * @cfg: defining a full Key Generation profile (rule)
- * @key_cfg_buf: Zeroed 256 bytes of memory before mapping it to DMA
- *
- * This function has to be called before the following functions:
- * - dpni_set_rx_tc_dist()
- * - dpni_set_qos_table()
- */
-int dpni_prepare_key_cfg(const struct dpkg_profile_cfg *cfg,
- uint8_t *key_cfg_buf);
-
-/**
* struct dpni_rx_tc_dist_cfg - Rx traffic class distribution configuration
* @dist_size: Set the distribution size;
* supported values: 1,2,3,4,6,7,8,12,14,16,24,28,32,48,56,64,96,
@@ -1078,36 +798,24 @@ int dpni_prepare_key_cfg(const struct dpkg_profile_cfg *cfg,
* @dist_mode: Distribution mode
* @key_cfg_iova: I/O virtual address of 256 bytes DMA-able memory filled with
* the extractions to be used for the distribution key by calling
- * dpni_prepare_key_cfg() relevant only when
+ * dpkg_prepare_key_cfg(); relevant only when
* 'dist_mode != DPNI_DIST_MODE_NONE', otherwise it can be '0'
* @fs_cfg: Flow Steering table configuration; only relevant if
* 'dist_mode = DPNI_DIST_MODE_FS'
*/
struct dpni_rx_tc_dist_cfg {
- uint16_t dist_size;
- enum dpni_dist_mode dist_mode;
- uint64_t key_cfg_iova;
- struct dpni_fs_tbl_cfg fs_cfg;
+ uint16_t dist_size;
+ enum dpni_dist_mode dist_mode;
+ uint64_t key_cfg_iova;
+ struct dpni_fs_tbl_cfg fs_cfg;
};
-/**
- * dpni_set_rx_tc_dist() - Set Rx traffic class distribution configuration
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @tc_id: Traffic class selection (0-7)
- * @cfg: Traffic class distribution configuration
- *
- * warning: if 'dist_mode != DPNI_DIST_MODE_NONE', call dpni_prepare_key_cfg()
- * first to prepare the key_cfg_iova parameter
- *
- * Return: '0' on Success; error code otherwise.
- */
-int dpni_set_rx_tc_dist(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- uint8_t tc_id,
- const struct dpni_rx_tc_dist_cfg *cfg);
+int dpni_set_rx_tc_dist(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ uint8_t tc_id,
+ const struct dpni_rx_tc_dist_cfg *cfg);
+
/**
* enum dpni_congestion_unit - DPNI congestion units
* @DPNI_CONGESTION_UNIT_BYTES: bytes units
@@ -1147,9 +855,9 @@ enum dpni_dest {
* channel; not relevant for 'DPNI_DEST_NONE' option
*/
struct dpni_dest_cfg {
- enum dpni_dest dest_type;
- int dest_id;
- uint8_t priority;
+ enum dpni_dest dest_type;
+ int dest_id;
+ uint8_t priority;
};
/* DPNI congestion options */
@@ -1186,6 +894,11 @@ struct dpni_dest_cfg {
* sw-portal's DQRR, the DQRI interrupt is asserted immediately (if enabled)
*/
#define DPNI_CONG_OPT_INTR_COALESCING_DISABLED 0x00000020
+/**
+ * This congestion will trigger flow control or priority flow control. This
+ * will have effect only if flow control is enabled with dpni_set_link_cfg()
+ */
+#define DPNI_CONG_OPT_FLOW_CONTROL 0x00000040
/**
* struct dpni_congestion_notification_cfg - congestion notification
@@ -1203,54 +916,35 @@ struct dpni_dest_cfg {
*/
struct dpni_congestion_notification_cfg {
- enum dpni_congestion_unit units;
- uint32_t threshold_entry;
- uint32_t threshold_exit;
- uint64_t message_ctx;
- uint64_t message_iova;
- struct dpni_dest_cfg dest_cfg;
- uint16_t notification_mode;
+ enum dpni_congestion_unit units;
+ uint32_t threshold_entry;
+ uint32_t threshold_exit;
+ uint64_t message_ctx;
+ uint64_t message_iova;
+ struct dpni_dest_cfg dest_cfg;
+ uint16_t notification_mode;
};
-/**
- * dpni_set_congestion_notification() - Set traffic class congestion
- * notification configuration
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @qtype: Type of queue - Rx, Tx and Tx confirm types are supported
- * @tc_id: Traffic class selection (0-7)
- * @cfg: congestion notification configuration
- *
- * Return: '0' on Success; error code otherwise.
- */
-int dpni_set_congestion_notification(
- struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- enum dpni_queue_type qtype,
- uint8_t tc_id,
+int dpni_set_congestion_notification(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ enum dpni_queue_type qtype,
+ uint8_t tc_id,
const struct dpni_congestion_notification_cfg *cfg);
-/**
- * dpni_get_congestion_notification() - Get traffic class congestion
- * notification configuration
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @qtype: Type of queue - Rx, Tx and Tx confirm types are supported
- * @tc_id: Traffic class selection (0-7)
- * @cfg: congestion notification configuration
- *
- * Return: '0' on Success; error code otherwise.
- */
-int dpni_get_congestion_notification(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- enum dpni_queue_type qtype,
- uint8_t tc_id,
+int dpni_get_congestion_notification(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ enum dpni_queue_type qtype,
+ uint8_t tc_id,
struct dpni_congestion_notification_cfg *cfg);
+/* DPNI FLC stash options */
+
+/**
+ * stashes the whole annotation area (up to 192 bytes)
+ */
+#define DPNI_FLC_STASH_FRAME_ANNOTATION 0x00000001
/**
* struct dpni_queue - Queue structure
@@ -1291,9 +985,25 @@ struct dpni_queue {
uint64_t user_context;
/**
* struct flc - FD FLow Context structure
- * @value: FLC value to set
- * @stash_control: Boolean, indicates whether the 6 lowest
- * significant bits are used for stash control.
+ * @value: Default FLC value for traffic dequeued from
+ * this queue. Please check description of FD
+ * structure for more information.
+ * Note that FLC values set using dpni_add_fs_entry,
+ * if any, take precedence over values per queue.
+ * @stash_control: Boolean, indicates whether the 6 least
+ * significant bits are used for stash control. If set, the 6
+ * least significant bits in value are interpreted as follows:
+ * - bits 0-1: indicates the number of 64 byte units of context
+ * that are stashed. FLC value is interpreted as a memory address
+ * in this case, excluding the 6 LS bits.
+ * - bits 2-3: indicates the number of 64 byte units of frame
+ * annotation to be stashed. Annotation is placed at FD[ADDR].
+ * - bits 4-5: indicates the number of 64 byte units of frame
+ * data to be stashed. Frame data is placed at FD[ADDR] +
+ * FD[OFFSET].
+ * For more details check the Frame Descriptor section in the
+ * hardware documentation.
*/
struct {
uint64_t value;
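Based on the bit layout documented just above, an interpretation sketch (not part of the patch): enable stash control and request one 64-byte unit of annotation plus two units of frame data; ctx_addr is an assumed 64-byte-aligned context address.

static void set_stash(struct dpni_queue *q, uint64_t ctx_addr /* 64B aligned */)
{
	q->flc.stash_control = 1;
	q->flc.value = ctx_addr		/* bits 6..63: context address      */
		     | (0x0ULL << 0)	/* bits 0-1: no context stashing    */
		     | (0x1ULL << 2)	/* bits 2-3: 1 unit of annotation   */
		     | (0x2ULL << 4);	/* bits 4-5: 2 units of frame data  */
}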
@@ -1331,41 +1041,16 @@ enum dpni_confirmation_mode {
DPNI_CONF_DISABLE,
};
-/**
- * dpni_set_tx_confirmation_mode() - Tx confirmation mode
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @mode: Tx confirmation mode
- *
- * This function is useful only when 'DPNI_OPT_TX_CONF_DISABLED' is not
- * selected at DPNI creation.
- * Calling this function with 'mode' set to DPNI_CONF_DISABLE disables all
- * transmit confirmation (including the private confirmation queues), regardless
- * of previous settings; Note that in this case, Tx error frames are still
- * enqueued to the general transmit errors queue.
- * Calling this function with 'mode' set to DPNI_CONF_SINGLE switches all
- * Tx confirmations to a shared Tx conf queue. The ID of the queue when
- * calling dpni_set/get_queue is -1.
- * Tx confirmation mode can only be changed while the DPNI is disabled.
- * Executing this command while the DPNI is enabled will return an error.
- *
- * Return: '0' on Success; Error code otherwise.
- */
-int dpni_set_tx_confirmation_mode(struct fsl_mc_io *mc_io,
- uint32_t cmd_flags,
- uint16_t token,
- enum dpni_confirmation_mode mode);
+int dpni_set_tx_confirmation_mode(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ enum dpni_confirmation_mode mode);
+
+int dpni_get_tx_confirmation_mode(struct fsl_mc_io *mc_io,
+ uint32_t cmd_flags,
+ uint16_t token,
+ enum dpni_confirmation_mode *mode);
-/**
- * dpni_get_api_version() - Get Data Path Network Interface API version
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @major_ver: Major version of data path network interface API
- * @minor_ver: Minor version of data path network interface API
- *
- * Return: '0' on Success; Error code otherwise.
- */
int dpni_get_api_version(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t *major_ver,
@@ -1396,23 +1081,6 @@ int dpni_get_api_version(struct fsl_mc_io *mc_io,
*/
#define DPNI_QUEUE_OPT_HOLD_ACTIVE 0x00000008
-/**
- * dpni_set_queue() - Set queue parameters
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @qtype: Type of queue - all queue types are supported, although
- * the command is ignored for Tx
- * @tc: Traffic class, in range 0 to NUM_TCS - 1
- * @index: Selects the specific queue out of the set
- * allocated for the same TC.Value must be in
- * range 0 to NUM_QUEUES - 1
- * @options: A combination of DPNI_QUEUE_OPT_ values that control
- * what configuration options are set on the queue
- * @queue: Queue configuration structure
- *
- * Return: '0' on Success; Error code otherwise.
- */
int dpni_set_queue(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
@@ -1422,31 +1090,6 @@ int dpni_set_queue(struct fsl_mc_io *mc_io,
uint8_t options,
const struct dpni_queue *queue);
-/**
- * dpni_get_queue() - Get queue parameters
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @qtype: Type of queue - all queue types are supported
- * @tc: Traffic class, in range 0 to NUM_TCS - 1
- * @index: Selects the specific queue out of the set allocated
- * for the same TC. Value must be in range 0 to
- * NUM_QUEUES - 1
- * @queue: Queue configuration structure
- * @qid: Queue identification
- *
- * This function returns current queue configuration which can be changed by
- * calling dpni_set_queue, and queue identification information.
- * Returned qid.fqid and/or qid.qdbin values can be used to:
- * - enqueue traffic for Tx queues,
- * - perform volatile dequeue for Rx and, if applicable, Tx confirmation
- * clean-up,
- * - retrieve queue state.
- *
- * All these operations are supported through the DPIO run-time API.
- *
- * Return: '0' on Success; Error code otherwise.
- */
int dpni_get_queue(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
@@ -1456,32 +1099,13 @@ int dpni_get_queue(struct fsl_mc_io *mc_io,
struct dpni_queue *queue,
struct dpni_queue_id *qid);
-/**
- * dpni_get_statistics() - Get DPNI statistics
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @page: Selects the statistics page to retrieve, see
- * DPNI_GET_STATISTICS output.
- * Pages are numbered 0 to 2.
- * @stat: Structure containing the statistics
- *
- * Return: '0' on Success; Error code otherwise.
- */
int dpni_get_statistics(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
uint8_t page,
+ uint8_t param,
union dpni_statistics *stat);
-/**
- * dpni_reset_statistics() - Clears DPNI statistics
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- *
- * Return: '0' on Success; Error code otherwise.
- */
int dpni_reset_statistics(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token);
@@ -1505,40 +1129,25 @@ enum dpni_congestion_point {
* struct dpni_taildrop - Structure representing the taildrop
* @enable: Indicates whether the taildrop is active or not.
* @units: Indicates the unit of THRESHOLD. Queue taildrop only
- * supports byte units, this field is ignored and
- * assumed = 0 if CONGESTION_POINT is 0.
+ * supports byte units, this field is ignored and
+ * assumed = 0 if CONGESTION_POINT is 0.
* @threshold: Threshold value, in units identified by UNITS field. Value 0
- * cannot be used as a valid taildrop threshold,
- * THRESHOLD must be > 0 if the taildrop is
- * enabled.
+ * cannot be used as a valid taildrop threshold,
+ * THRESHOLD must be > 0 if the taildrop is
+ * enabled.
+ * @oal: Overhead Accounting Length, a 12-bit, 2's complement value
+ * with range (-2048 to +2047) representing a fixed per-frame
+ * overhead to be added to the actual length of a frame when
+ * performing WRED and tail drop calculations and threshold
+ * comparisons.
*/
struct dpni_taildrop {
char enable;
enum dpni_congestion_unit units;
uint32_t threshold;
+ int16_t oal;
};
-/**
- * dpni_set_taildrop() - Set taildrop per queue or TC
- *
- * Setting a per-TC taildrop (cg_point = DPNI_CP_GROUP) will reset any current
- * congestion notification or early drop (WRED) configuration previously applied
- * to the same TC.
- *
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @cg_point: Congestion point. DPNI_CP_QUEUE is only supported in
- * combination with DPNI_QUEUE_RX.
- * @q_type: Queue type, can be DPNI_QUEUE_RX or DPNI_QUEUE_TX.
- * @tc: Traffic class to apply this taildrop to
- * @q_index: Index of the queue if the DPNI supports multiple queues for
- * traffic distribution.
- * Ignored if CONGESTION_POINT is not DPNI_CP_QUEUE.
- * @taildrop: Taildrop structure
- *
- * Return: '0' on Success; Error code otherwise.
- */
int dpni_set_taildrop(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
@@ -1548,21 +1157,6 @@ int dpni_set_taildrop(struct fsl_mc_io *mc_io,
uint8_t q_index,
struct dpni_taildrop *taildrop);
-/**
- * dpni_get_taildrop() - Get taildrop information
- * @mc_io: Pointer to MC portal's I/O object
- * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
- * @token: Token of DPNI object
- * @cg_point: Congestion point
- * @q_type:
- * @tc: Traffic class to apply this taildrop to
- * @q_index: Index of the queue if the DPNI supports multiple queues for
- * traffic distribution. Ignored if CONGESTION_POINT
- * is not 0.
- * @taildrop: Taildrop structure
- *
- * Return: '0' on Success; Error code otherwise.
- */
int dpni_get_taildrop(struct fsl_mc_io *mc_io,
uint32_t cmd_flags,
uint16_t token,
diff --git a/drivers/net/dpaa2/mc/fsl_dpni_cmd.h b/drivers/net/dpaa2/mc/fsl_dpni_cmd.h
index 2ac397cd..1a483329 100644
--- a/drivers/net/dpaa2/mc/fsl_dpni_cmd.h
+++ b/drivers/net/dpaa2/mc/fsl_dpni_cmd.h
@@ -5,7 +5,7 @@
* BSD LICENSE
*
* Copyright 2013-2016 Freescale Semiconductor Inc.
- * Copyright 2016 NXP.
+ * Copyright 2016-2017 NXP.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -42,435 +42,597 @@
/* DPNI Version */
#define DPNI_VER_MAJOR 7
-#define DPNI_VER_MINOR 0
+#define DPNI_VER_MINOR 3
+
+#define DPNI_CMD_BASE_VERSION 1
+#define DPNI_CMD_VERSION_2 2
+#define DPNI_CMD_ID_OFFSET 4
+
+#define DPNI_CMD(id) (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_BASE_VERSION)
+#define DPNI_CMD_V2(id) (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_VERSION_2)
/* Command IDs */
-#define DPNI_CMDID_OPEN ((0x801 << 4) | (0x1))
-#define DPNI_CMDID_CLOSE ((0x800 << 4) | (0x1))
-#define DPNI_CMDID_CREATE ((0x901 << 4) | (0x1))
-#define DPNI_CMDID_DESTROY ((0x981 << 4) | (0x1))
-#define DPNI_CMDID_GET_API_VERSION ((0xa01 << 4) | (0x1))
-
-#define DPNI_CMDID_ENABLE ((0x002 << 4) | (0x1))
-#define DPNI_CMDID_DISABLE ((0x003 << 4) | (0x1))
-#define DPNI_CMDID_GET_ATTR ((0x004 << 4) | (0x1))
-#define DPNI_CMDID_RESET ((0x005 << 4) | (0x1))
-#define DPNI_CMDID_IS_ENABLED ((0x006 << 4) | (0x1))
-
-#define DPNI_CMDID_SET_POOLS ((0x200 << 4) | (0x1))
-#define DPNI_CMDID_SET_ERRORS_BEHAVIOR ((0x20B << 4) | (0x1))
-
-#define DPNI_CMDID_GET_QDID ((0x210 << 4) | (0x1))
-#define DPNI_CMDID_GET_LINK_STATE ((0x215 << 4) | (0x1))
-#define DPNI_CMDID_SET_MAX_FRAME_LENGTH ((0x216 << 4) | (0x1))
-#define DPNI_CMDID_GET_MAX_FRAME_LENGTH ((0x217 << 4) | (0x1))
-#define DPNI_CMDID_SET_LINK_CFG ((0x21a << 4) | (0x1))
-
-#define DPNI_CMDID_SET_MCAST_PROMISC ((0x220 << 4) | (0x1))
-#define DPNI_CMDID_GET_MCAST_PROMISC ((0x221 << 4) | (0x1))
-#define DPNI_CMDID_SET_UNICAST_PROMISC ((0x222 << 4) | (0x1))
-#define DPNI_CMDID_GET_UNICAST_PROMISC ((0x223 << 4) | (0x1))
-#define DPNI_CMDID_SET_PRIM_MAC ((0x224 << 4) | (0x1))
-#define DPNI_CMDID_GET_PRIM_MAC ((0x225 << 4) | (0x1))
-#define DPNI_CMDID_ADD_MAC_ADDR ((0x226 << 4) | (0x1))
-#define DPNI_CMDID_REMOVE_MAC_ADDR ((0x227 << 4) | (0x1))
-#define DPNI_CMDID_CLR_MAC_FILTERS ((0x228 << 4) | (0x1))
-
-#define DPNI_CMDID_ENABLE_VLAN_FILTER ((0x230 << 4) | (0x1))
-#define DPNI_CMDID_ADD_VLAN_ID ((0x231 << 4) | (0x1))
-#define DPNI_CMDID_REMOVE_VLAN_ID ((0x232 << 4) | (0x1))
-#define DPNI_CMDID_CLR_VLAN_FILTERS ((0x233 << 4) | (0x1))
-
-#define DPNI_CMDID_SET_RX_TC_DIST ((0x235 << 4) | (0x1))
-
-#define DPNI_CMDID_GET_STATISTICS ((0x25D << 4) | (0x1))
-#define DPNI_CMDID_RESET_STATISTICS ((0x25E << 4) | (0x1))
-#define DPNI_CMDID_GET_QUEUE ((0x25F << 4) | (0x1))
-#define DPNI_CMDID_SET_QUEUE ((0x260 << 4) | (0x1))
-#define DPNI_CMDID_GET_TAILDROP ((0x261 << 4) | (0x1))
-#define DPNI_CMDID_SET_TAILDROP ((0x262 << 4) | (0x1))
-
-#define DPNI_CMDID_GET_PORT_MAC_ADDR ((0x263 << 4) | (0x1))
-
-#define DPNI_CMDID_GET_BUFFER_LAYOUT ((0x264 << 4) | (0x1))
-#define DPNI_CMDID_SET_BUFFER_LAYOUT ((0x265 << 4) | (0x1))
-
-#define DPNI_CMDID_SET_CONGESTION_NOTIFICATION ((0x267 << 4) | (0x1))
-#define DPNI_CMDID_GET_CONGESTION_NOTIFICATION ((0x268 << 4) | (0x1))
-#define DPNI_CMDID_GET_OFFLOAD ((0x26B << 4) | (0x1))
-#define DPNI_CMDID_SET_OFFLOAD ((0x26C << 4) | (0x1))
-#define DPNI_CMDID_SET_TX_CONFIRMATION_MODE ((0x266 << 4) | (0x1))
-#define DPNI_CMDID_GET_TX_CONFIRMATION_MODE ((0x26D << 4) | (0x1))
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_OPEN(cmd, dpni_id) \
- MC_CMD_OP(cmd, 0, 0, 32, int, dpni_id)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_CREATE(cmd, cfg) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 32, uint32_t, (cfg)->options); \
- MC_CMD_OP(cmd, 0, 32, 8, uint8_t, (cfg)->num_queues); \
- MC_CMD_OP(cmd, 0, 40, 8, uint8_t, (cfg)->num_tcs); \
- MC_CMD_OP(cmd, 0, 48, 8, uint8_t, (cfg)->mac_filter_entries); \
- MC_CMD_OP(cmd, 1, 0, 8, uint8_t, (cfg)->vlan_filter_entries); \
- MC_CMD_OP(cmd, 1, 16, 8, uint8_t, (cfg)->qos_entries); \
- MC_CMD_OP(cmd, 1, 32, 16, uint16_t, (cfg)->fs_entries); \
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_SET_POOLS(cmd, cfg) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 8, uint8_t, cfg->num_dpbp); \
- MC_CMD_OP(cmd, 0, 8, 1, int, cfg->pools[0].backup_pool); \
- MC_CMD_OP(cmd, 0, 9, 1, int, cfg->pools[1].backup_pool); \
- MC_CMD_OP(cmd, 0, 10, 1, int, cfg->pools[2].backup_pool); \
- MC_CMD_OP(cmd, 0, 11, 1, int, cfg->pools[3].backup_pool); \
- MC_CMD_OP(cmd, 0, 12, 1, int, cfg->pools[4].backup_pool); \
- MC_CMD_OP(cmd, 0, 13, 1, int, cfg->pools[5].backup_pool); \
- MC_CMD_OP(cmd, 0, 14, 1, int, cfg->pools[6].backup_pool); \
- MC_CMD_OP(cmd, 0, 15, 1, int, cfg->pools[7].backup_pool); \
- MC_CMD_OP(cmd, 0, 32, 32, int, cfg->pools[0].dpbp_id); \
- MC_CMD_OP(cmd, 4, 32, 16, uint16_t, cfg->pools[0].buffer_size);\
- MC_CMD_OP(cmd, 1, 0, 32, int, cfg->pools[1].dpbp_id); \
- MC_CMD_OP(cmd, 4, 48, 16, uint16_t, cfg->pools[1].buffer_size);\
- MC_CMD_OP(cmd, 1, 32, 32, int, cfg->pools[2].dpbp_id); \
- MC_CMD_OP(cmd, 5, 0, 16, uint16_t, cfg->pools[2].buffer_size);\
- MC_CMD_OP(cmd, 2, 0, 32, int, cfg->pools[3].dpbp_id); \
- MC_CMD_OP(cmd, 5, 16, 16, uint16_t, cfg->pools[3].buffer_size);\
- MC_CMD_OP(cmd, 2, 32, 32, int, cfg->pools[4].dpbp_id); \
- MC_CMD_OP(cmd, 5, 32, 16, uint16_t, cfg->pools[4].buffer_size);\
- MC_CMD_OP(cmd, 3, 0, 32, int, cfg->pools[5].dpbp_id); \
- MC_CMD_OP(cmd, 5, 48, 16, uint16_t, cfg->pools[5].buffer_size);\
- MC_CMD_OP(cmd, 3, 32, 32, int, cfg->pools[6].dpbp_id); \
- MC_CMD_OP(cmd, 6, 0, 16, uint16_t, cfg->pools[6].buffer_size);\
- MC_CMD_OP(cmd, 4, 0, 32, int, cfg->pools[7].dpbp_id); \
- MC_CMD_OP(cmd, 6, 16, 16, uint16_t, cfg->pools[7].buffer_size);\
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_RSP_IS_ENABLED(cmd, en) \
- MC_RSP_OP(cmd, 0, 0, 1, int, en)
-
-/* DPNI_CMD_GET_ATTR is not used, no input parameters */
-
-#define DPNI_RSP_GET_ATTR(cmd, attr) \
-do { \
- MC_RSP_OP(cmd, 0, 0, 32, uint32_t, (attr)->options); \
- MC_RSP_OP(cmd, 0, 32, 8, uint8_t, (attr)->num_queues); \
- MC_RSP_OP(cmd, 0, 40, 8, uint8_t, (attr)->num_tcs); \
- MC_RSP_OP(cmd, 0, 48, 8, uint8_t, (attr)->mac_filter_entries); \
- MC_RSP_OP(cmd, 1, 0, 8, uint8_t, (attr)->vlan_filter_entries); \
- MC_RSP_OP(cmd, 1, 16, 8, uint8_t, (attr)->qos_entries); \
- MC_RSP_OP(cmd, 1, 32, 16, uint16_t, (attr)->fs_entries); \
- MC_RSP_OP(cmd, 2, 0, 8, uint8_t, (attr)->qos_key_size); \
- MC_RSP_OP(cmd, 2, 8, 8, uint8_t, (attr)->fs_key_size); \
- MC_RSP_OP(cmd, 2, 16, 16, uint16_t, (attr)->wriop_version); \
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_SET_ERRORS_BEHAVIOR(cmd, cfg) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 32, uint32_t, cfg->errors); \
- MC_CMD_OP(cmd, 0, 32, 4, enum dpni_error_action, cfg->error_action); \
- MC_CMD_OP(cmd, 0, 36, 1, int, cfg->set_frame_annotation); \
-} while (0)
-
-#define DPNI_CMD_GET_BUFFER_LAYOUT(cmd, qtype) \
- MC_CMD_OP(cmd, 0, 0, 8, enum dpni_queue_type, qtype)
-
-#define DPNI_RSP_GET_BUFFER_LAYOUT(cmd, layout) \
-do { \
- MC_RSP_OP(cmd, 0, 48, 1, char, (layout)->pass_timestamp); \
- MC_RSP_OP(cmd, 0, 49, 1, char, (layout)->pass_parser_result); \
- MC_RSP_OP(cmd, 0, 50, 1, char, (layout)->pass_frame_status); \
- MC_RSP_OP(cmd, 1, 0, 16, uint16_t, (layout)->private_data_size); \
- MC_RSP_OP(cmd, 1, 16, 16, uint16_t, (layout)->data_align); \
- MC_RSP_OP(cmd, 1, 32, 16, uint16_t, (layout)->data_head_room); \
- MC_RSP_OP(cmd, 1, 48, 16, uint16_t, (layout)->data_tail_room); \
-} while (0)
-
-#define DPNI_CMD_SET_BUFFER_LAYOUT(cmd, qtype, layout) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 8, enum dpni_queue_type, qtype); \
- MC_CMD_OP(cmd, 0, 32, 16, uint16_t, (layout)->options); \
- MC_CMD_OP(cmd, 0, 48, 1, char, (layout)->pass_timestamp); \
- MC_CMD_OP(cmd, 0, 49, 1, char, (layout)->pass_parser_result); \
- MC_CMD_OP(cmd, 0, 50, 1, char, (layout)->pass_frame_status); \
- MC_CMD_OP(cmd, 1, 0, 16, uint16_t, (layout)->private_data_size); \
- MC_CMD_OP(cmd, 1, 16, 16, uint16_t, (layout)->data_align); \
- MC_CMD_OP(cmd, 1, 32, 16, uint16_t, (layout)->data_head_room); \
- MC_CMD_OP(cmd, 1, 48, 16, uint16_t, (layout)->data_tail_room); \
-} while (0)
-
-#define DPNI_CMD_SET_OFFLOAD(cmd, type, config) \
-do { \
- MC_CMD_OP(cmd, 0, 24, 8, enum dpni_offload, type); \
- MC_CMD_OP(cmd, 0, 32, 32, uint32_t, config); \
-} while (0)
-
-#define DPNI_CMD_GET_OFFLOAD(cmd, type) \
- MC_CMD_OP(cmd, 0, 24, 8, enum dpni_offload, type)
-
-#define DPNI_RSP_GET_OFFLOAD(cmd, config) \
- MC_RSP_OP(cmd, 0, 32, 32, uint32_t, config)
-
-#define DPNI_CMD_GET_QDID(cmd, qtype) \
- MC_CMD_OP(cmd, 0, 0, 8, enum dpni_queue_type, qtype)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_RSP_GET_QDID(cmd, qdid) \
- MC_RSP_OP(cmd, 0, 0, 16, uint16_t, qdid)
-
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_GET_STATISTICS(cmd, page) \
- MC_CMD_OP(cmd, 0, 0, 8, uint8_t, page)
-
-#define DPNI_RSP_GET_STATISTICS(cmd, stat) \
-do { \
- MC_RSP_OP(cmd, 0, 0, 64, uint64_t, (stat)->raw.counter[0]); \
- MC_RSP_OP(cmd, 1, 0, 64, uint64_t, (stat)->raw.counter[1]); \
- MC_RSP_OP(cmd, 2, 0, 64, uint64_t, (stat)->raw.counter[2]); \
- MC_RSP_OP(cmd, 3, 0, 64, uint64_t, (stat)->raw.counter[3]); \
- MC_RSP_OP(cmd, 4, 0, 64, uint64_t, (stat)->raw.counter[4]); \
- MC_RSP_OP(cmd, 5, 0, 64, uint64_t, (stat)->raw.counter[5]); \
- MC_RSP_OP(cmd, 6, 0, 64, uint64_t, (stat)->raw.counter[6]); \
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_SET_LINK_CFG(cmd, cfg) \
-do { \
- MC_CMD_OP(cmd, 1, 0, 32, uint32_t, cfg->rate);\
- MC_CMD_OP(cmd, 2, 0, 64, uint64_t, cfg->options);\
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_RSP_GET_LINK_STATE(cmd, state) \
-do { \
- MC_RSP_OP(cmd, 0, 32, 1, int, state->up);\
- MC_RSP_OP(cmd, 1, 0, 32, uint32_t, state->rate);\
- MC_RSP_OP(cmd, 2, 0, 64, uint64_t, state->options);\
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_SET_MAX_FRAME_LENGTH(cmd, max_frame_length) \
- MC_CMD_OP(cmd, 0, 0, 16, uint16_t, max_frame_length)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_RSP_GET_MAX_FRAME_LENGTH(cmd, max_frame_length) \
- MC_RSP_OP(cmd, 0, 0, 16, uint16_t, max_frame_length)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_SET_MULTICAST_PROMISC(cmd, en) \
- MC_CMD_OP(cmd, 0, 0, 1, int, en)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_RSP_GET_MULTICAST_PROMISC(cmd, en) \
- MC_RSP_OP(cmd, 0, 0, 1, int, en)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_SET_UNICAST_PROMISC(cmd, en) \
- MC_CMD_OP(cmd, 0, 0, 1, int, en)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_RSP_GET_UNICAST_PROMISC(cmd, en) \
- MC_RSP_OP(cmd, 0, 0, 1, int, en)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_SET_PRIMARY_MAC_ADDR(cmd, mac_addr) \
-do { \
- MC_CMD_OP(cmd, 0, 16, 8, uint8_t, mac_addr[5]); \
- MC_CMD_OP(cmd, 0, 24, 8, uint8_t, mac_addr[4]); \
- MC_CMD_OP(cmd, 0, 32, 8, uint8_t, mac_addr[3]); \
- MC_CMD_OP(cmd, 0, 40, 8, uint8_t, mac_addr[2]); \
- MC_CMD_OP(cmd, 0, 48, 8, uint8_t, mac_addr[1]); \
- MC_CMD_OP(cmd, 0, 56, 8, uint8_t, mac_addr[0]); \
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_RSP_GET_PRIMARY_MAC_ADDR(cmd, mac_addr) \
-do { \
- MC_RSP_OP(cmd, 0, 16, 8, uint8_t, mac_addr[5]); \
- MC_RSP_OP(cmd, 0, 24, 8, uint8_t, mac_addr[4]); \
- MC_RSP_OP(cmd, 0, 32, 8, uint8_t, mac_addr[3]); \
- MC_RSP_OP(cmd, 0, 40, 8, uint8_t, mac_addr[2]); \
- MC_RSP_OP(cmd, 0, 48, 8, uint8_t, mac_addr[1]); \
- MC_RSP_OP(cmd, 0, 56, 8, uint8_t, mac_addr[0]); \
-} while (0)
-
-#define DPNI_RSP_GET_PORT_MAC_ADDR(cmd, mac_addr) \
-do { \
- MC_RSP_OP(cmd, 0, 16, 8, uint8_t, mac_addr[5]); \
- MC_RSP_OP(cmd, 0, 24, 8, uint8_t, mac_addr[4]); \
- MC_RSP_OP(cmd, 0, 32, 8, uint8_t, mac_addr[3]); \
- MC_RSP_OP(cmd, 0, 40, 8, uint8_t, mac_addr[2]); \
- MC_RSP_OP(cmd, 0, 48, 8, uint8_t, mac_addr[1]); \
- MC_RSP_OP(cmd, 0, 56, 8, uint8_t, mac_addr[0]); \
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_ADD_MAC_ADDR(cmd, mac_addr) \
-do { \
- MC_CMD_OP(cmd, 0, 16, 8, uint8_t, mac_addr[5]); \
- MC_CMD_OP(cmd, 0, 24, 8, uint8_t, mac_addr[4]); \
- MC_CMD_OP(cmd, 0, 32, 8, uint8_t, mac_addr[3]); \
- MC_CMD_OP(cmd, 0, 40, 8, uint8_t, mac_addr[2]); \
- MC_CMD_OP(cmd, 0, 48, 8, uint8_t, mac_addr[1]); \
- MC_CMD_OP(cmd, 0, 56, 8, uint8_t, mac_addr[0]); \
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_REMOVE_MAC_ADDR(cmd, mac_addr) \
-do { \
- MC_CMD_OP(cmd, 0, 16, 8, uint8_t, mac_addr[5]); \
- MC_CMD_OP(cmd, 0, 24, 8, uint8_t, mac_addr[4]); \
- MC_CMD_OP(cmd, 0, 32, 8, uint8_t, mac_addr[3]); \
- MC_CMD_OP(cmd, 0, 40, 8, uint8_t, mac_addr[2]); \
- MC_CMD_OP(cmd, 0, 48, 8, uint8_t, mac_addr[1]); \
- MC_CMD_OP(cmd, 0, 56, 8, uint8_t, mac_addr[0]); \
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_CLEAR_MAC_FILTERS(cmd, unicast, multicast) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 1, int, unicast); \
- MC_CMD_OP(cmd, 0, 1, 1, int, multicast); \
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_ENABLE_VLAN_FILTER(cmd, en) \
- MC_CMD_OP(cmd, 0, 0, 1, int, en)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_ADD_VLAN_ID(cmd, vlan_id) \
- MC_CMD_OP(cmd, 0, 32, 16, uint16_t, vlan_id)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_REMOVE_VLAN_ID(cmd, vlan_id) \
- MC_CMD_OP(cmd, 0, 32, 16, uint16_t, vlan_id)
-
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_CMD_SET_RX_TC_DIST(cmd, tc_id, cfg) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 16, uint16_t, cfg->dist_size); \
- MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc_id); \
- MC_CMD_OP(cmd, 0, 24, 4, enum dpni_dist_mode, cfg->dist_mode); \
- MC_CMD_OP(cmd, 0, 28, 4, enum dpni_fs_miss_action, \
- cfg->fs_cfg.miss_action); \
- MC_CMD_OP(cmd, 0, 48, 16, uint16_t, cfg->fs_cfg.default_flow_id); \
- MC_CMD_OP(cmd, 6, 0, 64, uint64_t, cfg->key_cfg_iova); \
-} while (0)
-
-#define DPNI_CMD_GET_QUEUE(cmd, qtype, tc, index) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 8, enum dpni_queue_type, qtype); \
- MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc); \
- MC_CMD_OP(cmd, 0, 16, 8, uint8_t, index); \
-} while (0)
-
-#define DPNI_RSP_GET_QUEUE(cmd, queue, queue_id) \
-do { \
- MC_RSP_OP(cmd, 1, 0, 32, uint32_t, (queue)->destination.id); \
- MC_RSP_OP(cmd, 1, 48, 8, uint8_t, (queue)->destination.priority); \
- MC_RSP_OP(cmd, 1, 56, 4, enum dpni_dest, (queue)->destination.type); \
- MC_RSP_OP(cmd, 1, 62, 1, char, (queue)->flc.stash_control); \
- MC_RSP_OP(cmd, 1, 63, 1, char, (queue)->destination.hold_active); \
- MC_RSP_OP(cmd, 2, 0, 64, uint64_t, (queue)->flc.value); \
- MC_RSP_OP(cmd, 3, 0, 64, uint64_t, (queue)->user_context); \
- MC_RSP_OP(cmd, 4, 0, 32, uint32_t, (queue_id)->fqid); \
- MC_RSP_OP(cmd, 4, 32, 16, uint16_t, (queue_id)->qdbin); \
-} while (0)
-
-#define DPNI_CMD_SET_QUEUE(cmd, qtype, tc, index, options, queue) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 8, enum dpni_queue_type, qtype); \
- MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc); \
- MC_CMD_OP(cmd, 0, 16, 8, uint8_t, index); \
- MC_CMD_OP(cmd, 0, 24, 8, uint8_t, options); \
- MC_CMD_OP(cmd, 1, 0, 32, uint32_t, (queue)->destination.id); \
- MC_CMD_OP(cmd, 1, 48, 8, uint8_t, (queue)->destination.priority); \
- MC_CMD_OP(cmd, 1, 56, 4, enum dpni_dest, (queue)->destination.type); \
- MC_CMD_OP(cmd, 1, 62, 1, char, (queue)->flc.stash_control); \
- MC_CMD_OP(cmd, 1, 63, 1, char, (queue)->destination.hold_active); \
- MC_CMD_OP(cmd, 2, 0, 64, uint64_t, (queue)->flc.value); \
- MC_CMD_OP(cmd, 3, 0, 64, uint64_t, (queue)->user_context); \
-} while (0)
-
-/* cmd, param, offset, width, type, arg_name */
-#define DPNI_RSP_GET_API_VERSION(cmd, major, minor) \
-do { \
- MC_RSP_OP(cmd, 0, 0, 16, uint16_t, major);\
- MC_RSP_OP(cmd, 0, 16, 16, uint16_t, minor);\
-} while (0)
-
-#define DPNI_CMD_GET_TAILDROP(cmd, cp, q_type, tc, q_index) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 8, enum dpni_congestion_point, cp); \
- MC_CMD_OP(cmd, 0, 8, 8, enum dpni_queue_type, q_type); \
- MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc); \
- MC_CMD_OP(cmd, 0, 24, 8, uint8_t, q_index); \
-} while (0)
-
-#define DPNI_RSP_GET_TAILDROP(cmd, taildrop) \
-do { \
- MC_RSP_OP(cmd, 1, 0, 1, char, (taildrop)->enable); \
- MC_RSP_OP(cmd, 1, 16, 8, enum dpni_congestion_unit, \
- (taildrop)->units); \
- MC_RSP_OP(cmd, 1, 32, 32, uint32_t, (taildrop)->threshold); \
-} while (0)
-
-#define DPNI_CMD_SET_TAILDROP(cmd, cp, q_type, tc, q_index, taildrop) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 8, enum dpni_congestion_point, cp); \
- MC_CMD_OP(cmd, 0, 8, 8, enum dpni_queue_type, q_type); \
- MC_CMD_OP(cmd, 0, 16, 8, uint8_t, tc); \
- MC_CMD_OP(cmd, 0, 24, 8, uint8_t, q_index); \
- MC_CMD_OP(cmd, 1, 0, 1, char, (taildrop)->enable); \
- MC_CMD_OP(cmd, 1, 16, 8, enum dpni_congestion_unit, \
- (taildrop)->units); \
- MC_CMD_OP(cmd, 1, 32, 32, uint32_t, (taildrop)->threshold); \
-} while (0)
-
-#define DPNI_CMD_SET_TX_CONFIRMATION_MODE(cmd, mode) \
- MC_CMD_OP(cmd, 0, 32, 8, enum dpni_confirmation_mode, mode)
-
-#define DPNI_RSP_GET_TX_CONFIRMATION_MODE(cmd, mode) \
- MC_RSP_OP(cmd, 0, 32, 8, enum dpni_confirmation_mode, mode)
-
-#define DPNI_CMD_SET_CONGESTION_NOTIFICATION(cmd, qtype, tc, cfg) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 8, enum dpni_queue_type, qtype); \
- MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc); \
- MC_CMD_OP(cmd, 1, 0, 32, uint32_t, (cfg)->dest_cfg.dest_id); \
- MC_CMD_OP(cmd, 1, 32, 16, uint16_t, (cfg)->notification_mode); \
- MC_CMD_OP(cmd, 1, 48, 8, uint8_t, (cfg)->dest_cfg.priority); \
- MC_CMD_OP(cmd, 1, 56, 4, enum dpni_dest, (cfg)->dest_cfg.dest_type); \
- MC_CMD_OP(cmd, 1, 60, 2, enum dpni_congestion_unit, (cfg)->units); \
- MC_CMD_OP(cmd, 2, 0, 64, uint64_t, (cfg)->message_iova); \
- MC_CMD_OP(cmd, 3, 0, 64, uint64_t, (cfg)->message_ctx); \
- MC_CMD_OP(cmd, 4, 0, 32, uint32_t, (cfg)->threshold_entry); \
- MC_CMD_OP(cmd, 4, 32, 32, uint32_t, (cfg)->threshold_exit); \
-} while (0)
-
-#define DPNI_CMD_GET_CONGESTION_NOTIFICATION(cmd, qtype, tc) \
-do { \
- MC_CMD_OP(cmd, 0, 0, 8, enum dpni_queue_type, qtype); \
- MC_CMD_OP(cmd, 0, 8, 8, uint8_t, tc); \
-} while (0)
-
-#define DPNI_RSP_GET_CONGESTION_NOTIFICATION(cmd, cfg) \
-do { \
- MC_RSP_OP(cmd, 1, 0, 32, uint32_t, (cfg)->dest_cfg.dest_id); \
- MC_RSP_OP(cmd, 1, 0, 16, uint16_t, (cfg)->notification_mode); \
- MC_RSP_OP(cmd, 1, 48, 8, uint8_t, (cfg)->dest_cfg.priority); \
- MC_RSP_OP(cmd, 1, 56, 4, enum dpni_dest, (cfg)->dest_cfg.dest_type); \
- MC_RSP_OP(cmd, 1, 60, 2, enum dpni_congestion_unit, (cfg)->units); \
- MC_RSP_OP(cmd, 2, 0, 64, uint64_t, (cfg)->message_iova); \
- MC_RSP_OP(cmd, 3, 0, 64, uint64_t, (cfg)->message_ctx); \
- MC_RSP_OP(cmd, 4, 0, 32, uint32_t, (cfg)->threshold_entry); \
- MC_RSP_OP(cmd, 4, 32, 32, uint32_t, (cfg)->threshold_exit); \
-} while (0)
+#define DPNI_CMDID_OPEN DPNI_CMD(0x801)
+#define DPNI_CMDID_CLOSE DPNI_CMD(0x800)
+#define DPNI_CMDID_CREATE DPNI_CMD(0x901)
+#define DPNI_CMDID_DESTROY DPNI_CMD(0x981)
+#define DPNI_CMDID_GET_API_VERSION DPNI_CMD(0xa01)
+
+#define DPNI_CMDID_ENABLE DPNI_CMD(0x002)
+#define DPNI_CMDID_DISABLE DPNI_CMD(0x003)
+#define DPNI_CMDID_GET_ATTR DPNI_CMD_V2(0x004)
+#define DPNI_CMDID_RESET DPNI_CMD(0x005)
+#define DPNI_CMDID_IS_ENABLED DPNI_CMD(0x006)
+
+#define DPNI_CMDID_SET_IRQ_ENABLE DPNI_CMD(0x012)
+#define DPNI_CMDID_GET_IRQ_ENABLE DPNI_CMD(0x013)
+#define DPNI_CMDID_SET_IRQ_MASK DPNI_CMD(0x014)
+#define DPNI_CMDID_GET_IRQ_MASK DPNI_CMD(0x015)
+#define DPNI_CMDID_GET_IRQ_STATUS DPNI_CMD(0x016)
+#define DPNI_CMDID_CLEAR_IRQ_STATUS DPNI_CMD(0x017)
+
+#define DPNI_CMDID_SET_POOLS DPNI_CMD_V2(0x200)
+#define DPNI_CMDID_SET_ERRORS_BEHAVIOR DPNI_CMD(0x20B)
+
+#define DPNI_CMDID_GET_QDID DPNI_CMD(0x210)
+#define DPNI_CMDID_GET_SP_INFO DPNI_CMD(0x211)
+#define DPNI_CMDID_GET_TX_DATA_OFFSET DPNI_CMD(0x212)
+#define DPNI_CMDID_GET_LINK_STATE DPNI_CMD(0x215)
+#define DPNI_CMDID_SET_MAX_FRAME_LENGTH DPNI_CMD(0x216)
+#define DPNI_CMDID_GET_MAX_FRAME_LENGTH DPNI_CMD(0x217)
+#define DPNI_CMDID_SET_LINK_CFG DPNI_CMD(0x21A)
+#define DPNI_CMDID_SET_TX_SHAPING DPNI_CMD_V2(0x21B)
+
+#define DPNI_CMDID_SET_MCAST_PROMISC DPNI_CMD(0x220)
+#define DPNI_CMDID_GET_MCAST_PROMISC DPNI_CMD(0x221)
+#define DPNI_CMDID_SET_UNICAST_PROMISC DPNI_CMD(0x222)
+#define DPNI_CMDID_GET_UNICAST_PROMISC DPNI_CMD(0x223)
+#define DPNI_CMDID_SET_PRIM_MAC DPNI_CMD(0x224)
+#define DPNI_CMDID_GET_PRIM_MAC DPNI_CMD(0x225)
+#define DPNI_CMDID_ADD_MAC_ADDR DPNI_CMD(0x226)
+#define DPNI_CMDID_REMOVE_MAC_ADDR DPNI_CMD(0x227)
+#define DPNI_CMDID_CLR_MAC_FILTERS DPNI_CMD(0x228)
+
+#define DPNI_CMDID_ENABLE_VLAN_FILTER DPNI_CMD(0x230)
+#define DPNI_CMDID_ADD_VLAN_ID DPNI_CMD(0x231)
+#define DPNI_CMDID_REMOVE_VLAN_ID DPNI_CMD(0x232)
+#define DPNI_CMDID_CLR_VLAN_FILTERS DPNI_CMD(0x233)
+
+#define DPNI_CMDID_SET_RX_TC_DIST DPNI_CMD_V2(0x235)
+
+#define DPNI_CMDID_GET_STATISTICS DPNI_CMD_V2(0x25D)
+#define DPNI_CMDID_RESET_STATISTICS DPNI_CMD(0x25E)
+#define DPNI_CMDID_GET_QUEUE DPNI_CMD(0x25F)
+#define DPNI_CMDID_SET_QUEUE DPNI_CMD(0x260)
+#define DPNI_CMDID_GET_TAILDROP DPNI_CMD_V2(0x261)
+#define DPNI_CMDID_SET_TAILDROP DPNI_CMD_V2(0x262)
+
+#define DPNI_CMDID_GET_PORT_MAC_ADDR DPNI_CMD(0x263)
+
+#define DPNI_CMDID_GET_BUFFER_LAYOUT DPNI_CMD(0x264)
+#define DPNI_CMDID_SET_BUFFER_LAYOUT DPNI_CMD(0x265)
+
+#define DPNI_CMDID_SET_CONGESTION_NOTIFICATION DPNI_CMD(0x267)
+#define DPNI_CMDID_GET_CONGESTION_NOTIFICATION DPNI_CMD(0x268)
+#define DPNI_CMDID_SET_EARLY_DROP DPNI_CMD_V2(0x269)
+#define DPNI_CMDID_GET_EARLY_DROP DPNI_CMD_V2(0x26A)
+#define DPNI_CMDID_GET_OFFLOAD DPNI_CMD(0x26B)
+#define DPNI_CMDID_SET_OFFLOAD DPNI_CMD(0x26C)
+#define DPNI_CMDID_SET_TX_CONFIRMATION_MODE DPNI_CMD(0x266)
+#define DPNI_CMDID_GET_TX_CONFIRMATION_MODE DPNI_CMD(0x26D)
+
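The DPNI_CMD()/DPNI_CMD_V2() helpers above fold the command version into bits 0-3 and the command id into bits 4 and up, replacing the literal ((id << 4) | version) expressions that were removed. A small self-contained sketch (not part of the patch) that spot-checks the encoding:

#include <assert.h>

#define DPNI_CMD_BASE_VERSION	1
#define DPNI_CMD_VERSION_2	2
#define DPNI_CMD_ID_OFFSET	4

#define DPNI_CMD(id)	(((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_BASE_VERSION)
#define DPNI_CMD_V2(id)	(((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_VERSION_2)

int main(void)
{
	/* DPNI_CMDID_RESET keeps the old ((0x005 << 4) | 0x1) value. */
	assert(DPNI_CMD(0x005) == ((0x005 << 4) | 0x1));
	/* DPNI_CMDID_GET_ATTR moves from command version 1 to version 2. */
	assert(DPNI_CMD_V2(0x004) == ((0x004 << 4) | 0x2));
	return 0;
}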
+/* Macros for accessing command fields smaller than 1byte */
+#define DPNI_MASK(field) \
+ GENMASK(DPNI_##field##_SHIFT + DPNI_##field##_SIZE - 1, \
+ DPNI_##field##_SHIFT)
+#define dpni_set_field(var, field, val) \
+ ((var) |= (((val) << DPNI_##field##_SHIFT) & DPNI_MASK(field)))
+#define dpni_get_field(var, field) \
+ (((var) & DPNI_MASK(field)) >> DPNI_##field##_SHIFT)
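A minimal sketch (not driver code) of how dpni_set_field()/dpni_get_field() pack and read a sub-byte field, using the ENABLE field defined further below; GENMASK here is a local stand-in for the helper that the MC command headers are assumed to provide.

#include <assert.h>
#include <stdint.h>

#define GENMASK(h, l)	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define DPNI_ENABLE_SHIFT	0
#define DPNI_ENABLE_SIZE	1

#define DPNI_MASK(field) \
	GENMASK(DPNI_##field##_SHIFT + DPNI_##field##_SIZE - 1, \
		DPNI_##field##_SHIFT)
#define dpni_set_field(var, field, val) \
	((var) |= (((val) << DPNI_##field##_SHIFT) & DPNI_MASK(field)))
#define dpni_get_field(var, field) \
	(((var) & DPNI_MASK(field)) >> DPNI_##field##_SHIFT)

int main(void)
{
	uint8_t flags = 0;

	dpni_set_field(flags, ENABLE, 1);	/* touches bit 0 only */
	assert(flags == 0x01);
	assert(dpni_get_field(flags, ENABLE) == 1);
	return 0;
}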
+
+#pragma pack(push, 1)
+struct dpni_cmd_open {
+ uint32_t dpni_id;
+};
+
+struct dpni_cmd_create {
+ uint32_t options;
+ uint8_t num_queues;
+ uint8_t num_tcs;
+ uint8_t mac_filter_entries;
+ uint8_t pad1;
+ uint8_t vlan_filter_entries;
+ uint8_t pad2;
+ uint8_t qos_entries;
+ uint8_t pad3;
+ uint16_t fs_entries;
+};
+
+struct dpni_cmd_destroy {
+ uint32_t dpsw_id;
+};
+
+#define DPNI_BACKUP_POOL(val, order) (((val) & 0x1) << (order))
+
+struct dpni_cmd_pool {
+ uint16_t dpbp_id;
+ uint8_t priority_mask;
+ uint8_t pad;
+};
+
+struct dpni_cmd_set_pools {
+ uint8_t num_dpbp;
+ uint8_t backup_pool_mask;
+ uint16_t pad;
+ struct dpni_cmd_pool pool[8];
+ uint16_t buffer_size[8];
+};
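A sketch (not from the patch) of how the per-pool backup flags that the old DPNI_CMD_SET_POOLS macro wrote bit by bit can be folded into the new backup_pool_mask byte with DPNI_BACKUP_POOL(); the backup_pool[] array mirrors the previous cfg->pools[i].backup_pool fields and is assumed for illustration.

#include <stdint.h>

#define DPNI_BACKUP_POOL(val, order)	(((val) & 0x1) << (order))

uint8_t build_backup_pool_mask(const int backup_pool[8], int num_dpbp)
{
	uint8_t mask = 0;
	int i;

	for (i = 0; i < num_dpbp; i++)
		mask |= DPNI_BACKUP_POOL(backup_pool[i], i);
	return mask;
}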
+
+/* The enable indication is always the least significant bit */
+#define DPNI_ENABLE_SHIFT 0
+#define DPNI_ENABLE_SIZE 1
+
+struct dpni_rsp_is_enabled {
+ uint8_t enabled;
+};
+
+struct dpni_cmd_set_irq_enable {
+ uint8_t enable;
+ uint8_t pad[3];
+ uint8_t irq_index;
+};
+
+struct dpni_cmd_get_irq_enable {
+ uint32_t pad;
+ uint8_t irq_index;
+};
+
+struct dpni_rsp_get_irq_enable {
+ uint8_t enabled;
+};
+
+struct dpni_cmd_set_irq_mask {
+ uint32_t mask;
+ uint8_t irq_index;
+};
+
+struct dpni_cmd_get_irq_mask {
+ uint32_t pad;
+ uint8_t irq_index;
+};
+
+struct dpni_rsp_get_irq_mask {
+ uint32_t mask;
+};
+
+struct dpni_cmd_get_irq_status {
+ uint32_t status;
+ uint8_t irq_index;
+};
+
+struct dpni_rsp_get_irq_status {
+ uint32_t status;
+};
+
+struct dpni_cmd_clear_irq_status {
+ uint32_t status;
+ uint8_t irq_index;
+};
+
+struct dpni_rsp_get_attr {
+ /* response word 0 */
+ uint32_t options;
+ uint8_t num_queues;
+ uint8_t num_rx_tcs;
+ uint8_t mac_filter_entries;
+ uint8_t num_tx_tcs;
+ /* response word 1 */
+ uint8_t vlan_filter_entries;
+ uint8_t pad1;
+ uint8_t qos_entries;
+ uint8_t pad2;
+ uint16_t fs_entries;
+ uint16_t pad3;
+ /* response word 2 */
+ uint8_t qos_key_size;
+ uint8_t fs_key_size;
+ uint16_t wriop_version;
+};
+
+#define DPNI_ERROR_ACTION_SHIFT 0
+#define DPNI_ERROR_ACTION_SIZE 4
+#define DPNI_FRAME_ANN_SHIFT 4
+#define DPNI_FRAME_ANN_SIZE 1
+
+struct dpni_cmd_set_errors_behavior {
+ uint32_t errors;
+ /* from least significant bit: error_action:4, set_frame_annotation:1 */
+ uint8_t flags;
+};
+
+/* There are 3 separate commands for configuring Rx, Tx and Tx confirmation
+ * buffer layouts, but they all share the same parameters.
+ * If one of the functions changes, below structure needs to be split.
+ */
+#define DPNI_PASS_TS_SHIFT 0
+#define DPNI_PASS_TS_SIZE 1
+#define DPNI_PASS_PR_SHIFT 1
+#define DPNI_PASS_PR_SIZE 1
+#define DPNI_PASS_FS_SHIFT 2
+#define DPNI_PASS_FS_SIZE 1
+
+struct dpni_cmd_get_buffer_layout {
+ uint8_t qtype;
+};
+
+struct dpni_rsp_get_buffer_layout {
+ /* response word 0 */
+ uint8_t pad0[6];
+ /* from LSB: pass_timestamp:1, parser_result:1, frame_status:1 */
+ uint8_t flags;
+ uint8_t pad1;
+ /* response word 1 */
+ uint16_t private_data_size;
+ uint16_t data_align;
+ uint16_t head_room;
+ uint16_t tail_room;
+};
+
+struct dpni_cmd_set_buffer_layout {
+ /* cmd word 0 */
+ uint8_t qtype;
+ uint8_t pad0[3];
+ uint16_t options;
+ /* from LSB: pass_timestamp:1, parser_result:1, frame_status:1 */
+ uint8_t flags;
+ uint8_t pad1;
+ /* cmd word 1 */
+ uint16_t private_data_size;
+ uint16_t data_align;
+ uint16_t head_room;
+ uint16_t tail_room;
+};
+
+struct dpni_cmd_set_offload {
+ uint8_t pad[3];
+ uint8_t dpni_offload;
+ uint32_t config;
+};
+
+struct dpni_cmd_get_offload {
+ uint8_t pad[3];
+ uint8_t dpni_offload;
+};
+
+struct dpni_rsp_get_offload {
+ uint32_t pad;
+ uint32_t config;
+};
+
+struct dpni_cmd_get_qdid {
+ uint8_t qtype;
+};
+
+struct dpni_rsp_get_qdid {
+ uint16_t qdid;
+};
+
+struct dpni_rsp_get_sp_info {
+ uint16_t spids[2];
+};
+
+struct dpni_rsp_get_tx_data_offset {
+ uint16_t data_offset;
+};
+
+struct dpni_cmd_get_statistics {
+ uint8_t page_number;
+ uint8_t param;
+};
+
+struct dpni_rsp_get_statistics {
+ uint64_t counter[7];
+};
+
+struct dpni_cmd_set_link_cfg {
+ uint64_t pad0;
+ uint32_t rate;
+ uint32_t pad1;
+ uint64_t options;
+};
+
+#define DPNI_LINK_STATE_SHIFT 0
+#define DPNI_LINK_STATE_SIZE 1
+
+struct dpni_rsp_get_link_state {
+ uint32_t pad0;
+ /* from LSB: up:1 */
+ uint8_t flags;
+ uint8_t pad1[3];
+ uint32_t rate;
+ uint32_t pad2;
+ uint64_t options;
+};
+
+struct dpni_cmd_set_max_frame_length {
+ uint16_t max_frame_length;
+};
+
+struct dpni_rsp_get_max_frame_length {
+ uint16_t max_frame_length;
+};
+
+struct dpni_cmd_set_multicast_promisc {
+ uint8_t enable;
+};
+
+struct dpni_rsp_get_multicast_promisc {
+ uint8_t enabled;
+};
+
+struct dpni_cmd_set_unicast_promisc {
+ uint8_t enable;
+};
+
+struct dpni_rsp_get_unicast_promisc {
+ uint8_t enabled;
+};
+
+struct dpni_cmd_set_primary_mac_addr {
+ uint16_t pad;
+ uint8_t mac_addr[6];
+};
+
+struct dpni_rsp_get_primary_mac_addr {
+ uint16_t pad;
+ uint8_t mac_addr[6];
+};
+
+struct dpni_rsp_get_port_mac_addr {
+ uint16_t pad;
+ uint8_t mac_addr[6];
+};
+
+struct dpni_cmd_add_mac_addr {
+ uint16_t pad;
+ uint8_t mac_addr[6];
+};
+
+struct dpni_cmd_remove_mac_addr {
+ uint16_t pad;
+ uint8_t mac_addr[6];
+};
+
+#define DPNI_UNICAST_FILTERS_SHIFT 0
+#define DPNI_UNICAST_FILTERS_SIZE 1
+#define DPNI_MULTICAST_FILTERS_SHIFT 1
+#define DPNI_MULTICAST_FILTERS_SIZE 1
+
+struct dpni_cmd_clear_mac_filters {
+ /* from LSB: unicast:1, multicast:1 */
+ uint8_t flags;
+};
+
+struct dpni_cmd_enable_vlan_filter {
+ /* only the LSB */
+ uint8_t en;
+};
+
+struct dpni_cmd_vlan_id {
+ uint32_t pad;
+ uint16_t vlan_id;
+};
+
+#define DPNI_SEPARATE_GRP_SHIFT 0
+#define DPNI_SEPARATE_GRP_SIZE 1
+#define DPNI_MODE_1_SHIFT 0
+#define DPNI_MODE_1_SIZE 4
+#define DPNI_MODE_2_SHIFT 4
+#define DPNI_MODE_2_SIZE 4
+
+struct dpni_cmd_set_tx_priorities {
+ uint16_t flags;
+ uint8_t prio_group_A;
+ uint8_t prio_group_B;
+ uint32_t pad0;
+ uint8_t modes[4];
+ uint32_t pad1;
+ uint64_t pad2;
+ uint16_t delta_bandwidth[8];
+};
+
+#define DPNI_DIST_MODE_SHIFT 0
+#define DPNI_DIST_MODE_SIZE 4
+#define DPNI_MISS_ACTION_SHIFT 4
+#define DPNI_MISS_ACTION_SIZE 4
+#define DPNI_KEEP_HASH_KEY_SHIFT 7
+#define DPNI_KEEP_HASH_KEY_SIZE 1
+
+struct dpni_cmd_set_rx_tc_dist {
+ uint16_t dist_size;
+ uint8_t tc_id;
+ /* from LSB: dist_mode:4, miss_action:4 */
+ uint8_t flags;
+ uint8_t pad0;
+ /* only the LSB */
+ uint8_t keep_hash_key;
+ uint16_t default_flow_id;
+ uint64_t pad1[5];
+ uint64_t key_cfg_iova;
+};
+
+struct dpni_cmd_get_queue {
+ uint8_t qtype;
+ uint8_t tc;
+ uint8_t index;
+};
+
+#define DPNI_DEST_TYPE_SHIFT 0
+#define DPNI_DEST_TYPE_SIZE 4
+#define DPNI_STASH_CTRL_SHIFT 6
+#define DPNI_STASH_CTRL_SIZE 1
+#define DPNI_HOLD_ACTIVE_SHIFT 7
+#define DPNI_HOLD_ACTIVE_SIZE 1
+
+struct dpni_rsp_get_queue {
+ /* response word 0 */
+ uint64_t pad0;
+ /* response word 1 */
+ uint32_t dest_id;
+ uint16_t pad1;
+ uint8_t dest_prio;
+ /* From LSB: dest_type:4, pad:2, flc_stash_ctrl:1, hold_active:1 */
+ uint8_t flags;
+ /* response word 2 */
+ uint64_t flc;
+ /* response word 3 */
+ uint64_t user_context;
+ /* response word 4 */
+ uint32_t fqid;
+ uint16_t qdbin;
+};
+
+struct dpni_cmd_set_queue {
+ /* cmd word 0 */
+ uint8_t qtype;
+ uint8_t tc;
+ uint8_t index;
+ uint8_t options;
+ uint32_t pad0;
+ /* cmd word 1 */
+ uint32_t dest_id;
+ uint16_t pad1;
+ uint8_t dest_prio;
+ uint8_t flags;
+ /* cmd word 2 */
+ uint64_t flc;
+ /* cmd word 3 */
+ uint64_t user_context;
+};
+
+#define DPNI_DROP_ENABLE_SHIFT 0
+#define DPNI_DROP_ENABLE_SIZE 1
+#define DPNI_DROP_UNITS_SHIFT 2
+#define DPNI_DROP_UNITS_SIZE 2
+
+struct dpni_early_drop {
+ /* from LSB: enable:1 units:2 */
+ uint8_t flags;
+ uint8_t pad0[3];
+ uint32_t pad1;
+ uint8_t green_drop_probability;
+ uint8_t pad2[7];
+ uint64_t green_max_threshold;
+ uint64_t green_min_threshold;
+ uint64_t pad3;
+ uint8_t yellow_drop_probability;
+ uint8_t pad4[7];
+ uint64_t yellow_max_threshold;
+ uint64_t yellow_min_threshold;
+ uint64_t pad5;
+ uint8_t red_drop_probability;
+ uint8_t pad6[7];
+ uint64_t red_max_threshold;
+ uint64_t red_min_threshold;
+};
+
+struct dpni_cmd_early_drop {
+ uint8_t qtype;
+ uint8_t tc;
+ uint8_t pad[6];
+ uint64_t early_drop_iova;
+};
+
+struct dpni_rsp_get_api_version {
+ uint16_t major;
+ uint16_t minor;
+};
+
+struct dpni_cmd_get_taildrop {
+ uint8_t congestion_point;
+ uint8_t qtype;
+ uint8_t tc;
+ uint8_t index;
+};
+
+struct dpni_rsp_get_taildrop {
+ /* cmd word 0 */
+ uint64_t pad0;
+ /* cmd word 1 */
+ /* from LSB: enable:1 oal_lo:7 */
+ uint8_t enable_oal_lo;
+ /* from LSB: oal_hi:5 */
+ uint8_t oal_hi;
+ uint8_t units;
+ uint8_t pad2;
+ uint32_t threshold;
+};
+
+#define DPNI_OAL_LO_SHIFT 1
+#define DPNI_OAL_LO_SIZE 7
+#define DPNI_OAL_HI_SHIFT 0
+#define DPNI_OAL_HI_SIZE 5
+
+struct dpni_cmd_set_taildrop {
+ /* cmd word 0 */
+ uint8_t congestion_point;
+ uint8_t qtype;
+ uint8_t tc;
+ uint8_t index;
+ uint32_t pad0;
+ /* cmd word 1 */
+ /* from LSB: enable:1 oal_lo:7 */
+ uint8_t enable_oal_lo;
+ /* from LSB: oal_hi:5 */
+ uint8_t oal_hi;
+ uint8_t units;
+ uint8_t pad2;
+ uint32_t threshold;
+};
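The 12-bit two's-complement oal value from struct dpni_taildrop is split across the enable_oal_lo and oal_hi bytes according to the DPNI_OAL_LO/HI constants above. A hedged sketch of that packing (the real dpni_set_taildrop() presumably does this with dpni_set_field(); the helper below is illustrative only):

#include <stdint.h>
#include <stdio.h>

#define DPNI_ENABLE_SHIFT	0
#define DPNI_OAL_LO_SHIFT	1
#define DPNI_OAL_LO_SIZE	7
#define DPNI_OAL_HI_SHIFT	0

static void pack_taildrop_word(uint8_t *enable_oal_lo, uint8_t *oal_hi,
			       int enable, int16_t oal)
{
	uint16_t oal12 = (uint16_t)oal & 0x0FFF;	/* keep low 12 bits */

	*enable_oal_lo = (uint8_t)(((enable & 0x1) << DPNI_ENABLE_SHIFT) |
			((oal12 & 0x7F) << DPNI_OAL_LO_SHIFT));
	*oal_hi = (uint8_t)(((oal12 >> DPNI_OAL_LO_SIZE) & 0x1F) <<
			DPNI_OAL_HI_SHIFT);
}

int main(void)
{
	uint8_t lo, hi;

	pack_taildrop_word(&lo, &hi, 1, -4);	/* -4 is 0xFFC in 12 bits */
	printf("enable_oal_lo=0x%02x oal_hi=0x%02x\n", lo, hi);
	return 0;
}

On the get path the two parts would be recombined and sign-extended back to a signed value before being returned in dpni_taildrop.oal.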
+
+struct dpni_tx_confirmation_mode {
+ uint32_t pad;
+ uint8_t confirmation_mode;
+};
+
+#define DPNI_DEST_TYPE_SHIFT 0
+#define DPNI_DEST_TYPE_SIZE 4
+#define DPNI_CONG_UNITS_SHIFT 4
+#define DPNI_CONG_UNITS_SIZE 2
+
+struct dpni_cmd_set_congestion_notification {
+ uint8_t qtype;
+ uint8_t tc;
+ uint8_t pad[6];
+ uint32_t dest_id;
+ uint16_t notification_mode;
+ uint8_t dest_priority;
+ /* from LSB: dest_type: 4 units:2 */
+ uint8_t type_units;
+ uint64_t message_iova;
+ uint64_t message_ctx;
+ uint32_t threshold_entry;
+ uint32_t threshold_exit;
+};
+
+struct dpni_cmd_get_congestion_notification {
+ uint8_t qtype;
+ uint8_t tc;
+};
+
+struct dpni_rsp_get_congestion_notification {
+ uint64_t pad;
+ uint32_t dest_id;
+ uint16_t notification_mode;
+ uint8_t dest_priority;
+ /* from LSB: dest_type: 4 units:2 */
+ uint8_t type_units;
+ uint64_t message_iova;
+ uint64_t message_ctx;
+ uint32_t threshold_entry;
+ uint32_t threshold_exit;
+};
+
+#pragma pack(pop)
#endif /* _FSL_DPNI_CMD_H */
diff --git a/drivers/net/dpaa2/mc/fsl_net.h b/drivers/net/dpaa2/mc/fsl_net.h
index ef7e4dac..dbec306a 100644
--- a/drivers/net/dpaa2/mc/fsl_net.h
+++ b/drivers/net/dpaa2/mc/fsl_net.h
@@ -213,7 +213,7 @@
#define NH_FLD_SCTP_CHUNK_DATA_STREAM_SQN (NH_FLD_SCTP_CHUNK_DATA_TYPE << 5)
#define NH_FLD_SCTP_CHUNK_DATA_PAYLOAD_PID (NH_FLD_SCTP_CHUNK_DATA_TYPE << 6)
#define NH_FLD_SCTP_CHUNK_DATA_UNORDERED (NH_FLD_SCTP_CHUNK_DATA_TYPE << 7)
-#define NH_FLD_SCTP_CHUNK_DATA_BEGINNING (NH_FLD_SCTP_CHUNK_DATA_TYPE << 8)
+#define NH_FLD_SCTP_CHUNK_DATA_BEGGINNING (NH_FLD_SCTP_CHUNK_DATA_TYPE << 8)
#define NH_FLD_SCTP_CHUNK_DATA_END (NH_FLD_SCTP_CHUNK_DATA_TYPE << 9)
#define NH_FLD_SCTP_CHUNK_DATA_ALL_FIELDS \
((NH_FLD_SCTP_CHUNK_DATA_TYPE << 10) - 1)
diff --git a/drivers/net/dpaa2/rte_pmd_dpaa2_version.map b/drivers/net/dpaa2/rte_pmd_dpaa2_version.map
index 8591cc0b..09f4364b 100644
--- a/drivers/net/dpaa2/rte_pmd_dpaa2_version.map
+++ b/drivers/net/dpaa2/rte_pmd_dpaa2_version.map
@@ -2,3 +2,11 @@ DPDK_17.05 {
local: *;
};
+
+DPDK_17.11 {
+ global:
+
+ dpaa2_eth_eventq_attach;
+ dpaa2_eth_eventq_detach;
+
+} DPDK_17.05;
diff --git a/drivers/net/e1000/Makefile b/drivers/net/e1000/Makefile
index ffdf36d3..3f0344b4 100644
--- a/drivers/net/e1000/Makefile
+++ b/drivers/net/e1000/Makefile
@@ -38,6 +38,9 @@ LIB = librte_pmd_e1000.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
EXPORT_MAP := rte_pmd_e1000_version.map
@@ -68,7 +71,7 @@ endif
# Add extra flags for base driver files (also known as shared code)
# to disable warnings in them
#
-BASE_DRIVER_OBJS=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))
+BASE_DRIVER_OBJS=$(sort $(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c))))
$(foreach obj, $(BASE_DRIVER_OBJS), $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
VPATH += $(SRCDIR)/base
diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c
index 3d4ab936..a0c3b4dc 100644
--- a/drivers/net/e1000/em_ethdev.c
+++ b/drivers/net/e1000/em_ethdev.c
@@ -43,11 +43,11 @@
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ethdev_pci.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_atomic.h>
#include <rte_malloc.h>
@@ -72,7 +72,7 @@ static void eth_em_allmulticast_enable(struct rte_eth_dev *dev);
static void eth_em_allmulticast_disable(struct rte_eth_dev *dev);
static int eth_em_link_update(struct rte_eth_dev *dev,
int wait_to_complete);
-static void eth_em_stats_get(struct rte_eth_dev *dev,
+static int eth_em_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *rte_stats);
static void eth_em_stats_reset(struct rte_eth_dev *dev);
static void eth_em_infos_get(struct rte_eth_dev *dev,
@@ -99,7 +99,7 @@ static int eth_em_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
static int eth_em_vlan_filter_set(struct rte_eth_dev *dev,
uint16_t vlan_id, int on);
-static void eth_em_vlan_offload_set(struct rte_eth_dev *dev, int mask);
+static int eth_em_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static void em_vlan_hw_filter_enable(struct rte_eth_dev *dev);
static void em_vlan_hw_filter_disable(struct rte_eth_dev *dev);
static void em_vlan_hw_strip_enable(struct rte_eth_dev *dev);
@@ -341,7 +341,6 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev)
}
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
hw->device_id = pci_dev->id.device_id;
@@ -432,7 +431,8 @@ static int eth_em_pci_remove(struct rte_pci_device *pci_dev)
static struct rte_pci_driver rte_em_pmd = {
.id_table = pci_id_em_map,
- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+ RTE_PCI_DRV_IOVA_AS_VA,
.probe = eth_em_pci_probe,
.remove = eth_em_pci_remove,
};
@@ -668,7 +668,12 @@ eth_em_start(struct rte_eth_dev *dev)
mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK | \
ETH_VLAN_EXTEND_MASK;
- eth_em_vlan_offload_set(dev, mask);
+ ret = eth_em_vlan_offload_set(dev, mask);
+ if (ret) {
+ PMD_INIT_LOG(ERR, "Unable to update vlan offload");
+ em_dev_clear_queues(dev);
+ return ret;
+ }
/* Set Interrupt Throttling Rate to maximum allowed value. */
E1000_WRITE_REG(hw, E1000_ITR, UINT16_MAX);
@@ -906,7 +911,7 @@ em_hardware_init(struct e1000_hw *hw)
}
/* This function is based on em_update_stats_counters() in e1000/if_em.c */
-static void
+static int
eth_em_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
{
struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -1006,7 +1011,7 @@ eth_em_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
}
if (rte_stats == NULL)
- return;
+ return -EINVAL;
/* Rx Errors */
rte_stats->imissed = stats->mpc;
@@ -1021,6 +1026,7 @@ eth_em_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
rte_stats->opackets = stats->gptc;
rte_stats->ibytes = stats->gorc;
rte_stats->obytes = stats->gotc;
+ return 0;
}
static void
@@ -1447,7 +1453,7 @@ em_vlan_hw_strip_enable(struct rte_eth_dev *dev)
E1000_WRITE_REG(hw, E1000_CTRL, reg);
}
-static void
+static int
eth_em_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
if(mask & ETH_VLAN_STRIP_MASK){
@@ -1463,6 +1469,8 @@ eth_em_vlan_offload_set(struct rte_eth_dev *dev, int mask)
else
em_vlan_hw_filter_disable(dev);
}
+
+ return 0;
}
/*
@@ -1624,7 +1632,7 @@ eth_em_interrupt_action(struct rte_eth_dev *dev,
rte_em_dev_atomic_read_link_status(dev, &link);
if (link.link_status) {
PMD_INIT_LOG(INFO, " Port %d: Link Up - speed %u Mbps - %s",
- dev->data->port_id, (unsigned)link.link_speed,
+ dev->data->port_id, link.link_speed,
link.link_duplex == ETH_LINK_FULL_DUPLEX ?
"full-duplex" : "half-duplex");
} else {
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 31819c5b..1d8f0794 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -119,7 +119,7 @@ struct em_rx_queue {
uint16_t nb_rx_hold; /**< number of held free RX desc. */
uint16_t rx_free_thresh; /**< max free RX desc to hold. */
uint16_t queue_id; /**< RX queue index. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
uint8_t pthresh; /**< Prefetch threshold register. */
uint8_t hthresh; /**< Host threshold register. */
uint8_t wthresh; /**< Write-back threshold register. */
@@ -186,7 +186,7 @@ struct em_tx_queue {
/** Total number of TX descriptors ready to be allocated. */
uint16_t nb_tx_free;
uint16_t queue_id; /**< TX queue index. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
uint8_t pthresh; /**< Prefetch threshold register. */
uint8_t hthresh; /**< Host threshold register. */
uint8_t wthresh; /**< Write-back threshold register. */
@@ -577,7 +577,7 @@ eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
* Set up Transmit Data Descriptor.
*/
slen = m_seg->data_len;
- buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
+ buf_dma_addr = rte_mbuf_data_iova(m_seg);
txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
txd->lower.data = rte_cpu_to_le_32(cmd_type_len | slen);
@@ -675,7 +675,7 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
/* Check if VLAN present */
pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED : 0);
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED : 0);
return pkt_flags;
}
@@ -799,7 +799,7 @@ eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rxm = rxe->mbuf;
rxe->mbuf = nmb;
dma_addr =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+ rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
rxdp->buffer_addr = dma_addr;
rxdp->status = 0;
@@ -830,7 +830,7 @@ eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rxm->ol_flags = rxm->ol_flags |
rx_desc_error_to_pkt_flags(rxd.errors);
- /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
+ /* Only valid if PKT_RX_VLAN set in pkt_flags */
rxm->vlan_tci = rte_le_to_cpu_16(rxd.special);
/*
@@ -979,7 +979,7 @@ eth_em_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
*/
rxm = rxe->mbuf;
rxe->mbuf = nmb;
- dma = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+ dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
rxdp->buffer_addr = dma;
rxdp->status = 0;
@@ -1056,7 +1056,7 @@ eth_em_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
first_seg->ol_flags = first_seg->ol_flags |
rx_desc_error_to_pkt_flags(rxd.errors);
- /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
+ /* Only valid if PKT_RX_VLAN set in pkt_flags */
rxm->vlan_tci = rte_le_to_cpu_16(rxd.special);
/* Prefetch data of first segment, if configured to do so. */
@@ -1289,7 +1289,7 @@ eth_em_tx_queue_setup(struct rte_eth_dev *dev,
txq->port_id = dev->data->port_id;
txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(queue_idx));
- txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+ txq->tx_ring_phys_addr = tz->iova;
txq->tx_ring = (struct e1000_data_desc *) tz->addr;
PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
@@ -1416,7 +1416,7 @@ eth_em_rx_queue_setup(struct rte_eth_dev *dev,
rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(queue_idx));
rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(queue_idx));
- rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+ rxq->rx_ring_phys_addr = rz->iova;
rxq->rx_ring = (struct e1000_rx_desc *) rz->addr;
PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
@@ -1652,7 +1652,7 @@ em_alloc_rx_queue_mbufs(struct em_rx_queue *rxq)
}
dma_addr =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
+ rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
/* Clear HW ring memory */
rxq->rx_ring[i] = rxd_init;
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index e4f7a9fa..fdc139f3 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -43,11 +43,11 @@
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ethdev_pci.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_atomic.h>
#include <rte_malloc.h>
@@ -112,7 +112,7 @@ static void eth_igb_allmulticast_enable(struct rte_eth_dev *dev);
static void eth_igb_allmulticast_disable(struct rte_eth_dev *dev);
static int eth_igb_link_update(struct rte_eth_dev *dev,
int wait_to_complete);
-static void eth_igb_stats_get(struct rte_eth_dev *dev,
+static int eth_igb_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *rte_stats);
static int eth_igb_xstats_get(struct rte_eth_dev *dev,
struct rte_eth_xstat *xstats, unsigned n);
@@ -157,7 +157,7 @@ static int eth_igb_vlan_filter_set(struct rte_eth_dev *dev,
static int eth_igb_vlan_tpid_set(struct rte_eth_dev *dev,
enum rte_vlan_type vlan_type,
uint16_t tpid_id);
-static void eth_igb_vlan_offload_set(struct rte_eth_dev *dev, int mask);
+static int eth_igb_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static void igb_vlan_hw_filter_enable(struct rte_eth_dev *dev);
static void igb_vlan_hw_filter_disable(struct rte_eth_dev *dev);
@@ -188,7 +188,7 @@ static void igbvf_promiscuous_disable(struct rte_eth_dev *dev);
static void igbvf_allmulticast_enable(struct rte_eth_dev *dev);
static void igbvf_allmulticast_disable(struct rte_eth_dev *dev);
static int eth_igbvf_link_update(struct e1000_hw *hw);
-static void eth_igbvf_stats_get(struct rte_eth_dev *dev,
+static int eth_igbvf_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *rte_stats);
static int eth_igbvf_xstats_get(struct rte_eth_dev *dev,
struct rte_eth_xstat *xstats, unsigned n);
@@ -334,6 +334,8 @@ static const struct rte_pci_id pci_id_igb_map[] = {
{ RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER) },
{ RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES) },
{ RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII) },
+ { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS) },
+ { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS) },
{ RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER) },
{ RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
{ RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII) },
@@ -816,7 +818,6 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev)
}
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
hw->hw_addr= (void *)pci_dev->mem_resource[0].addr;
@@ -1051,7 +1052,6 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
hw->device_id = pci_dev->id.device_id;
hw->vendor_id = pci_dev->id.vendor_id;
@@ -1166,7 +1166,8 @@ static int eth_igb_pci_remove(struct rte_pci_device *pci_dev)
static struct rte_pci_driver rte_igb_pmd = {
.id_table = pci_id_igb_map,
- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+ RTE_PCI_DRV_IOVA_AS_VA,
.probe = eth_igb_pci_probe,
.remove = eth_igb_pci_remove,
};
@@ -1189,7 +1190,7 @@ static int eth_igbvf_pci_remove(struct rte_pci_device *pci_dev)
*/
static struct rte_pci_driver rte_igbvf_pmd = {
.id_table = pci_id_igbvf_map,
- .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_IOVA_AS_VA,
.probe = eth_igbvf_pci_probe,
.remove = eth_igbvf_pci_remove,
};
@@ -1400,7 +1401,12 @@ eth_igb_start(struct rte_eth_dev *dev)
*/
mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK | \
ETH_VLAN_EXTEND_MASK;
- eth_igb_vlan_offload_set(dev, mask);
+ ret = eth_igb_vlan_offload_set(dev, mask);
+ if (ret) {
+ PMD_INIT_LOG(ERR, "Unable to set vlan offload");
+ igb_dev_clear_queues(dev);
+ return ret;
+ }
if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_ONLY) {
/* Enable VLAN filter since VMDq always use VLAN filter */
@@ -1828,7 +1834,7 @@ igb_read_stats_registers(struct e1000_hw *hw, struct e1000_hw_stats *stats)
stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
}
-static void
+static int
eth_igb_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
{
struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -1838,7 +1844,7 @@ eth_igb_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
igb_read_stats_registers(hw, stats);
if (rte_stats == NULL)
- return;
+ return -EINVAL;
/* Rx Errors */
rte_stats->imissed = stats->mpc;
@@ -1853,6 +1859,7 @@ eth_igb_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
rte_stats->opackets = stats->gptc;
rte_stats->ibytes = stats->gorc;
rte_stats->obytes = stats->gotc;
+ return 0;
}
static void
@@ -2093,7 +2100,7 @@ eth_igbvf_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
return IGBVF_NB_XSTATS;
}
-static void
+static int
eth_igbvf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
{
struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -2103,12 +2110,13 @@ eth_igbvf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
igbvf_read_stats_registers(hw, hw_stats);
if (rte_stats == NULL)
- return;
+ return -EINVAL;
rte_stats->ipackets = hw_stats->gprc;
rte_stats->ibytes = hw_stats->gorc;
rte_stats->opackets = hw_stats->gptc;
rte_stats->obytes = hw_stats->gotc;
+ return 0;
}
static void
@@ -2715,7 +2723,7 @@ igb_vlan_hw_extend_enable(struct rte_eth_dev *dev)
2 * VLAN_TAG_SIZE);
}
-static void
+static int
eth_igb_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
if(mask & ETH_VLAN_STRIP_MASK){
@@ -2738,6 +2746,8 @@ eth_igb_vlan_offload_set(struct rte_eth_dev *dev, int mask)
else
igb_vlan_hw_extend_disable(dev);
}
+
+ return 0;
}
@@ -4094,7 +4104,7 @@ eth_igb_get_flex_filter(struct rte_eth_dev *dev,
flex_filter.filter_info.priority = filter->priority;
memcpy(flex_filter.filter_info.dwords, filter->bytes, filter->len);
memcpy(flex_filter.filter_info.mask, filter->mask,
- RTE_ALIGN(filter->len, sizeof(char)) / sizeof(char));
+ RTE_ALIGN(filter->len, CHAR_BIT) / CHAR_BIT);
it = eth_igb_flex_filter_lookup(&filter_info->flex_list,
&flex_filter.filter_info);
@@ -5382,7 +5392,14 @@ eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
{
struct e1000_hw *hw =
E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
- uint32_t mask = 1 << queue_id;
+ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+ struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+ uint32_t vec = E1000_MISC_VEC_ID;
+
+ if (rte_intr_allow_others(intr_handle))
+ vec = E1000_RX_VEC_START;
+
+ uint32_t mask = 1 << (queue_id + vec);
E1000_WRITE_REG(hw, E1000_EIMC, mask);
E1000_WRITE_FLUSH(hw);
@@ -5397,7 +5414,12 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
- uint32_t mask = 1 << queue_id;
+ uint32_t vec = E1000_MISC_VEC_ID;
+
+ if (rte_intr_allow_others(intr_handle))
+ vec = E1000_RX_VEC_START;
+
+ uint32_t mask = 1 << (queue_id + vec);
uint32_t regval;
regval = E1000_READ_REG(hw, E1000_EIMS);
diff --git a/drivers/net/e1000/igb_flow.c b/drivers/net/e1000/igb_flow.c
index ed2ecc40..22bad265 100644
--- a/drivers/net/e1000/igb_flow.c
+++ b/drivers/net/e1000/igb_flow.c
@@ -47,7 +47,6 @@
#include <rte_ethdev.h>
#include <rte_ethdev_pci.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_atomic.h>
#include <rte_malloc.h>
@@ -1346,7 +1345,7 @@ igb_flow_create(struct rte_eth_dev *dev,
if (!ret) {
ntuple_filter_ptr = rte_zmalloc("igb_ntuple_filter",
sizeof(struct igb_ntuple_filter_ele), 0);
- (void)rte_memcpy(&ntuple_filter_ptr->filter_info,
+ rte_memcpy(&ntuple_filter_ptr->filter_info,
&ntuple_filter,
sizeof(struct rte_eth_ntuple_filter));
TAILQ_INSERT_TAIL(&igb_filter_ntuple_list,
@@ -1368,7 +1367,7 @@ igb_flow_create(struct rte_eth_dev *dev,
ethertype_filter_ptr = rte_zmalloc(
"igb_ethertype_filter",
sizeof(struct igb_ethertype_filter_ele), 0);
- (void)rte_memcpy(&ethertype_filter_ptr->filter_info,
+ rte_memcpy(&ethertype_filter_ptr->filter_info,
&ethertype_filter,
sizeof(struct rte_eth_ethertype_filter));
TAILQ_INSERT_TAIL(&igb_filter_ethertype_list,
@@ -1388,7 +1387,7 @@ igb_flow_create(struct rte_eth_dev *dev,
if (!ret) {
syn_filter_ptr = rte_zmalloc("igb_syn_filter",
sizeof(struct igb_eth_syn_filter_ele), 0);
- (void)rte_memcpy(&syn_filter_ptr->filter_info,
+ rte_memcpy(&syn_filter_ptr->filter_info,
&syn_filter,
sizeof(struct rte_eth_syn_filter));
TAILQ_INSERT_TAIL(&igb_filter_syn_list,
@@ -1409,7 +1408,7 @@ igb_flow_create(struct rte_eth_dev *dev,
if (!ret) {
flex_filter_ptr = rte_zmalloc("igb_flex_filter",
sizeof(struct igb_flex_filter_ele), 0);
- (void)rte_memcpy(&flex_filter_ptr->filter_info,
+ rte_memcpy(&flex_filter_ptr->filter_info,
&flex_filter,
sizeof(struct rte_eth_flex_filter));
TAILQ_INSERT_TAIL(&igb_filter_flex_list,
diff --git a/drivers/net/e1000/igb_pf.c b/drivers/net/e1000/igb_pf.c
index 6809d30c..cd6ae2fb 100644
--- a/drivers/net/e1000/igb_pf.c
+++ b/drivers/net/e1000/igb_pf.c
@@ -39,6 +39,7 @@
#include <stdarg.h>
#include <inttypes.h>
+#include <rte_bus_pci.h>
#include <rte_interrupts.h>
#include <rte_log.h>
#include <rte_debug.h>
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index 1c80a2a1..4ee12e9e 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -105,6 +105,13 @@ struct igb_tx_entry {
};
/**
+ * rx queue flags
+ */
+enum igb_rxq_flags {
+ IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
+};
+
+/**
* Structure associated with each RX queue.
*/
struct igb_rx_queue {
@@ -122,12 +129,13 @@ struct igb_rx_queue {
uint16_t rx_free_thresh; /**< max free RX desc to hold. */
uint16_t queue_id; /**< RX queue index. */
uint16_t reg_idx; /**< RX queue register index. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
uint8_t pthresh; /**< Prefetch threshold register. */
uint8_t hthresh; /**< Host threshold register. */
uint8_t wthresh; /**< Write-back threshold register. */
uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
+ uint32_t flags; /**< RX flags. */
};
/**
@@ -191,7 +199,7 @@ struct igb_tx_queue {
/**< Index of first used TX descriptor. */
uint16_t queue_id; /**< TX queue index. */
uint16_t reg_idx; /**< TX queue register index. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
uint8_t pthresh; /**< Prefetch threshold register. */
uint8_t hthresh; /**< Host threshold register. */
uint8_t wthresh; /**< Write-back threshold register. */
@@ -589,7 +597,7 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
* Set up transmit descriptor.
*/
slen = (uint16_t) m_seg->data_len;
- buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
+ buf_dma_addr = rte_mbuf_data_iova(m_seg);
txd->read.buffer_addr =
rte_cpu_to_le_64(buf_dma_addr);
txd->read.cmd_type_len =
@@ -785,7 +793,7 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status)
/* Check if VLAN present */
pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED : 0);
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED : 0);
#if defined(RTE_LIBRTE_IEEE1588)
if (rx_status & E1000_RXD_STAT_TMST)
@@ -917,7 +925,7 @@ eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rxm = rxe->mbuf;
rxe->mbuf = nmb;
dma_addr =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+ rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
rxdp->read.hdr_addr = 0;
rxdp->read.pkt_addr = dma_addr;
@@ -946,9 +954,17 @@ eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
- /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
- rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
+ /*
+ * The vlan_tci field is only valid when PKT_RX_VLAN is
+ * set in the pkt_flags field and must be in CPU byte order.
+ */
+ if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
+ (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
+ rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
+ } else {
+ rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
+ }
pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
@@ -1103,7 +1119,7 @@ eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
*/
rxm = rxe->mbuf;
rxe->mbuf = nmb;
- dma = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+ dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
rxdp->read.pkt_addr = dma;
rxdp->read.hdr_addr = 0;
@@ -1180,10 +1196,17 @@ eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
/*
- * The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
- * set in the pkt_flags field.
+ * The vlan_tci field is only valid when PKT_RX_VLAN is
+ * set in the pkt_flags field and must be in CPU byte order.
*/
- first_seg->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
+ if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
+ (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
+ first_seg->vlan_tci =
+ rte_be_to_cpu_16(rxd.wb.upper.vlan);
+ } else {
+ first_seg->vlan_tci =
+ rte_le_to_cpu_16(rxd.wb.upper.vlan);
+ }
hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
@@ -1530,7 +1553,7 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
txq->port_id = dev->data->port_id;
txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
- txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+ txq->tx_ring_phys_addr = tz->iova;
txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
/* Allocate software ring */
@@ -1667,7 +1690,7 @@ eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
}
rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
- rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+ rxq->rx_ring_phys_addr = rz->iova;
rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
/* Allocate software ring. */
@@ -2180,7 +2203,7 @@ igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
return -ENOMEM;
}
dma_addr =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
+ rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
rxd = &rxq->rx_ring[i];
rxd->read.hdr_addr = 0;
rxd->read.pkt_addr = dma_addr;
@@ -2278,6 +2301,17 @@ eth_igb_rx_init(struct rte_eth_dev *dev)
rxq = dev->data->rx_queues[i];
+ rxq->flags = 0;
+ /*
+ * i350 and i354 loopback VLAN packets have their VLAN tags byte swapped.
+ */
+ if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
+ rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
+ PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
+ } else {
+ PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
+ }
+
/* Allocate buffers for descriptor rings and set up queue */
ret = igb_alloc_rx_queue_mbufs(rxq);
if (ret)
@@ -2557,6 +2591,17 @@ eth_igbvf_rx_init(struct rte_eth_dev *dev)
rxq = dev->data->rx_queues[i];
+ rxq->flags = 0;
+ /*
+ * i350 VF loopback (LB) VLAN packets have their VLAN tags byte swapped.
+ */
+ if (hw->mac.type == e1000_vfadapt_i350) {
+ rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
+ PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
+ } else {
+ PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
+ }
+
/* Allocate buffers for descriptor rings and set up queue */
ret = igb_alloc_rx_queue_mbufs(rxq);
if (ret)
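
For reference, a minimal sketch of how an application consumes the renamed PKT_RX_VLAN flag together with the CPU-byte-order vlan_tci that the igb receive paths above now guarantee; the port, queue and burst size are illustrative, not taken from the patch.

    /* Illustrative consumer of PKT_RX_VLAN / vlan_tci (not part of the patch). */
    #include <stdio.h>
    #include <rte_ethdev.h>
    #include <rte_mbuf.h>

    static void
    handle_rx_vlan(uint16_t port_id, uint16_t queue_id)
    {
        struct rte_mbuf *pkts[32];
        uint16_t nb, i;

        nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
        for (i = 0; i < nb; i++) {
            struct rte_mbuf *m = pkts[i];

            /* vlan_tci is only meaningful when PKT_RX_VLAN is set,
             * and the PMD delivers it in CPU byte order. */
            if (m->ol_flags & PKT_RX_VLAN)
                printf("port %u: VLAN TCI 0x%04x%s\n", port_id,
                       m->vlan_tci,
                       (m->ol_flags & PKT_RX_VLAN_STRIPPED) ?
                       " (stripped)" : "");
            rte_pktmbuf_free(m);
        }
    }
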
diff --git a/drivers/net/ena/Makefile b/drivers/net/ena/Makefile
index bf1f5da0..f9bfe053 100644
--- a/drivers/net/ena/Makefile
+++ b/drivers/net/ena/Makefile
@@ -52,5 +52,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += ena_com.c
SRCS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += ena_eth_com.c
CFLAGS += $(INCLUDES)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/ena/base/ena_plat_dpdk.h b/drivers/net/ena/base/ena_plat_dpdk.h
index 71a8c1e2..accecf51 100644
--- a/drivers/net/ena/base/ena_plat_dpdk.h
+++ b/drivers/net/ena/base/ena_plat_dpdk.h
@@ -191,7 +191,7 @@ typedef uint64_t dma_addr_t;
mz = rte_memzone_reserve(z_name, size, SOCKET_ID_ANY, 0); \
memset(mz->addr, 0, size); \
virt = mz->addr; \
- phys = mz->phys_addr; \
+ phys = mz->iova; \
handle = mz; \
} while (0)
#define ENA_MEM_FREE_COHERENT(dmadev, size, virt, phys, handle) \
@@ -209,7 +209,7 @@ typedef uint64_t dma_addr_t;
mz = rte_memzone_reserve(z_name, size, node, 0); \
memset(mz->addr, 0, size); \
virt = mz->addr; \
- phys = mz->phys_addr; \
+ phys = mz->iova; \
} while (0)
#define ENA_MEM_ALLOC_NODE(dmadev, size, virt, node, dev_node) \
diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index 80ce1f35..22db8951 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -205,7 +205,7 @@ static void ena_init_rings(struct ena_adapter *adapter);
static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
static int ena_start(struct rte_eth_dev *dev);
static void ena_close(struct rte_eth_dev *dev);
-static void ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
+static int ena_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
static void ena_rx_queue_release_all(struct rte_eth_dev *dev);
static void ena_tx_queue_release_all(struct rte_eth_dev *dev);
static void ena_rx_queue_release(void *queue);
@@ -811,7 +811,7 @@ static void ena_stats_restart(struct rte_eth_dev *dev)
rte_atomic64_init(&adapter->drv_stats->rx_nombuf);
}
-static void ena_stats_get(struct rte_eth_dev *dev,
+static int ena_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats)
{
struct ena_admin_basic_stats ena_stats;
@@ -821,13 +821,13 @@ static void ena_stats_get(struct rte_eth_dev *dev,
int rc;
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
- return;
+ return -ENOTSUP;
memset(&ena_stats, 0, sizeof(ena_stats));
rc = ena_com_get_dev_basic_stats(ena_dev, &ena_stats);
if (unlikely(rc)) {
RTE_LOG(ERR, PMD, "Could not retrieve statistics from ENA");
- return;
+ return rc;
}
/* Set of basic statistics from ENA */
@@ -846,6 +846,7 @@ static void ena_stats_get(struct rte_eth_dev *dev,
stats->ierrors = rte_atomic64_read(&adapter->drv_stats->ierrors);
stats->oerrors = rte_atomic64_read(&adapter->drv_stats->oerrors);
stats->rx_nombuf = rte_atomic64_read(&adapter->drv_stats->rx_nombuf);
+ return 0;
}
static int ena_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
@@ -1166,7 +1167,7 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
rte_prefetch0(mbufs[((next_to_use + 4) & ring_mask)]);
/* prepare physical address for DMA transaction */
- ebuf.paddr = mbuf->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+ ebuf.paddr = mbuf->buf_iova + RTE_PKTMBUF_HEADROOM;
ebuf.len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM;
/* pass resource to device */
rc = ena_com_add_single_rx_desc(rxq->ena_com_io_sq,
@@ -1725,7 +1726,7 @@ static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
* consideration pushed header
*/
if (mbuf->data_len > ena_tx_ctx.header_len) {
- ebuf->paddr = mbuf->buf_physaddr +
+ ebuf->paddr = mbuf->buf_iova +
mbuf->data_off +
ena_tx_ctx.header_len;
ebuf->len = mbuf->data_len - ena_tx_ctx.header_len;
@@ -1734,7 +1735,7 @@ static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
}
while ((mbuf = mbuf->next) != NULL) {
- ebuf->paddr = mbuf->buf_physaddr + mbuf->data_off;
+ ebuf->paddr = mbuf->buf_iova + mbuf->data_off;
ebuf->len = mbuf->data_len;
ebuf++;
tx_info->num_of_bufs++;
diff --git a/drivers/net/ena/ena_ethdev.h b/drivers/net/ena/ena_ethdev.h
index dc3080ff..be8bc9fa 100644
--- a/drivers/net/ena/ena_ethdev.h
+++ b/drivers/net/ena/ena_ethdev.h
@@ -35,6 +35,7 @@
#define _ENA_ETHDEV_H_
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include "ena_com.h"
diff --git a/drivers/net/enic/Makefile b/drivers/net/enic/Makefile
index db48ff2d..5191db54 100644
--- a/drivers/net/enic/Makefile
+++ b/drivers/net/enic/Makefile
@@ -45,6 +45,9 @@ CFLAGS += -I$(SRCDIR)/base/
CFLAGS += -I$(SRCDIR)
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -Wno-strict-aliasing
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
+LDLIBS += -lrte_bus_pci
VPATH += $(SRCDIR)/src
diff --git a/drivers/net/enic/base/vnic_cq.c b/drivers/net/enic/base/vnic_cq.c
index 2f65f357..3549fece 100644
--- a/drivers/net/enic/base/vnic_cq.c
+++ b/drivers/net/enic/base/vnic_cq.c
@@ -35,16 +35,6 @@
#include "vnic_dev.h"
#include "vnic_cq.h"
-int vnic_cq_mem_size(struct vnic_cq *cq, unsigned int desc_count,
- unsigned int desc_size)
-{
- int mem_size;
-
- mem_size = vnic_dev_desc_ring_size(&cq->ring, desc_count, desc_size);
-
- return mem_size;
-}
-
void vnic_cq_free(struct vnic_cq *cq)
{
vnic_dev_free_desc_ring(cq->vdev, &cq->ring);
@@ -65,11 +55,11 @@ int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index,
cq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_CQ, index);
if (!cq->ctrl) {
- pr_err("Failed to hook CQ[%d] resource\n", index);
+ pr_err("Failed to hook CQ[%u] resource\n", index);
return -EINVAL;
}
- snprintf(res_name, sizeof(res_name), "%d-cq-%d", instance++, index);
+ snprintf(res_name, sizeof(res_name), "%d-cq-%u", instance++, index);
err = vnic_dev_alloc_desc_ring(vdev, &cq->ring, desc_count, desc_size,
socket_id, res_name);
if (err)
diff --git a/drivers/net/enic/base/vnic_dev.c b/drivers/net/enic/base/vnic_dev.c
index 49b36555..9b25d219 100644
--- a/drivers/net/enic/base/vnic_dev.c
+++ b/drivers/net/enic/base/vnic_dev.c
@@ -272,7 +272,7 @@ int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev,
__attribute__((unused)) unsigned int socket_id,
char *z_name)
{
- void *alloc_addr = NULL;
+ void *alloc_addr;
dma_addr_t alloc_pa = 0;
vnic_dev_desc_ring_size(ring, desc_count, desc_size);
@@ -443,24 +443,6 @@ static int vnic_dev_cmd_no_proxy(struct vnic_dev *vdev,
return err;
}
-void vnic_dev_cmd_proxy_by_index_start(struct vnic_dev *vdev, u16 index)
-{
- vdev->proxy = PROXY_BY_INDEX;
- vdev->proxy_index = index;
-}
-
-void vnic_dev_cmd_proxy_by_bdf_start(struct vnic_dev *vdev, u16 bdf)
-{
- vdev->proxy = PROXY_BY_BDF;
- vdev->proxy_index = bdf;
-}
-
-void vnic_dev_cmd_proxy_end(struct vnic_dev *vdev)
-{
- vdev->proxy = PROXY_NONE;
- vdev->proxy_index = 0;
-}
-
int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
u64 *a0, u64 *a1, int wait)
{
@@ -650,7 +632,7 @@ int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats)
if (!vdev->stats) {
snprintf((char *)name, sizeof(name),
- "vnic_stats-%d", instance++);
+ "vnic_stats-%u", instance++);
vdev->stats = vdev->alloc_consistent(vdev->priv,
sizeof(struct vnic_stats), &vdev->stats_pa, (u8 *)name);
if (!vdev->stats)
@@ -672,15 +654,6 @@ int vnic_dev_close(struct vnic_dev *vdev)
return vnic_dev_cmd(vdev, CMD_CLOSE, &a0, &a1, wait);
}
-/** Deprecated. @see vnic_dev_enable_wait */
-int vnic_dev_enable(struct vnic_dev *vdev)
-{
- u64 a0 = 0, a1 = 0;
- int wait = 1000;
-
- return vnic_dev_cmd(vdev, CMD_ENABLE, &a0, &a1, wait);
-}
-
int vnic_dev_enable_wait(struct vnic_dev *vdev)
{
u64 a0 = 0, a1 = 0;
@@ -725,31 +698,6 @@ int vnic_dev_open_done(struct vnic_dev *vdev, int *done)
return 0;
}
-int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg)
-{
- u64 a0 = (u32)arg, a1 = 0;
- int wait = 1000;
-
- return vnic_dev_cmd(vdev, CMD_SOFT_RESET, &a0, &a1, wait);
-}
-
-int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done)
-{
- u64 a0 = 0, a1 = 0;
- int wait = 1000;
- int err;
-
- *done = 0;
-
- err = vnic_dev_cmd(vdev, CMD_SOFT_RESET_STATUS, &a0, &a1, wait);
- if (err)
- return err;
-
- *done = (a0 == 0);
-
- return 0;
-}
-
int vnic_dev_get_mac_addr(struct vnic_dev *vdev, u8 *mac_addr)
{
u64 a0 = 0, a1 = 0;
@@ -840,19 +788,6 @@ int vnic_dev_set_ig_vlan_rewrite_mode(struct vnic_dev *vdev,
return 0;
}
-int vnic_dev_raise_intr(struct vnic_dev *vdev, u16 intr)
-{
- u64 a0 = intr, a1 = 0;
- int wait = 1000;
- int err;
-
- err = vnic_dev_cmd(vdev, CMD_IAR, &a0, &a1, wait);
- if (err)
- pr_err("Failed to raise INTR[%d], err %d\n", intr, err);
-
- return err;
-}
-
void vnic_dev_set_reset_flag(struct vnic_dev *vdev, int state)
{
vdev->in_reset = state;
@@ -900,7 +835,7 @@ int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr)
}
if (!vnic_dev_in_reset(vdev)) {
snprintf((char *)name, sizeof(name),
- "vnic_notify-%d", instance++);
+ "vnic_notify-%u", instance++);
notify_addr = vdev->alloc_consistent(vdev->priv,
sizeof(struct vnic_devcmd_notify),
&notify_pa, (u8 *)name);
@@ -985,14 +920,6 @@ int vnic_dev_init(struct vnic_dev *vdev, int arg)
return r;
}
-int vnic_dev_deinit(struct vnic_dev *vdev)
-{
- u64 a0 = 0, a1 = 0;
- int wait = 1000;
-
- return vnic_dev_cmd(vdev, CMD_DEINIT, &a0, &a1, wait);
-}
-
void vnic_dev_intr_coal_timer_info_default(struct vnic_dev *vdev)
{
/* Default: hardware intr coal timer is in units of 1.5 usecs */
@@ -1018,18 +945,6 @@ u32 vnic_dev_port_speed(struct vnic_dev *vdev)
return vdev->notify_copy.port_speed;
}
-void vnic_dev_set_intr_mode(struct vnic_dev *vdev,
- enum vnic_dev_intr_mode intr_mode)
-{
- vdev->intr_mode = intr_mode;
-}
-
-enum vnic_dev_intr_mode vnic_dev_get_intr_mode(
- struct vnic_dev *vdev)
-{
- return vdev->intr_mode;
-}
-
u32 vnic_dev_intr_coal_timer_usec_to_hw(struct vnic_dev *vdev, u32 usec)
{
return (usec * vdev->intr_coal_timer_info.mul) /
@@ -1063,7 +978,7 @@ void vnic_dev_unregister(struct vnic_dev *vdev)
vdev->free_consistent(vdev->priv,
sizeof(struct vnic_devcmd_fw_info),
vdev->fw_info, vdev->fw_info_pa);
- kfree(vdev);
+ rte_free(vdev);
}
}
@@ -1072,7 +987,13 @@ struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
unsigned int num_bars)
{
if (!vdev) {
- vdev = kzalloc(sizeof(struct vnic_dev), GFP_ATOMIC);
+ char name[NAME_MAX];
+ snprintf((char *)name, sizeof(name), "%s-vnic",
+ pdev->device.name);
+ vdev = (struct vnic_dev *)rte_zmalloc_socket(name,
+ sizeof(struct vnic_dev),
+ RTE_CACHE_LINE_SIZE,
+ pdev->device.numa_node);
if (!vdev)
return NULL;
}
@@ -1094,23 +1015,6 @@ err_out:
return NULL;
}
-struct rte_pci_device *vnic_dev_get_pdev(struct vnic_dev *vdev)
-{
- return vdev->pdev;
-}
-
-int vnic_dev_set_mac_addr(struct vnic_dev *vdev, u8 *mac_addr)
-{
- u64 a0, a1 = 0;
- int wait = 1000;
- int i;
-
- for (i = 0; i < ETH_ALEN; i++)
- ((u8 *)&a0)[i] = mac_addr[i];
-
- return vnic_dev_cmd(vdev, CMD_SET_MAC_ADDR, &a0, &a1, wait);
-}
-
/*
* vnic_dev_classifier: Add/Delete classifier entries
* @vdev: vdev of the device
@@ -1150,7 +1054,7 @@ int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry,
tlv_size = filter_size + action_size +
2*sizeof(struct filter_tlv);
snprintf((char *)z_name, sizeof(z_name),
- "vnic_clsf_%d", unique_id++);
+ "vnic_clsf_%u", unique_id++);
tlv_va = vdev->alloc_consistent(vdev->priv,
tlv_size, &tlv_pa, (u8 *)z_name);
if (!tlv_va)
diff --git a/drivers/net/enic/base/vnic_dev.h b/drivers/net/enic/base/vnic_dev.h
index 9a9e6917..c9ca25b3 100644
--- a/drivers/net/enic/base/vnic_dev.h
+++ b/drivers/net/enic/base/vnic_dev.h
@@ -35,8 +35,10 @@
#ifndef _VNIC_DEV_H_
#define _VNIC_DEV_H_
+#include <rte_pci.h>
+#include <rte_bus_pci.h>
+
#include "enic_compat.h"
-#include "rte_pci.h"
#include "vnic_resource.h"
#include "vnic_devcmd.h"
diff --git a/drivers/net/enic/base/vnic_rq.c b/drivers/net/enic/base/vnic_rq.c
index 10a40c1b..ea297eef 100644
--- a/drivers/net/enic/base/vnic_rq.c
+++ b/drivers/net/enic/base/vnic_rq.c
@@ -58,13 +58,13 @@ int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index,
rq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_RQ, index);
if (!rq->ctrl) {
- pr_err("Failed to hook RQ[%d] resource\n", index);
+ pr_err("Failed to hook RQ[%u] resource\n", index);
return -EINVAL;
}
vnic_rq_disable(rq);
- snprintf(res_name, sizeof(res_name), "%d-rq-%d", instance++, index);
+ snprintf(res_name, sizeof(res_name), "%d-rq-%u", instance++, index);
rc = vnic_dev_alloc_desc_ring(vdev, &rq->ring, desc_count, desc_size,
rq->socket_id, res_name);
return rc;
@@ -118,11 +118,6 @@ void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index,
rq->pkt_last_seg = NULL;
}
-void vnic_rq_error_out(struct vnic_rq *rq, unsigned int error)
-{
- iowrite32(error, &rq->ctrl->error_status);
-}
-
unsigned int vnic_rq_error_status(struct vnic_rq *rq)
{
return ioread32(&rq->ctrl->error_status);
diff --git a/drivers/net/enic/base/vnic_rss.c b/drivers/net/enic/base/vnic_rss.c
index 1cf055b0..87d40c0d 100644
--- a/drivers/net/enic/base/vnic_rss.c
+++ b/drivers/net/enic/base/vnic_rss.c
@@ -50,35 +50,3 @@ void vnic_set_rss_key(union vnic_rss_key *rss_key, u8 *key)
}
}
-void vnic_set_rss_cpu(union vnic_rss_cpu *rss_cpu, u8 *cpu)
-{
- u32 i;
- u32 *p = (u32 *)cpu;
-
- for (i = 0; i < 32; ++i)
- iowrite32(*p++, &rss_cpu->cpu[i].b[0]);
-}
-
-void vnic_get_rss_key(union vnic_rss_key *rss_key, u8 *key)
-{
- u32 i;
- u32 *p;
- u16 *q;
-
- for (i = 0; i < 4; ++i) {
- p = (u32 *)(key + (10 * i));
- *p++ = ioread32(&rss_key->key[i].b[0]);
- *p++ = ioread32(&rss_key->key[i].b[4]);
- q = (u16 *)p;
- *q = (u16)ioread32(&rss_key->key[i].b[8]);
- }
-}
-
-void vnic_get_rss_cpu(union vnic_rss_cpu *rss_cpu, u8 *cpu)
-{
- u32 i;
- u32 *p = (u32 *)cpu;
-
- for (i = 0; i < 32; ++i)
- *p++ = ioread32(&rss_cpu->cpu[i].b[0]);
-}
diff --git a/drivers/net/enic/base/vnic_wq.c b/drivers/net/enic/base/vnic_wq.c
index 7c4119c3..0a1247f4 100644
--- a/drivers/net/enic/base/vnic_wq.c
+++ b/drivers/net/enic/base/vnic_wq.c
@@ -52,7 +52,7 @@ int vnic_wq_alloc_ring(struct vnic_dev *vdev, struct vnic_wq *wq,
char res_name[NAME_MAX];
static int instance;
- snprintf(res_name, sizeof(res_name), "%d-wq-%d", instance++, wq->index);
+ snprintf(res_name, sizeof(res_name), "%d-wq-%u", instance++, wq->index);
return vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size,
wq->socket_id, res_name);
}
@@ -145,11 +145,6 @@ void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index,
wq->last_completed_index = 0;
}
-void vnic_wq_error_out(struct vnic_wq *wq, unsigned int error)
-{
- iowrite32(error, &wq->ctrl->error_status);
-}
-
unsigned int vnic_wq_error_status(struct vnic_wq *wq)
{
return ioread32(&wq->ctrl->error_status);
diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h
index e28f2235..e36ec385 100644
--- a/drivers/net/enic/enic.h
+++ b/drivers/net/enic/enic.h
@@ -227,11 +227,6 @@ static inline unsigned int enic_cq_wq(struct enic *enic, unsigned int wq)
return enic->rq_count + wq;
}
-static inline unsigned int enic_msix_err_intr(__rte_unused struct enic *enic)
-{
- return 0;
-}
-
static inline struct enic *pmd_priv(struct rte_eth_dev *eth_dev)
{
return (struct enic *)eth_dev->data->dev_private;
@@ -287,7 +282,7 @@ extern int enic_enable(struct enic *enic);
extern int enic_disable(struct enic *enic);
extern void enic_remove(struct enic *enic);
extern int enic_get_link_status(struct enic *enic);
-extern void enic_dev_stats_get(struct enic *enic,
+extern int enic_dev_stats_get(struct enic *enic,
struct rte_eth_stats *r_stats);
extern void enic_dev_stats_clear(struct enic *enic);
extern void enic_add_packet_filter(struct enic *enic);
diff --git a/drivers/net/enic/enic_compat.h b/drivers/net/enic/enic_compat.h
index fc58bb41..1cb5686f 100644
--- a/drivers/net/enic/enic_compat.h
+++ b/drivers/net/enic/enic_compat.h
@@ -99,11 +99,6 @@ static inline uint32_t ioread32(volatile void *addr)
return rte_read32(addr);
}
-static inline uint16_t ioread16(volatile void *addr)
-{
- return rte_read16(addr);
-}
-
static inline uint8_t ioread8(volatile void *addr)
{
return rte_read8(addr);
@@ -119,26 +114,11 @@ static inline void iowrite32_relaxed(uint32_t val, volatile void *addr)
rte_write32_relaxed(val, addr);
}
-static inline void iowrite16(uint16_t val, volatile void *addr)
-{
- rte_write16(val, addr);
-}
-
-static inline void iowrite8(uint8_t val, volatile void *addr)
-{
- rte_write8(val, addr);
-}
-
static inline unsigned int readl(volatile void __iomem *addr)
{
return rte_read32(addr);
}
-static inline unsigned int readl_relaxed(volatile void __iomem *addr)
-{
- return rte_read32_relaxed(addr);
-}
-
static inline void writel(unsigned int val, volatile void __iomem *addr)
{
rte_write32(val, addr);
diff --git a/drivers/net/enic/enic_ethdev.c b/drivers/net/enic/enic_ethdev.c
index da8fec2d..669dbf33 100644
--- a/drivers/net/enic/enic_ethdev.c
+++ b/drivers/net/enic/enic_ethdev.c
@@ -37,6 +37,7 @@
#include <rte_dev.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_ethdev.h>
#include <rte_ethdev_pci.h>
#include <rte_string_fns.h>
@@ -142,6 +143,10 @@ enicpmd_dev_filter_ctrl(struct rte_eth_dev *dev,
static void enicpmd_dev_tx_queue_release(void *txq)
{
ENICPMD_FUNC_TRACE();
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return;
+
enic_free_wq(txq);
}
@@ -196,6 +201,9 @@ static int enicpmd_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
int ret;
struct enic *enic = pmd_priv(eth_dev);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return -E_RTE_SECONDARY;
+
ENICPMD_FUNC_TRACE();
if (queue_idx >= ENIC_WQ_MAX) {
dev_err(enic,
@@ -272,6 +280,10 @@ static int enicpmd_dev_rx_queue_stop(struct rte_eth_dev *eth_dev,
static void enicpmd_dev_rx_queue_release(void *rxq)
{
ENICPMD_FUNC_TRACE();
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return;
+
enic_free_rq(rxq);
}
@@ -310,6 +322,10 @@ static int enicpmd_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
struct enic *enic = pmd_priv(eth_dev);
ENICPMD_FUNC_TRACE();
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return -E_RTE_SECONDARY;
+
/* With Rx scatter support, two RQs are now used on VIC per RQ used
* by the application.
*/
@@ -347,7 +363,7 @@ static int enicpmd_vlan_filter_set(struct rte_eth_dev *eth_dev,
return err;
}
-static void enicpmd_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
+static int enicpmd_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
{
struct enic *enic = pmd_priv(eth_dev);
@@ -371,6 +387,8 @@ static void enicpmd_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
dev_warning(enic,
"Configuration of extended VLAN is not supported\n");
}
+
+ return 0;
}
static int enicpmd_dev_configure(struct rte_eth_dev *eth_dev)
@@ -378,6 +396,9 @@ static int enicpmd_dev_configure(struct rte_eth_dev *eth_dev)
int ret;
struct enic *enic = pmd_priv(eth_dev);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return -E_RTE_SECONDARY;
+
ENICPMD_FUNC_TRACE();
ret = enic_set_vnic_res(enic);
if (ret) {
@@ -392,9 +413,10 @@ static int enicpmd_dev_configure(struct rte_eth_dev *eth_dev)
eth_dev->data->dev_conf.rxmode.split_hdr_size);
}
- enicpmd_vlan_offload_set(eth_dev, ETH_VLAN_STRIP_MASK);
enic->hw_ip_checksum = eth_dev->data->dev_conf.rxmode.hw_ip_checksum;
- return 0;
+ ret = enicpmd_vlan_offload_set(eth_dev, ETH_VLAN_STRIP_MASK);
+
+ return ret;
}
/* Start the device.
@@ -404,6 +426,9 @@ static int enicpmd_dev_start(struct rte_eth_dev *eth_dev)
{
struct enic *enic = pmd_priv(eth_dev);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return -E_RTE_SECONDARY;
+
ENICPMD_FUNC_TRACE();
return enic_enable(enic);
}
@@ -416,6 +441,9 @@ static void enicpmd_dev_stop(struct rte_eth_dev *eth_dev)
struct rte_eth_link link;
struct enic *enic = pmd_priv(eth_dev);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return;
+
ENICPMD_FUNC_TRACE();
enic_disable(enic);
memset(&link, 0, sizeof(link));
@@ -444,13 +472,13 @@ static int enicpmd_dev_link_update(struct rte_eth_dev *eth_dev,
return enic_link_update(enic);
}
-static void enicpmd_dev_stats_get(struct rte_eth_dev *eth_dev,
+static int enicpmd_dev_stats_get(struct rte_eth_dev *eth_dev,
struct rte_eth_stats *stats)
{
struct enic *enic = pmd_priv(eth_dev);
ENICPMD_FUNC_TRACE();
- enic_dev_stats_get(enic, stats);
+ return enic_dev_stats_get(enic, stats);
}
static void enicpmd_dev_stats_reset(struct rte_eth_dev *eth_dev)
@@ -513,7 +541,11 @@ static void enicpmd_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
{
struct enic *enic = pmd_priv(eth_dev);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return;
+
ENICPMD_FUNC_TRACE();
+
enic->promisc = 1;
enic_add_packet_filter(enic);
}
@@ -522,6 +554,9 @@ static void enicpmd_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
{
struct enic *enic = pmd_priv(eth_dev);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return;
+
ENICPMD_FUNC_TRACE();
enic->promisc = 0;
enic_add_packet_filter(enic);
@@ -531,6 +566,9 @@ static void enicpmd_dev_allmulticast_enable(struct rte_eth_dev *eth_dev)
{
struct enic *enic = pmd_priv(eth_dev);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return;
+
ENICPMD_FUNC_TRACE();
enic->allmulti = 1;
enic_add_packet_filter(enic);
@@ -540,6 +578,9 @@ static void enicpmd_dev_allmulticast_disable(struct rte_eth_dev *eth_dev)
{
struct enic *enic = pmd_priv(eth_dev);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return;
+
ENICPMD_FUNC_TRACE();
enic->allmulti = 0;
enic_add_packet_filter(enic);
@@ -551,6 +592,9 @@ static int enicpmd_add_mac_addr(struct rte_eth_dev *eth_dev,
{
struct enic *enic = pmd_priv(eth_dev);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return -E_RTE_SECONDARY;
+
ENICPMD_FUNC_TRACE();
return enic_set_mac_address(enic, mac_addr->addr_bytes);
}
@@ -559,6 +603,9 @@ static void enicpmd_remove_mac_addr(struct rte_eth_dev *eth_dev, uint32_t index)
{
struct enic *enic = pmd_priv(eth_dev);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return;
+
ENICPMD_FUNC_TRACE();
enic_del_mac_address(enic, index);
}
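
The guards added throughout enic_ethdev.c above follow the usual DPDK multi-process pattern; a minimal sketch, using a hypothetical control-path function, of how such a guard is written.

    /* Illustrative multi-process guard (function name is hypothetical). */
    #include <rte_eal.h>
    #include <rte_errno.h>

    static int
    example_configure(void)
    {
        /* Only the primary process owns device configuration. */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
            return -E_RTE_SECONDARY;

        /* ... hardware programming would go here ... */
        return 0;
    }
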
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 40dbec7f..8af0ccd3 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -40,6 +40,7 @@
#include <libgen.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
@@ -156,16 +157,17 @@ void enic_dev_stats_clear(struct enic *enic)
enic_clear_soft_stats(enic);
}
-void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
+int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
{
struct vnic_stats *stats;
struct enic_soft_stats *soft_stats = &enic->soft_stats;
int64_t rx_truncated;
uint64_t rx_packet_errors;
+ int ret = vnic_dev_stats_dump(enic->vdev, &stats);
- if (vnic_dev_stats_dump(enic->vdev, &stats)) {
+ if (ret) {
dev_err(enic, "Error in getting stats\n");
- return;
+ return ret;
}
/* The number of truncated packets can only be calculated by
@@ -191,6 +193,7 @@ void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
+ return 0;
}
void enic_del_mac_address(struct enic *enic, int mac_index)
@@ -224,7 +227,7 @@ enic_free_rq_buf(struct rte_mbuf **mbuf)
return;
rte_pktmbuf_free(*mbuf);
- mbuf = NULL;
+ *mbuf = NULL;
}
void enic_init_vnic_resources(struct enic *enic)
@@ -280,7 +283,7 @@ void enic_init_vnic_resources(struct enic *enic)
0 /* cq_entry_enable */,
1 /* cq_message_enable */,
0 /* interrupt offset */,
- (u64)enic->wq[index].cqmsg_rz->phys_addr);
+ (u64)enic->wq[index].cqmsg_rz->iova);
}
vnic_intr_init(&enic->intr,
@@ -313,7 +316,7 @@ enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
}
mb->data_off = RTE_PKTMBUF_HEADROOM;
- dma_addr = (dma_addr_t)(mb->buf_physaddr
+ dma_addr = (dma_addr_t)(mb->buf_iova
+ RTE_PKTMBUF_HEADROOM);
rq_enet_desc_enc(rqd, dma_addr,
(rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
@@ -359,7 +362,7 @@ enic_alloc_consistent(void *priv, size_t size,
}
vaddr = rz->addr;
- *dma_handle = (dma_addr_t)rz->phys_addr;
+ *dma_handle = (dma_addr_t)rz->iova;
mze = rte_malloc("enic memzone entry",
sizeof(struct enic_memzone_entry), 0);
@@ -368,6 +371,7 @@ enic_alloc_consistent(void *priv, size_t size,
pr_err("%s : Failed to allocate memory for memzone list\n",
__func__);
rte_memzone_free(rz);
+ return NULL;
}
mze->rz = rz;
@@ -391,7 +395,7 @@ enic_free_consistent(void *priv,
rte_spinlock_lock(&enic->memzone_list_lock);
LIST_FOREACH(mze, &enic->memzone_list, entries) {
if (mze->rz->addr == vaddr &&
- mze->rz->phys_addr == dma_handle)
+ mze->rz->iova == dma_handle)
break;
}
if (mze == NULL) {
@@ -1116,11 +1120,12 @@ static int
enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
{
struct vnic_rq *sop_rq, *data_rq;
- unsigned int cq_idx = enic_cq_rq(enic, rq_idx);
+ unsigned int cq_idx;
int rc = 0;
sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
+ cq_idx = rq_idx;
vnic_cq_clean(&enic->cq[cq_idx]);
vnic_cq_init(&enic->cq[cq_idx],
@@ -1180,6 +1185,9 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
old_mtu = eth_dev->data->mtu;
config_mtu = enic->config.mtu;
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return -E_RTE_SECONDARY;
+
if (new_mtu > enic->max_mtu) {
dev_err(enic,
"MTU not updated: requested (%u) greater than max (%u)\n",
@@ -1331,6 +1339,10 @@ int enic_probe(struct enic *enic)
dev_debug(enic, " Initializing ENIC PMD\n");
+ /* If this is a secondary process, the hardware is already initialized. */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
enic->bar0.len = pdev->mem_resource[0].len;
diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c
index a39172f1..a3663d51 100644
--- a/drivers/net/enic/enic_rxtx.c
+++ b/drivers/net/enic/enic_rxtx.c
@@ -243,7 +243,7 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
/* VLAN STRIPPED flag. The L2 packet type updated here also */
if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) {
- pkt_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
+ pkt_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
mbuf->packet_type |= RTE_PTYPE_L2_ETHER;
} else {
if (vlan_tci != 0)
@@ -386,7 +386,7 @@ enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
/* Push descriptor for newly allocated mbuf */
nmb->data_off = RTE_PKTMBUF_HEADROOM;
- dma_addr = (dma_addr_t)(nmb->buf_physaddr +
+ dma_addr = (dma_addr_t)(nmb->buf_iova +
RTE_PKTMBUF_HEADROOM);
rq_enet_desc_enc(rqd_ptr, dma_addr,
(rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
@@ -546,12 +546,15 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint64_t bus_addr;
uint8_t offload_mode;
uint16_t header_len;
+ uint64_t tso;
+ rte_atomic64_t *tx_oversized;
enic_cleanup_wq(enic, wq);
wq_desc_avail = vnic_wq_desc_avail(wq);
head_idx = wq->head_idx;
desc_count = wq->ring.desc_count;
ol_flags_mask = PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK;
+ tx_oversized = &enic->soft_stats.tx_oversized;
nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX);
@@ -561,10 +564,12 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
data_len = tx_pkt->data_len;
ol_flags = tx_pkt->ol_flags;
nb_segs = tx_pkt->nb_segs;
+ tso = ol_flags & PKT_TX_TCP_SEG;
- if (pkt_len > ENIC_TX_MAX_PKT_SIZE) {
+ /* drop packet if it's too big to send */
+ if (unlikely(!tso && pkt_len > ENIC_TX_MAX_PKT_SIZE)) {
rte_pktmbuf_free(tx_pkt);
- rte_atomic64_inc(&enic->soft_stats.tx_oversized);
+ rte_atomic64_inc(tx_oversized);
continue;
}
@@ -578,7 +583,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
vlan_id = 0;
vlan_tag_insert = 0;
bus_addr = (dma_addr_t)
- (tx_pkt->buf_physaddr + tx_pkt->data_off);
+ (tx_pkt->buf_iova + tx_pkt->data_off);
descs = (struct wq_enet_desc *)wq->ring.descs;
desc_p = descs + head_idx;
@@ -587,13 +592,21 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
offload_mode = WQ_ENET_OFFLOAD_MODE_CSUM;
header_len = 0;
- if (tx_pkt->tso_segsz) {
+ if (tso) {
header_len = tso_header_len(tx_pkt);
- if (header_len) {
- offload_mode = WQ_ENET_OFFLOAD_MODE_TSO;
- mss = tx_pkt->tso_segsz;
+
+ /* Drop if non-TCP packet or TSO seg size is too big */
+ if (unlikely(header_len == 0 || ((tx_pkt->tso_segsz +
+ header_len) > ENIC_TX_MAX_PKT_SIZE))) {
+ rte_pktmbuf_free(tx_pkt);
+ rte_atomic64_inc(tx_oversized);
+ continue;
}
+
+ offload_mode = WQ_ENET_OFFLOAD_MODE_TSO;
+ mss = tx_pkt->tso_segsz;
}
+
if ((ol_flags & ol_flags_mask) && (header_len == 0)) {
if (ol_flags & PKT_TX_IP_CKSUM)
mss |= ENIC_CALC_IP_CKSUM;
@@ -630,7 +643,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
if (tx_pkt->next == NULL)
eop = 1;
desc_p = descs + head_idx;
- bus_addr = (dma_addr_t)(tx_pkt->buf_physaddr
+ bus_addr = (dma_addr_t)(tx_pkt->buf_iova
+ tx_pkt->data_off);
wq_enet_desc_enc((struct wq_enet_desc *)
&desc_tmp, bus_addr, data_len,
diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index d516d362..ea2a8fe4 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -58,5 +58,8 @@ CFLAGS += -D_XOPEN_SOURCE=700
CFLAGS += $(WERROR_FLAGS)
CFLAGS += -Wno-strict-prototypes
CFLAGS += -pedantic
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_vdev
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 6006bef8..6bc5abac 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -37,7 +37,7 @@
#include <rte_ethdev_vdev.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#include "failsafe_private.h"
diff --git a/drivers/net/failsafe/failsafe_args.c b/drivers/net/failsafe/failsafe_args.c
index 1f22416f..cfc83e36 100644
--- a/drivers/net/failsafe/failsafe_args.c
+++ b/drivers/net/failsafe/failsafe_args.c
@@ -115,8 +115,7 @@ fs_execute_cmd(struct sub_device *sdev, char *cmdline)
/* store possible newline as well */
char output[DEVARGS_MAXLEN + 1];
size_t len;
- int old_err;
- int ret, pclose_ret;
+ int ret;
RTE_ASSERT(cmdline != NULL || sdev->cmdline != NULL);
if (sdev->cmdline == NULL) {
@@ -135,12 +134,10 @@ fs_execute_cmd(struct sub_device *sdev, char *cmdline)
sdev->cmdline[i] = ' ';
}
DEBUG("'%s'", sdev->cmdline);
- old_err = errno;
fp = popen(sdev->cmdline, "r");
if (fp == NULL) {
- ret = errno;
+ ret = -errno;
ERROR("popen: %s", strerror(errno));
- errno = old_err;
return ret;
}
/* We only read one line */
@@ -155,18 +152,11 @@ fs_execute_cmd(struct sub_device *sdev, char *cmdline)
goto ret_pclose;
}
ret = fs_parse_device(sdev, output);
- if (ret) {
+ if (ret)
ERROR("Parsing device '%s' failed", output);
- goto ret_pclose;
- }
ret_pclose:
- pclose_ret = pclose(fp);
- if (pclose_ret) {
- pclose_ret = errno;
+ if (pclose(fp) == -1)
ERROR("pclose: %s", strerror(errno));
- errno = old_err;
- return pclose_ret;
- }
return ret;
}
@@ -286,10 +276,17 @@ fs_remove_sub_devices_definition(char params[DEVARGS_MAXLEN])
ERROR("Invalid parameter");
return -EINVAL;
}
- if (params[b] == ',' || params[b] == '\0')
- i += snprintf(&buffer[i], b - a + 1, "%s", &params[a]);
- if (params[b] == '(') {
+ if (params[b] == ',' || params[b] == '\0') {
+ size_t len = b - a;
+
+ if (i > 0)
+ len += 1;
+ snprintf(&buffer[i], len + 1, "%s%s",
+ i ? "," : "", &params[a]);
+ i += len;
+ } else if (params[b] == '(') {
size_t start = b;
+
b += closing_paren(&params[b]);
if (b == start)
return -EINVAL;
@@ -393,6 +390,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
&dev->data->mac_addrs[0]);
if (ret < 0)
goto free_kvlist;
+
mac_from_arg = 1;
}
}
diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index c8f4318e..19d26f53 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -41,6 +41,7 @@ fs_bus_init(struct rte_eth_dev *dev)
struct sub_device *sdev;
struct rte_devargs *da;
uint8_t i;
+ uint16_t j;
int ret;
FOREACH_SUBDEV(sdev, i, dev) {
@@ -57,7 +58,13 @@ fs_bus_init(struct rte_eth_dev *dev)
rte_errno ? ")" : "");
continue;
}
- ETH(sdev) = rte_eth_dev_allocated(da->name);
+ RTE_ETH_FOREACH_DEV(j) {
+ if (strcmp(rte_eth_devices[j].device->name,
+ da->name) == 0) {
+ ETH(sdev) = &rte_eth_devices[j];
+ break;
+ }
+ }
if (ETH(sdev) == NULL) {
ERROR("sub_device %d init went wrong", i);
return -ENODEV;
@@ -90,19 +97,20 @@ fs_bus_uninit(struct rte_eth_dev *dev)
{
struct sub_device *sdev = NULL;
uint8_t i;
- int ret;
+ int sdev_ret;
+ int ret = 0;
FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
- ret = rte_eal_hotplug_remove(sdev->bus->name,
- sdev->dev->name);
- if (ret) {
- ERROR("Failed to remove requested device %s",
- sdev->dev->name);
+ sdev_ret = rte_eal_hotplug_remove(sdev->bus->name,
+ sdev->dev->name);
+ if (sdev_ret) {
+ ERROR("Failed to remove requested device %s (err: %d)",
+ sdev->dev->name, sdev_ret);
continue;
}
sdev->state = DEV_PROBED - 1;
}
- return 0;
+ return ret;
}
int
@@ -111,8 +119,6 @@ failsafe_eal_uninit(struct rte_eth_dev *dev)
int ret;
ret = fs_bus_uninit(dev);
- if (ret)
- return ret;
PRIV(dev)->state = DEV_PROBED - 1;
- return 0;
+ return ret;
}
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index a3a8cce9..21392e5a 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -35,6 +35,7 @@
#include <rte_flow.h>
#include <rte_flow_driver.h>
+#include <rte_cycles.h>
#include "failsafe_private.h"
@@ -203,6 +204,7 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
ether_format_addr(ea_fmt, ETHER_ADDR_FMT_SIZE, ea);
ERROR("Adding MAC address %s failed", ea_fmt);
+ return ret;
}
}
/* VLAN filter */
@@ -308,6 +310,28 @@ fs_dev_remove(struct sub_device *sdev)
failsafe_hotplug_alarm_install(sdev->fs_dev);
}
+static void
+fs_dev_stats_save(struct sub_device *sdev)
+{
+ struct rte_eth_stats stats;
+ int err;
+
+ /* Attempt to read current stats. */
+ err = rte_eth_stats_get(PORT_ID(sdev), &stats);
+ if (err) {
+ uint64_t timestamp = sdev->stats_snapshot.timestamp;
+
+ WARN("Could not access latest statistics from sub-device %d.\n",
+ SUB_ID(sdev));
+ if (timestamp != 0)
+ WARN("Using latest snapshot taken before %"PRIu64" seconds.\n",
+ (rte_rdtsc() - timestamp) / rte_get_tsc_hz());
+ }
+ failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator,
+ err ? &sdev->stats_snapshot.stats : &stats);
+ memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
+}
+
static inline int
fs_rxtx_clean(struct sub_device *sdev)
{
@@ -329,8 +353,10 @@ failsafe_dev_remove(struct rte_eth_dev *dev)
uint8_t i;
FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
- if (sdev->remove && fs_rxtx_clean(sdev))
+ if (sdev->remove && fs_rxtx_clean(sdev)) {
+ fs_dev_stats_save(sdev);
fs_dev_remove(sdev);
+ }
}
int
@@ -399,8 +425,31 @@ err_remove:
return ret;
}
+void
+failsafe_stats_increment(struct rte_eth_stats *to, struct rte_eth_stats *from)
+{
+ uint32_t i;
+
+ RTE_ASSERT(to != NULL && from != NULL);
+ to->ipackets += from->ipackets;
+ to->opackets += from->opackets;
+ to->ibytes += from->ibytes;
+ to->obytes += from->obytes;
+ to->imissed += from->imissed;
+ to->ierrors += from->ierrors;
+ to->oerrors += from->oerrors;
+ to->rx_nombuf += from->rx_nombuf;
+ for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) {
+ to->q_ipackets[i] += from->q_ipackets[i];
+ to->q_opackets[i] += from->q_opackets[i];
+ to->q_ibytes[i] += from->q_ibytes[i];
+ to->q_obytes[i] += from->q_obytes[i];
+ to->q_errors[i] += from->q_errors[i];
+ }
+}
+
int
-failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
+failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
enum rte_eth_event_type event __rte_unused,
void *cb_arg, void *out __rte_unused)
{
@@ -419,7 +468,7 @@ failsafe_eth_rmv_event_callback(uint8_t port_id __rte_unused,
}
int
-failsafe_eth_lsc_event_callback(uint8_t port_id __rte_unused,
+failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
enum rte_eth_event_type event __rte_unused,
void *cb_arg, void *out __rte_unused)
{
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index ff9ad155..e16a5903 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -38,6 +38,7 @@
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_flow.h>
+#include <rte_cycles.h>
#include "failsafe_private.h"
@@ -79,132 +80,14 @@ static struct rte_eth_dev_info default_infos = {
.flow_type_rss_offloads = 0x0,
};
-/**
- * Check whether a specific offloading capability
- * is supported by a sub_device.
- *
- * @return
- * 0: all requested capabilities are supported by the sub_device
- * positive value: This flag at least is not supported by the sub_device
- */
-static int
-fs_port_offload_validate(struct rte_eth_dev *dev,
- struct sub_device *sdev)
-{
- struct rte_eth_dev_info infos = {0};
- struct rte_eth_conf *cf;
- uint32_t cap;
-
- cf = &dev->data->dev_conf;
- SUBOPS(sdev, dev_infos_get)(ETH(sdev), &infos);
- /* RX capabilities */
- cap = infos.rx_offload_capa;
- if (cf->rxmode.hw_vlan_strip &&
- ((cap & DEV_RX_OFFLOAD_VLAN_STRIP) == 0)) {
- WARN("VLAN stripping offload requested but not supported by sub_device %d",
- SUB_ID(sdev));
- return DEV_RX_OFFLOAD_VLAN_STRIP;
- }
- if (cf->rxmode.hw_ip_checksum &&
- ((cap & (DEV_RX_OFFLOAD_IPV4_CKSUM |
- DEV_RX_OFFLOAD_UDP_CKSUM |
- DEV_RX_OFFLOAD_TCP_CKSUM)) !=
- (DEV_RX_OFFLOAD_IPV4_CKSUM |
- DEV_RX_OFFLOAD_UDP_CKSUM |
- DEV_RX_OFFLOAD_TCP_CKSUM))) {
- WARN("IP checksum offload requested but not supported by sub_device %d",
- SUB_ID(sdev));
- return DEV_RX_OFFLOAD_IPV4_CKSUM |
- DEV_RX_OFFLOAD_UDP_CKSUM |
- DEV_RX_OFFLOAD_TCP_CKSUM;
- }
- if (cf->rxmode.enable_lro &&
- ((cap & DEV_RX_OFFLOAD_TCP_LRO) == 0)) {
- WARN("TCP LRO offload requested but not supported by sub_device %d",
- SUB_ID(sdev));
- return DEV_RX_OFFLOAD_TCP_LRO;
- }
- if (cf->rxmode.hw_vlan_extend &&
- ((cap & DEV_RX_OFFLOAD_QINQ_STRIP) == 0)) {
- WARN("Stacked VLAN stripping offload requested but not supported by sub_device %d",
- SUB_ID(sdev));
- return DEV_RX_OFFLOAD_QINQ_STRIP;
- }
- /* TX capabilities */
- /* Nothing to do, no tx capa supported */
- return 0;
-}
-
-/*
- * Disable the dev_conf flag related to an offload capability flag
- * within an ethdev configuration.
- */
-static int
-fs_port_disable_offload(struct rte_eth_conf *cf,
- uint32_t ol_cap)
-{
- switch (ol_cap) {
- case DEV_RX_OFFLOAD_VLAN_STRIP:
- INFO("Disabling VLAN stripping offload");
- cf->rxmode.hw_vlan_strip = 0;
- break;
- case DEV_RX_OFFLOAD_IPV4_CKSUM:
- case DEV_RX_OFFLOAD_UDP_CKSUM:
- case DEV_RX_OFFLOAD_TCP_CKSUM:
- case (DEV_RX_OFFLOAD_IPV4_CKSUM |
- DEV_RX_OFFLOAD_UDP_CKSUM |
- DEV_RX_OFFLOAD_TCP_CKSUM):
- INFO("Disabling IP checksum offload");
- cf->rxmode.hw_ip_checksum = 0;
- break;
- case DEV_RX_OFFLOAD_TCP_LRO:
- INFO("Disabling TCP LRO offload");
- cf->rxmode.enable_lro = 0;
- break;
- case DEV_RX_OFFLOAD_QINQ_STRIP:
- INFO("Disabling stacked VLAN stripping offload");
- cf->rxmode.hw_vlan_extend = 0;
- break;
- default:
- DEBUG("Unable to disable offload capability: %" PRIx32,
- ol_cap);
- return -1;
- }
- return 0;
-}
-
static int
fs_dev_configure(struct rte_eth_dev *dev)
{
struct sub_device *sdev;
uint8_t i;
- int capa_flag;
int ret;
FOREACH_SUBDEV(sdev, i, dev) {
- if (sdev->state != DEV_PROBED)
- continue;
- DEBUG("Checking capabilities for sub_device %d", i);
- while ((capa_flag = fs_port_offload_validate(dev, sdev))) {
- /*
- * Refuse to change configuration if multiple devices
- * are present and we already have configured at least
- * some of them.
- */
- if (PRIV(dev)->state >= DEV_ACTIVE &&
- PRIV(dev)->subs_tail > 1) {
- ERROR("device already configured, cannot fix live configuration");
- return -1;
- }
- ret = fs_port_disable_offload(&dev->data->dev_conf,
- capa_flag);
- if (ret) {
- ERROR("Unable to disable offload capability");
- return ret;
- }
- }
- }
- FOREACH_SUBDEV(sdev, i, dev) {
int rmv_interrupt = 0;
int lsc_interrupt = 0;
int lsc_enabled;
@@ -582,13 +465,30 @@ fs_link_update(struct rte_eth_dev *dev,
return -1;
}
-static void
+static int
fs_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats)
{
- if (TX_SUBDEV(dev) == NULL)
- return;
- rte_eth_stats_get(PORT_ID(TX_SUBDEV(dev)), stats);
+ struct sub_device *sdev;
+ uint8_t i;
+ int ret;
+
+ rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+ struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
+ uint64_t *timestamp = &sdev->stats_snapshot.timestamp;
+
+ ret = rte_eth_stats_get(PORT_ID(sdev), snapshot);
+ if (ret) {
+ ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
+ i, ret);
+ *timestamp = 0;
+ return ret;
+ }
+ *timestamp = rte_rdtsc();
+ failsafe_stats_increment(stats, snapshot);
+ }
+ return 0;
}
static void
@@ -597,8 +497,11 @@ fs_stats_reset(struct rte_eth_dev *dev)
struct sub_device *sdev;
uint8_t i;
- FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
+ FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
rte_eth_stats_reset(PORT_ID(sdev));
+ memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
+ }
+ memset(&PRIV(dev)->stats_accumulator, 0, sizeof(struct rte_eth_stats));
}
/**
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 0361cf43..d81cc3ca 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -93,6 +93,11 @@ enum dev_state {
DEV_STARTED,
};
+struct fs_stats {
+ struct rte_eth_stats stats;
+ uint64_t timestamp;
+};
+
struct sub_device {
/* Exhaustive DPDK device description */
struct rte_devargs devargs;
@@ -102,6 +107,8 @@ struct sub_device {
uint8_t sid;
/* Device state machine */
enum dev_state state;
+ /* Last stats snapshot passed to user */
+ struct fs_stats stats_snapshot;
/* Some device are defined as a command line */
char *cmdline;
/* fail-safe device backreference */
@@ -140,6 +147,7 @@ struct fs_priv {
* synchronized state.
*/
enum dev_state state;
+ struct rte_eth_stats stats_accumulator;
unsigned int pending_alarm:1; /* An alarm is pending */
/* flow isolation state */
int flow_isolated:1;
@@ -180,10 +188,12 @@ int failsafe_eal_uninit(struct rte_eth_dev *dev);
int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
void failsafe_dev_remove(struct rte_eth_dev *dev);
-int failsafe_eth_rmv_event_callback(uint8_t port_id,
+void failsafe_stats_increment(struct rte_eth_stats *to,
+ struct rte_eth_stats *from);
+int failsafe_eth_rmv_event_callback(uint16_t port_id,
enum rte_eth_event_type type,
void *arg, void *out);
-int failsafe_eth_lsc_event_callback(uint8_t port_id,
+int failsafe_eth_lsc_event_callback(uint16_t port_id,
enum rte_eth_event_type event,
void *cb_arg, void *out);
@@ -220,10 +230,10 @@ extern int mac_from_arg;
* dev: (struct rte_eth_dev *), fail-safe ethdev
* state: (enum dev_state), minimum acceptable device state
*/
-#define FOREACH_SUBDEV_STATE(s, i, dev, state) \
- for (i = fs_find_next((dev), 0, state); \
- i < PRIV(dev)->subs_tail && (s = &PRIV(dev)->subs[i]); \
- i = fs_find_next((dev), i + 1, state))
+#define FOREACH_SUBDEV_STATE(s, i, dev, state) \
+ for (s = fs_find_next((dev), 0, state, &i); \
+ s != NULL; \
+ s = fs_find_next((dev), i + 1, state, &i))
/**
* Iterator construct over fail-safe sub-devices:
@@ -294,18 +304,26 @@ extern int mac_from_arg;
/* inlined functions */
-static inline uint8_t
-fs_find_next(struct rte_eth_dev *dev, uint8_t sid,
- enum dev_state min_state)
+static inline struct sub_device *
+fs_find_next(struct rte_eth_dev *dev,
+ uint8_t sid,
+ enum dev_state min_state,
+ uint8_t *sid_out)
{
- while (sid < PRIV(dev)->subs_tail) {
- if (PRIV(dev)->subs[sid].state >= min_state)
+ struct sub_device *subs;
+ uint8_t tail;
+
+ subs = PRIV(dev)->subs;
+ tail = PRIV(dev)->subs_tail;
+ while (sid < tail) {
+ if (subs[sid].state >= min_state)
break;
sid++;
}
- if (sid >= PRIV(dev)->subs_tail)
- return PRIV(dev)->subs_tail;
- return sid;
+ *sid_out = sid;
+ if (sid >= tail)
+ return NULL;
+ return &subs[sid];
}
/*
@@ -334,7 +352,7 @@ fs_switch_dev(struct rte_eth_dev *dev,
} else if ((txd && txd->state < req_state) ||
txd == NULL ||
txd == banned) {
- struct sub_device *sdev;
+ struct sub_device *sdev = NULL;
uint8_t i;
/* Using acceptable device */
@@ -346,9 +364,10 @@ fs_switch_dev(struct rte_eth_dev *dev,
PRIV(dev)->subs_tx = i;
break;
}
- } else if (txd && txd->state < req_state) {
- DEBUG("No device ready, deactivating tx_dev");
- PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
+ if (i >= PRIV(dev)->subs_tail || sdev == NULL) {
+ DEBUG("No device ready, deactivating tx_dev");
+ PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
+ }
} else {
return;
}
diff --git a/drivers/net/failsafe/failsafe_rxtx.c b/drivers/net/failsafe/failsafe_rxtx.c
index 73114215..70157c82 100644
--- a/drivers/net/failsafe/failsafe_rxtx.c
+++ b/drivers/net/failsafe/failsafe_rxtx.c
@@ -43,7 +43,8 @@ fs_rx_unsafe(struct sub_device *sdev)
{
return (ETH(sdev) == NULL) ||
(ETH(sdev)->rx_pkt_burst == NULL) ||
- (sdev->state != DEV_STARTED);
+ (sdev->state != DEV_STARTED) ||
+ (sdev->remove != 0);
}
static inline int
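
A minimal sketch, with hypothetical struct and helper names, of the aggregation scheme the fail-safe hunks above implement: a persistent accumulator for removed sub-devices plus a live read per active member, falling back to the last snapshot when the read fails.

    /* Illustrative aggregation over member ports (names are hypothetical). */
    #include <rte_ethdev.h>
    #include <rte_cycles.h>

    struct member {
        uint16_t port_id;
        struct rte_eth_stats snapshot;   /* last good read */
        uint64_t snapshot_tsc;           /* when it was taken */
    };

    static void
    aggregate_stats(struct rte_eth_stats *out,
                    const struct rte_eth_stats *accumulator,
                    struct member *members, unsigned int n)
    {
        unsigned int i;

        *out = *accumulator;
        for (i = 0; i < n; i++) {
            struct rte_eth_stats cur;

            if (rte_eth_stats_get(members[i].port_id, &cur) == 0) {
                members[i].snapshot = cur;
                members[i].snapshot_tsc = rte_rdtsc();
            } else {
                /* Device unreachable: fall back to the last snapshot. */
                cur = members[i].snapshot;
            }
            out->ipackets += cur.ipackets;
            out->opackets += cur.opackets;
            out->ibytes += cur.ibytes;
            out->obytes += cur.obytes;
            out->ierrors += cur.ierrors;
            out->oerrors += cur.oerrors;
            out->imissed += cur.imissed;
            out->rx_nombuf += cur.rx_nombuf;
        }
    }
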
diff --git a/drivers/net/fm10k/Makefile b/drivers/net/fm10k/Makefile
index e0024f05..1129596f 100644
--- a/drivers/net/fm10k/Makefile
+++ b/drivers/net/fm10k/Makefile
@@ -76,11 +76,14 @@ CFLAGS_BASE_DRIVER += -Wno-implicit-fallthrough
endif
endif
endif
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
+LDLIBS += -lrte_bus_pci
#
# Add extra flags for base driver source files to disable warnings in them
#
-BASE_DRIVER_OBJS=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))
+BASE_DRIVER_OBJS=$(sort $(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c))))
$(foreach obj, $(BASE_DRIVER_OBJS), $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
VPATH += $(SRCDIR)/base
diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 8e1a9506..1273aa86 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -155,6 +155,7 @@ struct fm10k_dev_info {
struct fm10k_macvlan_filter_info macvlan;
/* Flag to indicate if RX vector conditions satisfied */
bool rx_vec_allowed;
+ bool sm_down;
};
/*
@@ -204,7 +205,7 @@ struct fm10k_rx_queue {
uint16_t rxrearm_nb; /* number of remaining to be re-armed */
uint16_t rxrearm_start; /* the idx we start the re-arming from */
uint16_t rx_using_sse; /* indicates that vector RX is in use */
- uint8_t port_id;
+ uint16_t port_id;
uint8_t drop_en;
uint8_t rx_deferred_start; /* don't start this queue in dev start. */
uint16_t rx_ftag_en; /* indicates FTAG RX supported */
@@ -241,7 +242,7 @@ struct fm10k_tx_queue {
volatile uint32_t *tail_ptr;
uint32_t txq_flags; /* Holds flags for this TXq */
uint16_t nb_desc;
- uint8_t port_id;
+ uint16_t port_id;
uint8_t tx_deferred_start; /** don't start this queue in dev start. */
uint16_t queue_id;
uint16_t tx_ftag_en; /* indicates FTAG TX supported */
@@ -252,11 +253,11 @@ struct fm10k_txq_ops {
};
#define MBUF_DMA_ADDR(mb) \
- ((uint64_t) ((mb)->buf_physaddr + (mb)->data_off))
+ ((uint64_t) ((mb)->buf_iova + (mb)->data_off))
/* enforce 512B alignment on default Rx DMA addresses */
#define MBUF_DMA_ADDR_DEFAULT(mb) \
- ((uint64_t) RTE_ALIGN(((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM),\
+ ((uint64_t) RTE_ALIGN(((mb)->buf_iova + RTE_PKTMBUF_HEADROOM),\
FM10K_RX_DATABUF_ALIGN))
static inline void fifo_reset(struct fifo *fifo, uint32_t len)
@@ -289,7 +290,7 @@ static inline uint16_t fifo_remove(struct fifo *fifo)
}
static inline void
-fm10k_pktmbuf_reset(struct rte_mbuf *mb, uint8_t in_port)
+fm10k_pktmbuf_reset(struct rte_mbuf *mb, uint16_t in_port)
{
rte_mbuf_refcnt_set(mb, 1);
mb->next = NULL;
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index e60d3a36..2d05a466 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -1256,14 +1256,17 @@ static int
fm10k_link_update(struct rte_eth_dev *dev,
__rte_unused int wait_to_complete)
{
+ struct fm10k_dev_info *dev_info =
+ FM10K_DEV_PRIVATE_TO_INFO(dev->data->dev_private);
PMD_INIT_FUNC_TRACE();
- /* The host-interface link is always up. The speed is ~50Gbps per Gen3
- * x8 PCIe interface. For now, we leave the speed undefined since there
- * is no 50Gbps Ethernet. */
+ /* The speed is ~50Gbps per Gen3 x8 PCIe interface. For now, we
+ * leave the speed undefined since there is no 50Gbps Ethernet.
+ */
dev->data->dev_link.link_speed = 0;
dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
- dev->data->dev_link.link_status = ETH_LINK_UP;
+ dev->data->dev_link.link_status =
+ dev_info->sm_down ? ETH_LINK_DOWN : ETH_LINK_UP;
return 0;
}
@@ -1346,7 +1349,7 @@ fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
return FM10K_NB_XSTATS;
}
-static void
+static int
fm10k_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
uint64_t ipackets, opackets, ibytes, obytes;
@@ -1376,6 +1379,7 @@ fm10k_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->opackets = opackets;
stats->ibytes = ibytes;
stats->obytes = obytes;
+ return 0;
}
static void
@@ -1590,7 +1594,7 @@ fm10k_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
return 0;
}
-static void
+static int
fm10k_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
if (mask & ETH_VLAN_STRIP_MASK) {
@@ -1609,6 +1613,8 @@ fm10k_vlan_offload_set(struct rte_eth_dev *dev, int mask)
if (!dev->data->dev_conf.rxmode.hw_vlan_filter)
PMD_INIT_LOG(ERR, "VLAN filter is always on in fm10k");
}
+
+ return 0;
}
/* Add/Remove a MAC address, and update filters to main VSI */
@@ -1887,7 +1893,7 @@ fm10k_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
return -ENOMEM;
}
q->hw_ring = mz->addr;
- q->hw_ring_phys_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
+ q->hw_ring_phys_addr = mz->iova;
/* Check if number of descs satisfied Vector requirement */
if (!rte_is_power_of_2(nb_desc)) {
@@ -2047,7 +2053,7 @@ fm10k_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
return -ENOMEM;
}
q->hw_ring = mz->addr;
- q->hw_ring_phys_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
+ q->hw_ring_phys_addr = mz->iova;
/*
* allocate memory for the RS bit tracker. Enough slots to hold the
@@ -2552,6 +2558,10 @@ fm10k_dev_interrupt_handler_pf(void *param)
struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
uint32_t cause, status;
+ struct fm10k_dev_info *dev_info =
+ FM10K_DEV_PRIVATE_TO_INFO(dev->data->dev_private);
+ int status_mbx;
+ s32 err;
if (hw->mac.type != fm10k_mac_pf)
return;
@@ -2568,14 +2578,69 @@ fm10k_dev_interrupt_handler_pf(void *param)
if (cause & FM10K_EICR_SWITCHNOTREADY)
PMD_INIT_LOG(ERR, "INT: Switch is not ready");
- if (cause & FM10K_EICR_SWITCHREADY)
+ if (cause & FM10K_EICR_SWITCHREADY) {
PMD_INIT_LOG(INFO, "INT: Switch is ready");
+ if (dev_info->sm_down == 1) {
+ fm10k_mbx_lock(hw);
+
+ /* For recreating logical ports */
+ status_mbx = hw->mac.ops.update_lport_state(hw,
+ hw->mac.dglort_map, MAX_LPORT_NUM, 1);
+ if (status_mbx == FM10K_SUCCESS)
+ PMD_INIT_LOG(INFO,
+ "INT: Recreated Logical port");
+ else
+ PMD_INIT_LOG(INFO,
+ "INT: Logical ports weren't recreated");
+
+ status_mbx = hw->mac.ops.update_xcast_mode(hw,
+ hw->mac.dglort_map, FM10K_XCAST_MODE_NONE);
+ if (status_mbx != FM10K_SUCCESS)
+ PMD_INIT_LOG(ERR, "Failed to set XCAST mode");
+
+ fm10k_mbx_unlock(hw);
+
+ /* first clear the internal SW recording structure */
+ if (!(dev->data->dev_conf.rxmode.mq_mode &
+ ETH_MQ_RX_VMDQ_FLAG))
+ fm10k_vlan_filter_set(dev, hw->mac.default_vid,
+ false);
+
+ fm10k_MAC_filter_set(dev, hw->mac.addr, false,
+ MAIN_VSI_POOL_NUMBER);
+
+ /*
+ * Add the default MAC address and VLAN for the logical
+ * ports that have been created, leaving full recovery of
+ * Rx filtering to the application.
+ */
+ fm10k_MAC_filter_set(dev, hw->mac.addr, true,
+ MAIN_VSI_POOL_NUMBER);
+
+ if (!(dev->data->dev_conf.rxmode.mq_mode &
+ ETH_MQ_RX_VMDQ_FLAG))
+ fm10k_vlan_filter_set(dev, hw->mac.default_vid,
+ true);
+
+ dev_info->sm_down = 0;
+ _rte_eth_dev_callback_process(dev,
+ RTE_ETH_EVENT_INTR_LSC,
+ NULL, NULL);
+ }
+ }
/* Handle mailbox message */
fm10k_mbx_lock(hw);
- hw->mbx.ops.process(hw, &hw->mbx);
+ err = hw->mbx.ops.process(hw, &hw->mbx);
fm10k_mbx_unlock(hw);
+ if (err == FM10K_ERR_RESET_REQUESTED) {
+ PMD_INIT_LOG(INFO, "INT: Switch is down");
+ dev_info->sm_down = 1;
+ _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
+ NULL, NULL);
+ }
+
/* Handle SRAM error */
if (cause & FM10K_EICR_SRAMERROR) {
PMD_INIT_LOG(ERR, "INT: SRAM error on PEP");
@@ -2616,6 +2681,11 @@ fm10k_dev_interrupt_handler_vf(void *param)
{
struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct fm10k_mbx_info *mbx = &hw->mbx;
+ struct fm10k_dev_info *dev_info =
+ FM10K_DEV_PRIVATE_TO_INFO(dev->data->dev_private);
+ const enum fm10k_mbx_state state = mbx->state;
+ int status_mbx;
if (hw->mac.type != fm10k_mac_vf)
return;
@@ -2625,6 +2695,49 @@ fm10k_dev_interrupt_handler_vf(void *param)
hw->mbx.ops.process(hw, &hw->mbx);
fm10k_mbx_unlock(hw);
+ if (state == FM10K_STATE_OPEN && mbx->state == FM10K_STATE_CONNECT) {
+ PMD_INIT_LOG(INFO, "INT: Switch has gone down");
+
+ fm10k_mbx_lock(hw);
+ hw->mac.ops.update_lport_state(hw, hw->mac.dglort_map,
+ MAX_LPORT_NUM, 1);
+ fm10k_mbx_unlock(hw);
+
+ /* Setting reset flag */
+ dev_info->sm_down = 1;
+ _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
+ NULL, NULL);
+ }
+
+ if (dev_info->sm_down == 1 &&
+ hw->mac.dglort_map == FM10K_DGLORTMAP_ZERO) {
+ PMD_INIT_LOG(INFO, "INT: Switch has gone up");
+ fm10k_mbx_lock(hw);
+ status_mbx = hw->mac.ops.update_xcast_mode(hw,
+ hw->mac.dglort_map, FM10K_XCAST_MODE_NONE);
+ if (status_mbx != FM10K_SUCCESS)
+ PMD_INIT_LOG(ERR, "Failed to set XCAST mode");
+ fm10k_mbx_unlock(hw);
+
+ /* first clear the internal SW recording structure */
+ fm10k_vlan_filter_set(dev, hw->mac.default_vid, false);
+ fm10k_MAC_filter_set(dev, hw->mac.addr, false,
+ MAIN_VSI_POOL_NUMBER);
+
+ /*
+ * Add the default MAC address and VLAN for the logical ports that
+ * have been created, leaving it to the application to fully recover
+ * Rx filtering.
+ */
+ fm10k_MAC_filter_set(dev, hw->mac.addr, true,
+ MAIN_VSI_POOL_NUMBER);
+ fm10k_vlan_filter_set(dev, hw->mac.default_vid, true);
+
+ dev_info->sm_down = 0;
+ _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
+ NULL, NULL);
+ }
+
/* Re-enable interrupt from device side */
FM10K_WRITE_REG(hw, FM10K_VFITR(0), FM10K_ITR_AUTOMASK |
FM10K_ITR_MASK_CLEAR);
@@ -2908,7 +3021,6 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev)
}
rte_eth_copy_pci_info(dev, pdev);
- dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
memset(macvlan, 0, sizeof(*macvlan));
@@ -3142,7 +3254,8 @@ static const struct rte_pci_id pci_id_fm10k_map[] = {
static struct rte_pci_driver rte_pmd_fm10k = {
.id_table = pci_id_fm10k_map,
- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+ RTE_PCI_DRV_IOVA_AS_VA,
.probe = eth_fm10k_pci_probe,
.remove = eth_fm10k_pci_remove,
};
diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c
index c9bb04a0..d6081e48 100644
--- a/drivers/net/fm10k/fm10k_rxtx.c
+++ b/drivers/net/fm10k/fm10k_rxtx.c
@@ -158,10 +158,10 @@ fm10k_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
* Packets in fm10k device always carry at least one VLAN tag.
* For those packets coming in without VLAN tag,
* the port default VLAN tag will be used.
- * So, always PKT_RX_VLAN_PKT flag is set and vlan_tci
+ * So, always PKT_RX_VLAN flag is set and vlan_tci
* is valid for each RX packet's mbuf.
*/
- mbuf->ol_flags |= PKT_RX_VLAN_PKT;
+ mbuf->ol_flags |= PKT_RX_VLAN;
mbuf->vlan_tci = desc.w.vlan;
/**
* mbuf->vlan_tci_outer is an idle field in fm10k driver,
@@ -198,7 +198,7 @@ fm10k_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
q->alloc_thresh);
if (unlikely(ret != 0)) {
- uint8_t port = q->port_id;
+ uint16_t port = q->port_id;
PMD_RX_LOG(ERR, "Failed to alloc mbuf");
/*
* Need to restore next_dd if we cannot allocate new
@@ -319,10 +319,10 @@ fm10k_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
* Packets in fm10k device always carry at least one VLAN tag.
* For those packets coming in without VLAN tag,
* the port default VLAN tag will be used.
- * So, always PKT_RX_VLAN_PKT flag is set and vlan_tci
+ * So, always PKT_RX_VLAN flag is set and vlan_tci
* is valid for each RX packet's mbuf.
*/
- first_seg->ol_flags |= PKT_RX_VLAN_PKT;
+ first_seg->ol_flags |= PKT_RX_VLAN;
first_seg->vlan_tci = desc.w.vlan;
/**
* mbuf->vlan_tci_outer is an idle field in fm10k driver,
@@ -356,7 +356,7 @@ fm10k_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
q->alloc_thresh);
if (unlikely(ret != 0)) {
- uint8_t port = q->port_id;
+ uint16_t port = q->port_id;
PMD_RX_LOG(ERR, "Failed to alloc mbuf");
/*
* Need to restore next_dd if we cannot allocate new
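
The PKT_RX_VLAN_PKT to PKT_RX_VLAN rename above follows the mbuf API of this release; the fm10k semantics are unchanged. A minimal application-side sketch of consuming the flag, for illustration only:

	#include <rte_mbuf.h>

	/* On fm10k every received mbuf carries PKT_RX_VLAN, so vlan_tci is
	 * always valid; the check keeps the helper portable across PMDs. */
	static inline uint16_t
	rx_vlan_tci(const struct rte_mbuf *m)
	{
		return (m->ol_flags & PKT_RX_VLAN) ? m->vlan_tci : 0;
	}
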
diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c b/drivers/net/fm10k/fm10k_rxtx_vec.c
index d23bfe9b..ce042d3d 100644
--- a/drivers/net/fm10k/fm10k_rxtx_vec.c
+++ b/drivers/net/fm10k/fm10k_rxtx_vec.c
@@ -81,8 +81,8 @@ fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
const __m128i pkttype_msk = _mm_set_epi16(
0x0000, 0x0000, 0x0000, 0x0000,
- PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT,
- PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT);
+ PKT_RX_VLAN, PKT_RX_VLAN,
+ PKT_RX_VLAN, PKT_RX_VLAN);
/* mask everything except rss type */
const __m128i rsstype_msk = _mm_set_epi16(
@@ -330,8 +330,8 @@ fm10k_rxq_rearm(struct fm10k_rx_queue *rxq)
p1 = (uintptr_t)&mb1->rearm_data;
*(uint64_t *)p1 = rxq->mbuf_initializer;
- /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_physaddr) !=
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
offsetof(struct rte_mbuf, buf_addr) + 8);
vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
diff --git a/drivers/net/i40e/Makefile b/drivers/net/i40e/Makefile
index 55c79a60..9ab8c84d 100644
--- a/drivers/net/i40e/Makefile
+++ b/drivers/net/i40e/Makefile
@@ -39,10 +39,13 @@ LIB = librte_pmd_i40e.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -DPF_DRIVER -DVF_DRIVER -DINTEGRATED_VF
CFLAGS += -DX722_A0_SUPPORT
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
+LDLIBS += -lrte_bus_pci
EXPORT_MAP := rte_pmd_i40e_version.map
-LIBABIVER := 1
+LIBABIVER := 2
#
# Add extra flags for base driver files (also known as shared code)
@@ -78,7 +81,7 @@ endif
CFLAGS_i40e_lan_hmc.o += -Wno-error
endif
-OBJS_BASE_DRIVER=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))
+OBJS_BASE_DRIVER=$(sort $(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c))))
$(foreach obj, $(OBJS_BASE_DRIVER), $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
VPATH += $(SRCDIR)/base
diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
index c57ecded..8e5c593c 100644
--- a/drivers/net/i40e/base/i40e_osdep.h
+++ b/drivers/net/i40e/base/i40e_osdep.h
@@ -35,6 +35,7 @@
#include <string.h>
#include <stdint.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdarg.h>
@@ -57,7 +58,6 @@ typedef uint16_t u16;
typedef uint32_t u32;
typedef int32_t s32;
typedef uint64_t u64;
-typedef int bool;
typedef enum i40e_status_code i40e_status;
#define __iomem
@@ -99,7 +99,6 @@ typedef enum i40e_status_code i40e_status;
#define max(a,b) RTE_MAX(a,b)
#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
-#define ASSERT(x) if(!(x)) rte_panic("IXGBE: x")
#define DEBUGOUT(S) PMD_DRV_LOG_RAW(DEBUG, S)
#define DEBUGOUT1(S, A...) PMD_DRV_LOG_RAW(DEBUG, S, ##A)
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 5f26e24a..811cc9ff 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -43,6 +43,7 @@
#include <rte_eal.h>
#include <rte_string_fns.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ethdev_pci.h>
@@ -65,6 +66,7 @@
#include "i40e_rxtx.h"
#include "i40e_pf.h"
#include "i40e_regs.h"
+#include "rte_pmd_i40e.h"
#define ETH_I40E_FLOATING_VEB_ARG "enable_floating_veb"
#define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list"
@@ -86,12 +88,6 @@
/* Flow control default timer */
#define I40E_DEFAULT_PAUSE_TIME 0xFFFFU
-/* Flow control default high water */
-#define I40E_DEFAULT_HIGH_WATER (0x1C40/1024)
-
-/* Flow control default low water */
-#define I40E_DEFAULT_LOW_WATER (0x1A40/1024)
-
/* Flow control enable fwd bit */
#define I40E_PRTMAC_FWD_CTRL 0x00000001
@@ -101,6 +97,12 @@
/* Kilobytes shift */
#define I40E_KILOSHIFT 10
+/* Flow control default high water */
+#define I40E_DEFAULT_HIGH_WATER (0xF2000 >> I40E_KILOSHIFT)
+
+/* Flow control default low water */
+#define I40E_DEFAULT_LOW_WATER (0xF2000 >> I40E_KILOSHIFT)
+
/* Receive Average Packet Size in Byte*/
#define I40E_PACKET_AVERAGE_SIZE 128
@@ -137,10 +139,6 @@
#define I40E_PRTTSYN_TSYNTYPE 0x0e000000
#define I40E_CYCLECOUNTER_MASK 0xffffffffffffffffULL
-#define I40E_MAX_PERCENT 100
-#define I40E_DEFAULT_DCB_APP_NUM 1
-#define I40E_DEFAULT_DCB_APP_PRIO 3
-
/**
* Below are values for writing un-exposed registers suggested
* by silicon experts
@@ -250,13 +248,14 @@ static int i40e_dev_configure(struct rte_eth_dev *dev);
static int i40e_dev_start(struct rte_eth_dev *dev);
static void i40e_dev_stop(struct rte_eth_dev *dev);
static void i40e_dev_close(struct rte_eth_dev *dev);
+static int i40e_dev_reset(struct rte_eth_dev *dev);
static void i40e_dev_promiscuous_enable(struct rte_eth_dev *dev);
static void i40e_dev_promiscuous_disable(struct rte_eth_dev *dev);
static void i40e_dev_allmulticast_enable(struct rte_eth_dev *dev);
static void i40e_dev_allmulticast_disable(struct rte_eth_dev *dev);
static int i40e_dev_set_link_up(struct rte_eth_dev *dev);
static int i40e_dev_set_link_down(struct rte_eth_dev *dev);
-static void i40e_dev_stats_get(struct rte_eth_dev *dev,
+static int i40e_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static int i40e_dev_xstats_get(struct rte_eth_dev *dev,
struct rte_eth_xstat *xstats, unsigned n);
@@ -278,7 +277,7 @@ static int i40e_vlan_filter_set(struct rte_eth_dev *dev,
static int i40e_vlan_tpid_set(struct rte_eth_dev *dev,
enum rte_vlan_type vlan_type,
uint16_t tpid);
-static void i40e_vlan_offload_set(struct rte_eth_dev *dev, int mask);
+static int i40e_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static void i40e_vlan_strip_queue_set(struct rte_eth_dev *dev,
uint16_t queue,
int on);
@@ -308,7 +307,6 @@ static int i40e_pf_parameter_init(struct rte_eth_dev *dev);
static int i40e_pf_setup(struct i40e_pf *pf);
static int i40e_dev_rxtx_init(struct i40e_pf *pf);
static int i40e_vmdq_setup(struct rte_eth_dev *dev);
-static int i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb);
static int i40e_dcb_setup(struct rte_eth_dev *dev);
static void i40e_stat_update_32(struct i40e_hw *hw, uint32_t reg,
bool offset_loaded, uint64_t *offset, uint64_t *stat);
@@ -360,6 +358,12 @@ static int i40e_dev_sync_phy_type(struct i40e_hw *hw);
static void i40e_configure_registers(struct i40e_hw *hw);
static void i40e_hw_init(struct rte_eth_dev *dev);
static int i40e_config_qinq(struct i40e_hw *hw, struct i40e_vsi *vsi);
+static enum i40e_status_code i40e_aq_del_mirror_rule(struct i40e_hw *hw,
+ uint16_t seid,
+ uint16_t rule_type,
+ uint16_t *entries,
+ uint16_t count,
+ uint16_t rule_id);
static int i40e_mirror_rule_set(struct rte_eth_dev *dev,
struct rte_eth_mirror_conf *mirror_conf,
uint8_t sw_id, uint8_t on);
@@ -449,6 +453,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
.dev_start = i40e_dev_start,
.dev_stop = i40e_dev_stop,
.dev_close = i40e_dev_close,
+ .dev_reset = i40e_dev_reset,
.promiscuous_enable = i40e_dev_promiscuous_enable,
.promiscuous_disable = i40e_dev_promiscuous_disable,
.allmulticast_enable = i40e_dev_allmulticast_enable,
@@ -645,7 +650,8 @@ static int eth_i40e_pci_remove(struct rte_pci_device *pci_dev)
static struct rte_pci_driver rte_i40e_pmd = {
.id_table = pci_id_i40e_map,
- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+ RTE_PCI_DRV_IOVA_AS_VA,
.probe = eth_i40e_pci_probe,
.remove = eth_i40e_pci_remove,
};
@@ -695,23 +701,22 @@ RTE_PMD_REGISTER_KMOD_DEP(net_i40e, "* igb_uio | uio_pci_generic | vfio-pci");
static inline void i40e_GLQF_reg_init(struct i40e_hw *hw)
{
/*
- * Initialize registers for flexible payload, which should be set by NVM.
- * This should be removed from code once it is fixed in NVM.
+	 * Force the global configuration for flexible payload
+	 * to the first 16 bytes of the corresponding L2/L3/L4 payload.
+	 * This should be removed from the code once a proper
+	 * configuration API is added, to avoid configuration conflicts
+	 * between ports of the same device.
*/
- I40E_WRITE_REG(hw, I40E_GLQF_ORT(18), 0x00000030);
- I40E_WRITE_REG(hw, I40E_GLQF_ORT(19), 0x00000030);
- I40E_WRITE_REG(hw, I40E_GLQF_ORT(26), 0x0000002B);
- I40E_WRITE_REG(hw, I40E_GLQF_ORT(30), 0x0000002B);
I40E_WRITE_REG(hw, I40E_GLQF_ORT(33), 0x000000E0);
I40E_WRITE_REG(hw, I40E_GLQF_ORT(34), 0x000000E3);
I40E_WRITE_REG(hw, I40E_GLQF_ORT(35), 0x000000E6);
- I40E_WRITE_REG(hw, I40E_GLQF_ORT(20), 0x00000031);
- I40E_WRITE_REG(hw, I40E_GLQF_ORT(23), 0x00000031);
- I40E_WRITE_REG(hw, I40E_GLQF_ORT(63), 0x0000002D);
- I40E_WRITE_REG(hw, I40E_GLQF_PIT(16), 0x00007480);
- I40E_WRITE_REG(hw, I40E_GLQF_PIT(17), 0x00007440);
- /* Initialize registers for parsing packet type of QinQ */
+ /*
+	 * Initialize registers for parsing the packet type of QinQ.
+	 * This should be removed from the code once a proper
+	 * configuration API is added, to avoid configuration conflicts
+	 * between ports of the same device.
+ */
I40E_WRITE_REG(hw, I40E_GLQF_ORT(40), 0x00000029);
I40E_WRITE_REG(hw, I40E_GLQF_PIT(9), 0x00009420);
}
@@ -1034,6 +1039,35 @@ err_fdir_hash_map_alloc:
return ret;
}
+static void
+i40e_init_customized_info(struct i40e_pf *pf)
+{
+ int i;
+
+ /* Initialize customized pctype */
+ for (i = I40E_CUSTOMIZED_GTPC; i < I40E_CUSTOMIZED_MAX; i++) {
+ pf->customized_pctype[i].index = i;
+ pf->customized_pctype[i].pctype = I40E_FILTER_PCTYPE_INVALID;
+ pf->customized_pctype[i].valid = false;
+ }
+
+ pf->gtp_support = false;
+}
+
+void
+i40e_init_queue_region_conf(struct rte_eth_dev *dev)
+{
+ struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+ struct i40e_queue_regions *info = &pf->queue_region;
+ uint16_t i;
+
+ for (i = 0; i < I40E_PFQF_HREGION_MAX_INDEX; i++)
+ i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(i), 0);
+
+ memset(info, 0, sizeof(struct i40e_queue_regions));
+}
+
static int
eth_i40e_dev_init(struct rte_eth_dev *dev)
{
@@ -1062,11 +1096,11 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
return 0;
}
i40e_set_default_ptype_table(dev);
+ i40e_set_default_pctype_table(dev);
pci_dev = RTE_ETH_DEV_TO_PCI(dev);
intr_handle = &pci_dev->intr_handle;
rte_eth_copy_pci_info(dev, pci_dev);
- dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
pf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
pf->adapter->eth_dev = dev;
@@ -1299,6 +1333,9 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
/* initialize Traffic Manager configuration */
i40e_tm_conf_init(dev);
+ /* Initialize customized information */
+ i40e_init_customized_info(pf);
+
ret = i40e_init_ethtype_filter_list(dev);
if (ret < 0)
goto err_init_ethtype_filter_list;
@@ -1309,6 +1346,9 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
if (ret < 0)
goto err_init_fdir_filter_list;
+ /* initialize queue region configuration */
+ i40e_init_queue_region_conf(dev);
+
return 0;
err_init_fdir_filter_list:
@@ -1594,7 +1634,8 @@ i40e_vsi_queues_unbind_intr(struct i40e_vsi *vsi)
static void
__vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t msix_vect,
- int base_queue, int nb_queue)
+ int base_queue, int nb_queue,
+ uint16_t itr_idx)
{
int i;
uint32_t val;
@@ -1603,7 +1644,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t msix_vect,
/* Bind all RX queues to allocated MSIX interrupt */
for (i = 0; i < nb_queue; i++) {
val = (msix_vect << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
- I40E_QINT_RQCTL_ITR_INDX_MASK |
+ itr_idx << I40E_QINT_RQCTL_ITR_INDX_SHIFT |
((base_queue + i + 1) <<
I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
(0 << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
@@ -1666,7 +1707,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t msix_vect,
}
void
-i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi)
+i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t itr_idx)
{
struct rte_eth_dev *dev = vsi->adapter->eth_dev;
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
@@ -1694,7 +1735,8 @@ i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi)
/* VF bind interrupt */
if (vsi->type == I40E_VSI_SRIOV) {
__vsi_queues_bind_intr(vsi, msix_vect,
- vsi->base_queue, vsi->nb_qps);
+ vsi->base_queue, vsi->nb_qps,
+ itr_idx);
return;
}
@@ -1720,7 +1762,8 @@ i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi)
/* not enough msix_vect, map all to one */
__vsi_queues_bind_intr(vsi, msix_vect,
vsi->base_queue + i,
- vsi->nb_used_qps - i);
+ vsi->nb_used_qps - i,
+ itr_idx);
for (; !!record && i < vsi->nb_used_qps; i++)
intr_handle->intr_vec[queue_idx + i] =
msix_vect;
@@ -1728,7 +1771,8 @@ i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi)
}
/* 1:1 queue/msix_vect mapping */
__vsi_queues_bind_intr(vsi, msix_vect,
- vsi->base_queue + i, 1);
+ vsi->base_queue + i, 1,
+ itr_idx);
if (!!record)
intr_handle->intr_vec[queue_idx + i] = msix_vect;
@@ -1918,8 +1962,9 @@ i40e_dev_start(struct rte_eth_dev *dev)
hw->adapter_stopped = 0;
if (dev->data->dev_conf.link_speeds & ETH_LINK_SPEED_FIXED) {
- PMD_INIT_LOG(ERR, "Invalid link_speeds for port %hhu; autonegotiation disabled",
- dev->data->port_id);
+ PMD_INIT_LOG(ERR,
+ "Invalid link_speeds for port %u, autonegotiation disabled",
+ dev->data->port_id);
return -EINVAL;
}
@@ -1957,19 +2002,21 @@ i40e_dev_start(struct rte_eth_dev *dev)
/* Map queues with MSIX interrupt */
main_vsi->nb_used_qps = dev->data->nb_rx_queues -
pf->nb_cfg_vmdq_vsi * RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM;
- i40e_vsi_queues_bind_intr(main_vsi);
+ i40e_vsi_queues_bind_intr(main_vsi, I40E_ITR_INDEX_DEFAULT);
i40e_vsi_enable_queues_intr(main_vsi);
/* Map VMDQ VSI queues with MSIX interrupt */
for (i = 0; i < pf->nb_cfg_vmdq_vsi; i++) {
pf->vmdq[i].vsi->nb_used_qps = RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM;
- i40e_vsi_queues_bind_intr(pf->vmdq[i].vsi);
+ i40e_vsi_queues_bind_intr(pf->vmdq[i].vsi,
+ I40E_ITR_INDEX_DEFAULT);
i40e_vsi_enable_queues_intr(pf->vmdq[i].vsi);
}
/* enable FDIR MSIX interrupt */
if (pf->fdir.fdir_vsi) {
- i40e_vsi_queues_bind_intr(pf->fdir.fdir_vsi);
+ i40e_vsi_queues_bind_intr(pf->fdir.fdir_vsi,
+ I40E_ITR_INDEX_NONE);
i40e_vsi_enable_queues_intr(pf->fdir.fdir_vsi);
}
@@ -2063,7 +2110,6 @@ i40e_dev_stop(struct rte_eth_dev *dev)
struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
struct i40e_vsi *main_vsi = pf->main_vsi;
- struct i40e_mirror_rule *p_mirror;
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
int i;
@@ -2092,13 +2138,6 @@ i40e_dev_stop(struct rte_eth_dev *dev)
/* Set link down */
i40e_dev_set_link_down(dev);
- /* Remove all mirror rules */
- while ((p_mirror = TAILQ_FIRST(&pf->mirror_list))) {
- TAILQ_REMOVE(&pf->mirror_list, p_mirror, rules);
- rte_free(p_mirror);
- }
- pf->nb_mirror_rule = 0;
-
if (!rte_intr_allow_others(intr_handle))
/* resume to the default handler */
rte_intr_callback_register(intr_handle,
@@ -2115,6 +2154,9 @@ i40e_dev_stop(struct rte_eth_dev *dev)
/* reset hierarchy commit */
pf->tm_conf.committed = false;
+ /* Remove all the queue region configuration */
+ i40e_flush_queue_region_all_conf(dev, hw, pf, 0);
+
hw->adapter_stopped = 1;
}
@@ -2125,12 +2167,34 @@ i40e_dev_close(struct rte_eth_dev *dev)
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+ struct i40e_mirror_rule *p_mirror;
uint32_t reg;
int i;
+ int ret;
PMD_INIT_FUNC_TRACE();
i40e_dev_stop(dev);
+
+ /* Remove all mirror rules */
+ while ((p_mirror = TAILQ_FIRST(&pf->mirror_list))) {
+ ret = i40e_aq_del_mirror_rule(hw,
+ pf->main_vsi->veb->seid,
+ p_mirror->rule_type,
+ p_mirror->entries,
+ p_mirror->num_entries,
+ p_mirror->id);
+ if (ret < 0)
+ PMD_DRV_LOG(ERR, "failed to remove mirror rule: "
+ "status = %d, aq_err = %d.", ret,
+ hw->aq.asq_last_status);
+
+ /* remove mirror software resource anyway */
+ TAILQ_REMOVE(&pf->mirror_list, p_mirror, rules);
+ rte_free(p_mirror);
+ pf->nb_mirror_rule--;
+ }
+
i40e_dev_free_queues(dev);
/* Disable interrupt */
@@ -2165,6 +2229,32 @@ i40e_dev_close(struct rte_eth_dev *dev)
I40E_WRITE_FLUSH(hw);
}
+/*
+ * Reset the PF device only to re-initialize resources in the PMD layer
+ */
+static int
+i40e_dev_reset(struct rte_eth_dev *dev)
+{
+ int ret;
+
+	/* When a DPDK PMD PF begins to reset the PF port, it should notify all
+	 * of its VFs so that they stay aligned with it. The detailed
+	 * notification mechanism is PMD-specific. For the i40e PF it is rather
+	 * complex, so to avoid unexpected behavior in the VFs, resetting a PF
+	 * with SR-IOV activated is currently not supported. It might be
+	 * supported later.
+ */
+ if (dev->data->sriov.active)
+ return -ENOTSUP;
+
+ ret = eth_i40e_dev_uninit(dev);
+ if (ret)
+ return ret;
+
+ ret = eth_i40e_dev_init(dev);
+
+ return ret;
+}
+
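
A minimal sketch of the application flow this new dev_reset callback enables via rte_eth_dev_reset(); queue re-setup is elided and the port configuration is assumed to be kept by the application:

	#include <rte_ethdev.h>

	static int
	app_reset_port(uint16_t port_id, const struct rte_eth_conf *conf,
		       uint16_t nb_rxq, uint16_t nb_txq)
	{
		int ret;

		rte_eth_dev_stop(port_id);
		ret = rte_eth_dev_reset(port_id);	/* PMD uninit + init */
		if (ret != 0)
			return ret;		/* e.g. -ENOTSUP with SR-IOV */
		ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, conf);
		if (ret != 0)
			return ret;
		/* Re-setup Rx/Tx queues here, then restart the port. */
		return rte_eth_dev_start(port_id);
	}
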
static void
i40e_dev_promiscuous_enable(struct rte_eth_dev *dev)
{
@@ -2653,7 +2743,7 @@ i40e_read_stats_registers(struct i40e_pf *pf, struct i40e_hw *hw)
}
/* Get all statistics of a port */
-static void
+static int
i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -2664,13 +2754,14 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
/* call read registers - updates values, now write them to struct */
i40e_read_stats_registers(pf, hw);
- stats->ipackets = pf->main_vsi->eth_stats.rx_unicast +
- pf->main_vsi->eth_stats.rx_multicast +
- pf->main_vsi->eth_stats.rx_broadcast -
+ stats->ipackets = ns->eth.rx_unicast +
+ ns->eth.rx_multicast +
+ ns->eth.rx_broadcast -
+ ns->eth.rx_discards -
pf->main_vsi->eth_stats.rx_discards;
- stats->opackets = pf->main_vsi->eth_stats.tx_unicast +
- pf->main_vsi->eth_stats.tx_multicast +
- pf->main_vsi->eth_stats.tx_broadcast;
+ stats->opackets = ns->eth.tx_unicast +
+ ns->eth.tx_multicast +
+ ns->eth.tx_broadcast;
stats->ibytes = ns->eth.rx_bytes;
stats->obytes = ns->eth.tx_bytes;
stats->oerrors = ns->eth.tx_errors +
@@ -2752,6 +2843,7 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
ns->checksum_error);
PMD_DRV_LOG(DEBUG, "fdir_match: %"PRIu64"", ns->fd_sb_match);
PMD_DRV_LOG(DEBUG, "***************** PF stats end ********************");
+ return 0;
}
/* Reset the statistics */
@@ -2965,7 +3057,7 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
dev_info->hash_key_size = (I40E_PFQF_HKEY_MAX_INDEX + 1) *
sizeof(uint32_t);
dev_info->reta_size = pf->hash_lut_size;
- dev_info->flow_type_rss_offloads = I40E_RSS_OFFLOAD_ALL;
+ dev_info->flow_type_rss_offloads = pf->adapter->flow_types_mask;
dev_info->default_rxconf = (struct rte_eth_rxconf) {
.rx_thresh = {
@@ -3129,7 +3221,7 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev,
return ret;
}
-static void
+static int
i40e_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -3162,6 +3254,8 @@ i40e_vlan_offload_set(struct rte_eth_dev *dev, int mask)
else
i40e_vsi_config_double_vlan(vsi, FALSE);
}
+
+ return 0;
}
static void
@@ -3225,6 +3319,13 @@ i40e_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
fc_conf->pause_time = pf->fc_conf.pause_time;
+
+	/* Read the registers, in case they were modified by another port */
+ pf->fc_conf.high_water[I40E_MAX_TRAFFIC_CLASS] =
+ I40E_READ_REG(hw, I40E_GLRPB_GHW) >> I40E_KILOSHIFT;
+ pf->fc_conf.low_water[I40E_MAX_TRAFFIC_CLASS] =
+ I40E_READ_REG(hw, I40E_GLRPB_GLW) >> I40E_KILOSHIFT;
+
fc_conf->high_water = pf->fc_conf.high_water[I40E_MAX_TRAFFIC_CLASS];
fc_conf->low_water = pf->fc_conf.low_water[I40E_MAX_TRAFFIC_CLASS];
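
For reference, the reads above shift the GLRPB_GHW/GLW register values down by I40E_KILOSHIFT, so the stored fc_conf water marks are in kilobytes. Worked arithmetic for the defaults changed earlier in this patch, illustration only:

	0xF2000 >> 10 = 991232 / 1024 = 968 KB  (new default high and low water)
	0x1C40 / 1024 = 7 KB                    (previous default high water)
	0x1A40 / 1024 = 6 KB                    (previous default low water)
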
@@ -3400,7 +3501,7 @@ i40e_macaddr_add(struct rte_eth_dev *dev,
return -EINVAL;
}
- (void)rte_memcpy(&mac_filter.mac_addr, mac_addr, ETHER_ADDR_LEN);
+ rte_memcpy(&mac_filter.mac_addr, mac_addr, ETHER_ADDR_LEN);
if (dev->data->dev_conf.rxmode.hw_vlan_filter)
mac_filter.filter_type = RTE_MACVLAN_PERFECT_MATCH;
else
@@ -3505,10 +3606,10 @@ i40e_vf_mac_filter_set(struct i40e_pf *pf,
}
if (add) {
- (void)rte_memcpy(&old_mac, hw->mac.addr, ETHER_ADDR_LEN);
- (void)rte_memcpy(hw->mac.addr, new_mac->addr_bytes,
+ rte_memcpy(&old_mac, hw->mac.addr, ETHER_ADDR_LEN);
+ rte_memcpy(hw->mac.addr, new_mac->addr_bytes,
ETHER_ADDR_LEN);
- (void)rte_memcpy(&mac_filter.mac_addr, &filter->mac_addr,
+ rte_memcpy(&mac_filter.mac_addr, &filter->mac_addr,
ETHER_ADDR_LEN);
mac_filter.filter_type = filter->filter_type;
@@ -3519,7 +3620,7 @@ i40e_vf_mac_filter_set(struct i40e_pf *pf,
}
ether_addr_copy(new_mac, &pf->dev_addr);
} else {
- (void)rte_memcpy(hw->mac.addr, hw->mac.perm_addr,
+ rte_memcpy(hw->mac.addr, hw->mac.perm_addr,
ETHER_ADDR_LEN);
ret = i40e_vsi_delete_mac(vf->vsi, &filter->mac_addr);
if (ret != I40E_SUCCESS) {
@@ -3741,7 +3842,7 @@ i40e_allocate_dma_mem_d(__attribute__((unused)) struct i40e_hw *hw,
mem->size = size;
mem->va = mz->addr;
- mem->pa = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
+ mem->pa = mz->iova;
mem->zone = (const void *)mz;
PMD_DRV_LOG(DEBUG,
"memzone %s allocated with physical address: %"PRIu64,
@@ -4311,7 +4412,7 @@ i40e_vsi_vlan_pvid_set(struct i40e_vsi *vsi,
vsi->info.valid_sections =
rte_cpu_to_le_16(I40E_AQ_VSI_PROP_VLAN_VALID);
memset(&ctxt, 0, sizeof(ctxt));
- (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+ rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
ctxt.seid = vsi->seid;
hw = I40E_VSI_TO_HW(vsi);
@@ -4350,7 +4451,7 @@ i40e_vsi_update_tc_bandwidth(struct i40e_vsi *vsi, uint8_t enabled_tcmap)
return ret;
}
- (void)rte_memcpy(vsi->info.qs_handle, tc_bw_data.qs_handles,
+ rte_memcpy(vsi->info.qs_handle, tc_bw_data.qs_handles,
sizeof(vsi->info.qs_handle));
return I40E_SUCCESS;
}
@@ -4607,7 +4708,7 @@ i40e_update_default_filter_setting(struct i40e_vsi *vsi)
if (vsi->type != I40E_VSI_MAIN)
return I40E_ERR_CONFIG;
memset(&def_filter, 0, sizeof(def_filter));
- (void)rte_memcpy(def_filter.mac_addr, hw->mac.perm_addr,
+ rte_memcpy(def_filter.mac_addr, hw->mac.perm_addr,
ETH_ADDR_LEN);
def_filter.vlan_tag = 0;
def_filter.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH |
@@ -4626,7 +4727,7 @@ i40e_update_default_filter_setting(struct i40e_vsi *vsi)
return I40E_ERR_NO_MEMORY;
}
mac = &f->mac_info.mac_addr;
- (void)rte_memcpy(&mac->addr_bytes, hw->mac.perm_addr,
+ rte_memcpy(&mac->addr_bytes, hw->mac.perm_addr,
ETH_ADDR_LEN);
f->mac_info.filter_type = RTE_MACVLAN_PERFECT_MATCH;
TAILQ_INSERT_TAIL(&vsi->mac_list, f, next);
@@ -4634,7 +4735,7 @@ i40e_update_default_filter_setting(struct i40e_vsi *vsi)
return ret;
}
- (void)rte_memcpy(&filter.mac_addr,
+ rte_memcpy(&filter.mac_addr,
(struct ether_addr *)(hw->mac.perm_addr), ETH_ADDR_LEN);
filter.filter_type = RTE_MACVLAN_PERFECT_MATCH;
return i40e_vsi_add_mac(vsi, &filter);
@@ -4895,7 +4996,7 @@ i40e_vsi_setup(struct i40e_pf *pf,
PMD_DRV_LOG(ERR, "Failed to get VSI params");
goto fail_msix_alloc;
}
- (void)rte_memcpy(&vsi->info, &ctxt.info,
+ rte_memcpy(&vsi->info, &ctxt.info,
sizeof(struct i40e_aqc_vsi_properties_data));
vsi->vsi_id = ctxt.vsi_number;
vsi->info.valid_sections = 0;
@@ -4913,7 +5014,7 @@ i40e_vsi_setup(struct i40e_pf *pf,
rte_cpu_to_le_16(I40E_AQ_VSI_PROP_VLAN_VALID);
vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH;
- (void)rte_memcpy(&ctxt.info, &vsi->info,
+ rte_memcpy(&ctxt.info, &vsi->info,
sizeof(struct i40e_aqc_vsi_properties_data));
ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info,
I40E_DEFAULT_TCMAP);
@@ -4934,15 +5035,15 @@ i40e_vsi_setup(struct i40e_pf *pf,
goto fail_msix_alloc;
}
- (void)rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,
+ rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,
sizeof(vsi->info.tc_mapping));
- (void)rte_memcpy(&vsi->info.queue_mapping,
+ rte_memcpy(&vsi->info.queue_mapping,
&ctxt.info.queue_mapping,
sizeof(vsi->info.queue_mapping));
vsi->info.mapping_flags = ctxt.info.mapping_flags;
vsi->info.valid_sections = 0;
- (void)rte_memcpy(pf->dev_addr.addr_bytes, hw->mac.perm_addr,
+ rte_memcpy(pf->dev_addr.addr_bytes, hw->mac.perm_addr,
ETH_ADDR_LEN);
/**
@@ -5085,7 +5186,7 @@ i40e_vsi_setup(struct i40e_pf *pf,
}
/* MAC/VLAN configuration */
- (void)rte_memcpy(&filter.mac_addr, &broadcast, ETHER_ADDR_LEN);
+ rte_memcpy(&filter.mac_addr, &broadcast, ETHER_ADDR_LEN);
filter.filter_type = RTE_MACVLAN_PERFECT_MATCH;
ret = i40e_vsi_add_mac(vsi, &filter);
@@ -5197,7 +5298,7 @@ i40e_vsi_config_vlan_stripping(struct i40e_vsi *vsi, bool on)
vsi->info.port_vlan_flags &= ~(I40E_AQ_VSI_PVLAN_EMOD_MASK);
vsi->info.port_vlan_flags |= vlan_flags;
ctxt.seid = vsi->seid;
- (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+ rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
if (ret)
PMD_DRV_LOG(INFO, "Update VSI failed to %s vlan stripping",
@@ -5215,7 +5316,11 @@ i40e_dev_init_vlan(struct rte_eth_dev *dev)
/* Apply vlan offload setting */
mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK;
- i40e_vlan_offload_set(dev, mask);
+ ret = i40e_vlan_offload_set(dev, mask);
+ if (ret) {
+ PMD_DRV_LOG(INFO, "Failed to update vlan offload");
+ return ret;
+ }
/* Apply double-vlan setting, not implemented yet */
@@ -5991,7 +6096,7 @@ i40e_add_macvlan_filters(struct i40e_vsi *vsi,
memset(req_list, 0, ele_buff_size);
for (i = 0; i < actual_num; i++) {
- (void)rte_memcpy(req_list[i].mac_addr,
+ rte_memcpy(req_list[i].mac_addr,
&filter[num + i].macaddr, ETH_ADDR_LEN);
req_list[i].vlan_tag =
rte_cpu_to_le_16(filter[num + i].vlan_id);
@@ -6066,7 +6171,7 @@ i40e_remove_macvlan_filters(struct i40e_vsi *vsi,
memset(req_list, 0, ele_buff_size);
for (i = 0; i < actual_num; i++) {
- (void)rte_memcpy(req_list[i].mac_addr,
+ rte_memcpy(req_list[i].mac_addr,
&filter[num + i].macaddr, ETH_ADDR_LEN);
req_list[i].vlan_tag =
rte_cpu_to_le_16(filter[num + i].vlan_id);
@@ -6217,7 +6322,7 @@ i40e_find_all_vlan_for_mac(struct i40e_vsi *vsi,
"vlan number doesn't match");
return I40E_ERR_PARAM;
}
- (void)rte_memcpy(&mv_f[i].macaddr,
+ rte_memcpy(&mv_f[i].macaddr,
addr, ETH_ADDR_LEN);
mv_f[i].vlan_id =
j * I40E_UINT32_BIT_SIZE + k;
@@ -6246,7 +6351,7 @@ i40e_find_all_mac_for_vlan(struct i40e_vsi *vsi,
PMD_DRV_LOG(ERR, "buffer number not match");
return I40E_ERR_PARAM;
}
- (void)rte_memcpy(&mv_f[i].macaddr, &f->mac_info.mac_addr,
+ rte_memcpy(&mv_f[i].macaddr, &f->mac_info.mac_addr,
ETH_ADDR_LEN);
mv_f[i].vlan_id = vlan;
mv_f[i].filter_type = f->mac_info.filter_type;
@@ -6282,7 +6387,7 @@ i40e_vsi_remove_all_macvlan_filter(struct i40e_vsi *vsi)
i = 0;
if (vsi->vlan_num == 0) {
TAILQ_FOREACH(f, &vsi->mac_list, next) {
- (void)rte_memcpy(&mv_f[i].macaddr,
+ rte_memcpy(&mv_f[i].macaddr,
&f->mac_info.mac_addr, ETH_ADDR_LEN);
mv_f[i].filter_type = f->mac_info.filter_type;
mv_f[i].vlan_id = 0;
@@ -6452,7 +6557,7 @@ i40e_vsi_add_mac(struct i40e_vsi *vsi, struct i40e_mac_filter_info *mac_filter)
for (i = 0; i < vlan_num; i++) {
mv_f[i].filter_type = mac_filter->filter_type;
- (void)rte_memcpy(&mv_f[i].macaddr, &mac_filter->mac_addr,
+ rte_memcpy(&mv_f[i].macaddr, &mac_filter->mac_addr,
ETH_ADDR_LEN);
}
@@ -6475,7 +6580,7 @@ i40e_vsi_add_mac(struct i40e_vsi *vsi, struct i40e_mac_filter_info *mac_filter)
ret = I40E_ERR_NO_MEMORY;
goto DONE;
}
- (void)rte_memcpy(&f->mac_info.mac_addr, &mac_filter->mac_addr,
+ rte_memcpy(&f->mac_info.mac_addr, &mac_filter->mac_addr,
ETH_ADDR_LEN);
f->mac_info.filter_type = mac_filter->filter_type;
TAILQ_INSERT_TAIL(&vsi->mac_list, f, next);
@@ -6522,7 +6627,7 @@ i40e_vsi_delete_mac(struct i40e_vsi *vsi, struct ether_addr *addr)
for (i = 0; i < vlan_num; i++) {
mv_f[i].filter_type = filter_type;
- (void)rte_memcpy(&mv_f[i].macaddr, &f->mac_info.mac_addr,
+ rte_memcpy(&mv_f[i].macaddr, &f->mac_info.mac_addr,
ETH_ADDR_LEN);
}
if (filter_type == RTE_MACVLAN_PERFECT_MATCH ||
@@ -6549,104 +6654,36 @@ DONE:
/* Configure hash enable flags for RSS */
uint64_t
-i40e_config_hena(uint64_t flags, enum i40e_mac_type type)
+i40e_config_hena(const struct i40e_adapter *adapter, uint64_t flags)
{
uint64_t hena = 0;
+ int i;
if (!flags)
return hena;
- if (flags & ETH_RSS_FRAG_IPV4)
- hena |= 1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4;
- if (flags & ETH_RSS_NONFRAG_IPV4_TCP) {
- if (type == I40E_MAC_X722) {
- hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
- (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
- } else
- hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
- }
- if (flags & ETH_RSS_NONFRAG_IPV4_UDP) {
- if (type == I40E_MAC_X722) {
- hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
- (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
- (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
- } else
- hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
- }
- if (flags & ETH_RSS_NONFRAG_IPV4_SCTP)
- hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
- if (flags & ETH_RSS_NONFRAG_IPV4_OTHER)
- hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
- if (flags & ETH_RSS_FRAG_IPV6)
- hena |= 1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6;
- if (flags & ETH_RSS_NONFRAG_IPV6_TCP) {
- if (type == I40E_MAC_X722) {
- hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) |
- (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
- } else
- hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
- }
- if (flags & ETH_RSS_NONFRAG_IPV6_UDP) {
- if (type == I40E_MAC_X722) {
- hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
- (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
- (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
- } else
- hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+ for (i = RTE_ETH_FLOW_UNKNOWN + 1; i < I40E_FLOW_TYPE_MAX; i++) {
+ if (flags & (1ULL << i))
+ hena |= adapter->pctypes_tbl[i];
}
- if (flags & ETH_RSS_NONFRAG_IPV6_SCTP)
- hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP;
- if (flags & ETH_RSS_NONFRAG_IPV6_OTHER)
- hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER;
- if (flags & ETH_RSS_L2_PAYLOAD)
- hena |= 1ULL << I40E_FILTER_PCTYPE_L2_PAYLOAD;
return hena;
}
/* Parse the hash enable flags */
uint64_t
-i40e_parse_hena(uint64_t flags)
+i40e_parse_hena(const struct i40e_adapter *adapter, uint64_t flags)
{
uint64_t rss_hf = 0;
if (!flags)
return rss_hf;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4))
- rss_hf |= ETH_RSS_FRAG_IPV4;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP))
- rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK))
- rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP))
- rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP))
- rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP))
- rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP))
- rss_hf |= ETH_RSS_NONFRAG_IPV4_SCTP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER))
- rss_hf |= ETH_RSS_NONFRAG_IPV4_OTHER;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6))
- rss_hf |= ETH_RSS_FRAG_IPV6;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP))
- rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK))
- rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP))
- rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP))
- rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP))
- rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP))
- rss_hf |= ETH_RSS_NONFRAG_IPV6_SCTP;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER))
- rss_hf |= ETH_RSS_NONFRAG_IPV6_OTHER;
- if (flags & (1ULL << I40E_FILTER_PCTYPE_L2_PAYLOAD))
- rss_hf |= ETH_RSS_L2_PAYLOAD;
+ int i;
+ for (i = RTE_ETH_FLOW_UNKNOWN + 1; i < I40E_FLOW_TYPE_MAX; i++) {
+ if (flags & adapter->pctypes_tbl[i])
+ rss_hf |= (1ULL << i);
+ }
return rss_hf;
}
@@ -6655,16 +6692,9 @@ static void
i40e_pf_disable_rss(struct i40e_pf *pf)
{
struct i40e_hw *hw = I40E_PF_TO_HW(pf);
- uint64_t hena;
- hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0));
- hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32;
- if (hw->mac.type == I40E_MAC_X722)
- hena &= ~I40E_RSS_HENA_ALL_X722;
- else
- hena &= ~I40E_RSS_HENA_ALL;
- i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena);
- i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32));
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
+ i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);
I40E_WRITE_FLUSH(hw);
}
@@ -6736,7 +6766,6 @@ static int
i40e_hw_rss_hash_set(struct i40e_pf *pf, struct rte_eth_rss_conf *rss_conf)
{
struct i40e_hw *hw = I40E_PF_TO_HW(pf);
- uint64_t rss_hf;
uint64_t hena;
int ret;
@@ -6745,14 +6774,7 @@ i40e_hw_rss_hash_set(struct i40e_pf *pf, struct rte_eth_rss_conf *rss_conf)
if (ret)
return ret;
- rss_hf = rss_conf->rss_hf;
- hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0));
- hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32;
- if (hw->mac.type == I40E_MAC_X722)
- hena &= ~I40E_RSS_HENA_ALL_X722;
- else
- hena &= ~I40E_RSS_HENA_ALL;
- hena |= i40e_config_hena(rss_hf, hw->mac.type);
+ hena = i40e_config_hena(pf->adapter, rss_conf->rss_hf);
i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena);
i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32));
I40E_WRITE_FLUSH(hw);
@@ -6766,14 +6788,13 @@ i40e_dev_rss_hash_update(struct rte_eth_dev *dev,
{
struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
- uint64_t rss_hf = rss_conf->rss_hf & I40E_RSS_OFFLOAD_ALL;
+ uint64_t rss_hf = rss_conf->rss_hf & pf->adapter->flow_types_mask;
uint64_t hena;
hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0));
hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32;
- if (!(hena & ((hw->mac.type == I40E_MAC_X722)
- ? I40E_RSS_HENA_ALL_X722
- : I40E_RSS_HENA_ALL))) { /* RSS disabled */
+
+ if (!(hena & pf->adapter->pctypes_mask)) { /* RSS disabled */
if (rss_hf != 0) /* Enable RSS */
return -EINVAL;
return 0; /* Nothing to do */
@@ -6798,7 +6819,7 @@ i40e_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0));
hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32;
- rss_conf->rss_hf = i40e_parse_hena(hena);
+ rss_conf->rss_hf = i40e_parse_hena(pf->adapter, hena);
return 0;
}
@@ -7071,7 +7092,7 @@ i40e_status_code i40e_replace_mpls_l1_filter(struct i40e_pf *pf)
/* create L1 filter */
filter_replace.old_filter_type =
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IMAC;
- filter_replace.new_filter_type = I40E_AQC_ADD_L1_FILTER_TEID_MPLS;
+ filter_replace.new_filter_type = I40E_AQC_ADD_L1_FILTER_0X11;
filter_replace.tr_bit = 0;
/* Prepare the buffer, 3 entries */
@@ -7119,12 +7140,12 @@ i40e_status_code i40e_replace_mpls_cloud_filter(struct i40e_pf *pf)
I40E_AQC_MIRROR_CLOUD_FILTER;
filter_replace.old_filter_type = I40E_AQC_ADD_CLOUD_FILTER_IIP;
filter_replace.new_filter_type =
- I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoUDP;
+ I40E_AQC_ADD_CLOUD_FILTER_0X11;
/* Prepare the buffer, 2 entries */
filter_replace_buf.data[0] = I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG;
filter_replace_buf.data[0] |=
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
- filter_replace_buf.data[4] = I40E_AQC_ADD_L1_FILTER_TEID_MPLS;
+ filter_replace_buf.data[4] = I40E_AQC_ADD_L1_FILTER_0X11;
filter_replace_buf.data[4] |=
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
@@ -7142,12 +7163,131 @@ i40e_status_code i40e_replace_mpls_cloud_filter(struct i40e_pf *pf)
I40E_AQC_MIRROR_CLOUD_FILTER;
filter_replace.old_filter_type = I40E_AQC_ADD_CLOUD_FILTER_IMAC;
filter_replace.new_filter_type =
- I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoGRE;
+ I40E_AQC_ADD_CLOUD_FILTER_0X12;
/* Prepare the buffer, 2 entries */
filter_replace_buf.data[0] = I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG;
filter_replace_buf.data[0] |=
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
- filter_replace_buf.data[4] = I40E_AQC_ADD_L1_FILTER_TEID_MPLS;
+ filter_replace_buf.data[4] = I40E_AQC_ADD_L1_FILTER_0X11;
+ filter_replace_buf.data[4] |=
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+
+ status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
+ &filter_replace_buf);
+ return status;
+}
+
+static enum i40e_status_code
+i40e_replace_gtp_l1_filter(struct i40e_pf *pf)
+{
+ struct i40e_aqc_replace_cloud_filters_cmd filter_replace;
+ struct i40e_aqc_replace_cloud_filters_cmd_buf filter_replace_buf;
+ struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+ enum i40e_status_code status = I40E_SUCCESS;
+
+ /* For GTP-C */
+ memset(&filter_replace, 0,
+ sizeof(struct i40e_aqc_replace_cloud_filters_cmd));
+ memset(&filter_replace_buf, 0,
+ sizeof(struct i40e_aqc_replace_cloud_filters_cmd_buf));
+ /* create L1 filter */
+ filter_replace.old_filter_type =
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IMAC;
+ filter_replace.new_filter_type = I40E_AQC_ADD_L1_FILTER_0X12;
+ filter_replace.tr_bit = I40E_AQC_NEW_TR_22 |
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+ /* Prepare the buffer, 2 entries */
+ filter_replace_buf.data[0] =
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TEID_WORD0;
+ filter_replace_buf.data[0] |=
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+ filter_replace_buf.data[2] = 0xFF;
+ filter_replace_buf.data[3] = 0xFF;
+ filter_replace_buf.data[4] =
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TEID_WORD1;
+ filter_replace_buf.data[4] |=
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+ filter_replace_buf.data[6] = 0xFF;
+ filter_replace_buf.data[7] = 0xFF;
+ status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
+ &filter_replace_buf);
+ if (status < 0)
+ return status;
+
+ /* for GTP-U */
+ memset(&filter_replace, 0,
+ sizeof(struct i40e_aqc_replace_cloud_filters_cmd));
+ memset(&filter_replace_buf, 0,
+ sizeof(struct i40e_aqc_replace_cloud_filters_cmd_buf));
+ /* create L1 filter */
+ filter_replace.old_filter_type =
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TUNNLE_KEY;
+ filter_replace.new_filter_type = I40E_AQC_ADD_L1_FILTER_0X13;
+ filter_replace.tr_bit = I40E_AQC_NEW_TR_21 |
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+ /* Prepare the buffer, 2 entries */
+ filter_replace_buf.data[0] =
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TEID_WORD0;
+ filter_replace_buf.data[0] |=
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+ filter_replace_buf.data[2] = 0xFF;
+ filter_replace_buf.data[3] = 0xFF;
+ filter_replace_buf.data[4] =
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TEID_WORD1;
+ filter_replace_buf.data[4] |=
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+ filter_replace_buf.data[6] = 0xFF;
+ filter_replace_buf.data[7] = 0xFF;
+
+ status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
+ &filter_replace_buf);
+ return status;
+}
+
+static enum
+i40e_status_code i40e_replace_gtp_cloud_filter(struct i40e_pf *pf)
+{
+ struct i40e_aqc_replace_cloud_filters_cmd filter_replace;
+ struct i40e_aqc_replace_cloud_filters_cmd_buf filter_replace_buf;
+ struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+ enum i40e_status_code status = I40E_SUCCESS;
+
+ /* for GTP-C */
+ memset(&filter_replace, 0,
+ sizeof(struct i40e_aqc_replace_cloud_filters_cmd));
+ memset(&filter_replace_buf, 0,
+ sizeof(struct i40e_aqc_replace_cloud_filters_cmd_buf));
+ filter_replace.valid_flags = I40E_AQC_REPLACE_CLOUD_FILTER;
+ filter_replace.old_filter_type = I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN;
+ filter_replace.new_filter_type =
+ I40E_AQC_ADD_CLOUD_FILTER_0X11;
+ /* Prepare the buffer, 2 entries */
+ filter_replace_buf.data[0] = I40E_AQC_ADD_L1_FILTER_0X12;
+ filter_replace_buf.data[0] |=
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+ filter_replace_buf.data[4] = I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG;
+ filter_replace_buf.data[4] |=
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+ status = i40e_aq_replace_cloud_filters(hw, &filter_replace,
+ &filter_replace_buf);
+ if (status < 0)
+ return status;
+
+ /* for GTP-U */
+ memset(&filter_replace, 0,
+ sizeof(struct i40e_aqc_replace_cloud_filters_cmd));
+ memset(&filter_replace_buf, 0,
+ sizeof(struct i40e_aqc_replace_cloud_filters_cmd_buf));
+ filter_replace.valid_flags = I40E_AQC_REPLACE_CLOUD_FILTER;
+ filter_replace.old_filter_type =
+ I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID;
+ filter_replace.new_filter_type =
+ I40E_AQC_ADD_CLOUD_FILTER_0X12;
+ /* Prepare the buffer, 2 entries */
+ filter_replace_buf.data[0] = I40E_AQC_ADD_L1_FILTER_0X13;
+ filter_replace_buf.data[0] |=
+ I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
+ filter_replace_buf.data[4] = I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG;
filter_replace_buf.data[4] |=
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
@@ -7238,7 +7378,7 @@ i40e_dev_consistent_tunnel_filter_set(struct i40e_pf *pf,
pfilter->general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD2] =
0x40;
big_buffer = 1;
- tun_type = I40E_AQC_ADD_CLOUD_TNL_TYPE_MPLSoUDP;
+ tun_type = I40E_AQC_ADD_CLOUD_TNL_TYPE_MPLSOUDP;
break;
case I40E_TUNNEL_TYPE_MPLSoGRE:
if (!pf->mpls_replace_flag) {
@@ -7254,7 +7394,37 @@ i40e_dev_consistent_tunnel_filter_set(struct i40e_pf *pf,
pfilter->general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD2] =
0x0;
big_buffer = 1;
- tun_type = I40E_AQC_ADD_CLOUD_TNL_TYPE_MPLSoGRE;
+ tun_type = I40E_AQC_ADD_CLOUD_TNL_TYPE_MPLSOGRE;
+ break;
+ case I40E_TUNNEL_TYPE_GTPC:
+ if (!pf->gtp_replace_flag) {
+ i40e_replace_gtp_l1_filter(pf);
+ i40e_replace_gtp_cloud_filter(pf);
+ pf->gtp_replace_flag = 1;
+ }
+ teid_le = rte_cpu_to_le_32(tunnel_filter->tenant_id);
+ pfilter->general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD0] =
+ (teid_le >> 16) & 0xFFFF;
+ pfilter->general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD1] =
+ teid_le & 0xFFFF;
+ pfilter->general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD2] =
+ 0x0;
+ big_buffer = 1;
+ break;
+ case I40E_TUNNEL_TYPE_GTPU:
+ if (!pf->gtp_replace_flag) {
+ i40e_replace_gtp_l1_filter(pf);
+ i40e_replace_gtp_cloud_filter(pf);
+ pf->gtp_replace_flag = 1;
+ }
+ teid_le = rte_cpu_to_le_32(tunnel_filter->tenant_id);
+ pfilter->general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD0] =
+ (teid_le >> 16) & 0xFFFF;
+ pfilter->general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD1] =
+ teid_le & 0xFFFF;
+ pfilter->general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD2] =
+ 0x0;
+ big_buffer = 1;
break;
case I40E_TUNNEL_TYPE_QINQ:
if (!pf->qinq_replace_flag) {
@@ -7282,13 +7452,19 @@ i40e_dev_consistent_tunnel_filter_set(struct i40e_pf *pf,
if (tunnel_filter->tunnel_type == I40E_TUNNEL_TYPE_MPLSoUDP)
pfilter->element.flags =
- I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoUDP;
+ I40E_AQC_ADD_CLOUD_FILTER_0X11;
else if (tunnel_filter->tunnel_type == I40E_TUNNEL_TYPE_MPLSoGRE)
pfilter->element.flags =
- I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoGRE;
+ I40E_AQC_ADD_CLOUD_FILTER_0X12;
+ else if (tunnel_filter->tunnel_type == I40E_TUNNEL_TYPE_GTPC)
+ pfilter->element.flags =
+ I40E_AQC_ADD_CLOUD_FILTER_0X11;
+ else if (tunnel_filter->tunnel_type == I40E_TUNNEL_TYPE_GTPU)
+ pfilter->element.flags =
+ I40E_AQC_ADD_CLOUD_FILTER_0X12;
else if (tunnel_filter->tunnel_type == I40E_TUNNEL_TYPE_QINQ)
pfilter->element.flags |=
- I40E_AQC_ADD_CLOUD_FILTER_CUSTOM_QINQ;
+ I40E_AQC_ADD_CLOUD_FILTER_0X10;
else {
val = i40e_dev_get_filter_type(tunnel_filter->filter_type,
&pfilter->element.flags);
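
In the GTP-C/GTP-U cases above, the 32-bit TEID is converted to little-endian and split across two 16-bit general-fields words. Worked example with an illustrative value:

	#include <rte_byteorder.h>

	static void
	split_teid_example(void)
	{
		uint32_t teid_le = rte_cpu_to_le_32(0x12345678);
		uint16_t word0 = (teid_le >> 16) & 0xFFFF; /* 0x1234 on LE hosts */
		uint16_t word1 = teid_le & 0xFFFF;         /* 0x5678 on LE hosts */

		(void)word0;
		(void)word1;
	}
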
@@ -7573,7 +7749,7 @@ i40e_pf_config_rss(struct i40e_pf *pf)
}
rss_conf = pf->dev_data->dev_conf.rx_adv_conf.rss_conf;
- if ((rss_conf.rss_hf & I40E_RSS_OFFLOAD_ALL) == 0) {
+ if ((rss_conf.rss_hf & pf->adapter->flow_types_mask) == 0) {
i40e_pf_disable_rss(pf);
return 0;
}
@@ -7794,9 +7970,9 @@ static int
i40e_get_hash_filter_global_config(struct i40e_hw *hw,
struct rte_eth_hash_global_conf *g_cfg)
{
- uint32_t reg, mask = I40E_FLOW_TYPES;
- uint16_t i;
- enum i40e_filter_pctype pctype;
+ struct i40e_adapter *adapter = (struct i40e_adapter *)hw->back;
+ uint32_t reg;
+ uint16_t i, j;
memset(g_cfg, 0, sizeof(*g_cfg));
reg = i40e_read_rx_ctl(hw, I40E_GLQF_CTL);
@@ -7807,29 +7983,38 @@ i40e_get_hash_filter_global_config(struct i40e_hw *hw,
PMD_DRV_LOG(DEBUG, "Hash function is %s",
(reg & I40E_GLQF_CTL_HTOEP_MASK) ? "Toeplitz" : "Simple XOR");
- for (i = 0; mask && i < RTE_ETH_FLOW_MAX; i++) {
- if (!(mask & (1UL << i)))
- continue;
- mask &= ~(1UL << i);
- /* Bit set indicats the coresponding flow type is supported */
- g_cfg->valid_bit_mask[0] |= (1UL << i);
- /* if flowtype is invalid, continue */
- if (!I40E_VALID_FLOW(i))
+ /*
+	 * We work only with the lowest 32 bits, which is not correct. To work
+	 * properly, the size of valid_bit_mask should be increased to 64 bits,
+	 * but that would break the ABI. This modification will be done in the
+	 * next release.
+ */
+ g_cfg->valid_bit_mask[0] = (uint32_t)adapter->flow_types_mask;
+
+ for (i = RTE_ETH_FLOW_UNKNOWN + 1; i < UINT32_BIT; i++) {
+ if (!adapter->pctypes_tbl[i])
continue;
- pctype = i40e_flowtype_to_pctype(i);
- reg = i40e_read_rx_ctl(hw, I40E_GLQF_HSYM(pctype));
- if (reg & I40E_GLQF_HSYM_SYMH_ENA_MASK)
- g_cfg->sym_hash_enable_mask[0] |= (1UL << i);
+ for (j = I40E_FILTER_PCTYPE_INVALID + 1;
+ j < I40E_FILTER_PCTYPE_MAX; j++) {
+ if (adapter->pctypes_tbl[i] & (1ULL << j)) {
+ reg = i40e_read_rx_ctl(hw, I40E_GLQF_HSYM(j));
+ if (reg & I40E_GLQF_HSYM_SYMH_ENA_MASK) {
+ g_cfg->sym_hash_enable_mask[0] |=
+ (1UL << i);
+ }
+ }
+ }
}
return 0;
}
static int
-i40e_hash_global_config_check(struct rte_eth_hash_global_conf *g_cfg)
+i40e_hash_global_config_check(const struct i40e_adapter *adapter,
+ const struct rte_eth_hash_global_conf *g_cfg)
{
uint32_t i;
- uint32_t mask0, i40e_mask = I40E_FLOW_TYPES;
+ uint32_t mask0, i40e_mask = adapter->flow_types_mask;
if (g_cfg->hash_func != RTE_ETH_HASH_FUNCTION_TOEPLITZ &&
g_cfg->hash_func != RTE_ETH_HASH_FUNCTION_SIMPLE_XOR &&
@@ -7872,64 +8057,36 @@ static int
i40e_set_hash_filter_global_config(struct i40e_hw *hw,
struct rte_eth_hash_global_conf *g_cfg)
{
+ struct i40e_adapter *adapter = (struct i40e_adapter *)hw->back;
int ret;
- uint16_t i;
+ uint16_t i, j;
uint32_t reg;
- uint32_t mask0 = g_cfg->valid_bit_mask[0];
- enum i40e_filter_pctype pctype;
+ /*
+	 * We work only with the lowest 32 bits, which is not correct. To work
+	 * properly, the size of valid_bit_mask should be increased to 64 bits,
+	 * but that would break the ABI. This modification will be done in the
+	 * next release.
+ */
+ uint32_t mask0 = g_cfg->valid_bit_mask[0] &
+ (uint32_t)adapter->flow_types_mask;
/* Check the input parameters */
- ret = i40e_hash_global_config_check(g_cfg);
+ ret = i40e_hash_global_config_check(adapter, g_cfg);
if (ret < 0)
return ret;
- for (i = 0; mask0 && i < UINT32_BIT; i++) {
- if (!(mask0 & (1UL << i)))
- continue;
- mask0 &= ~(1UL << i);
- /* if flowtype is invalid, continue */
- if (!I40E_VALID_FLOW(i))
- continue;
- pctype = i40e_flowtype_to_pctype(i);
- reg = (g_cfg->sym_hash_enable_mask[0] & (1UL << i)) ?
- I40E_GLQF_HSYM_SYMH_ENA_MASK : 0;
- if (hw->mac.type == I40E_MAC_X722) {
- if (pctype == I40E_FILTER_PCTYPE_NONF_IPV4_UDP) {
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
- I40E_FILTER_PCTYPE_NONF_IPV4_UDP), reg);
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
- I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP),
- reg);
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
- I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP),
- reg);
- } else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV4_TCP) {
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
- I40E_FILTER_PCTYPE_NONF_IPV4_TCP), reg);
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
- I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK),
- reg);
- } else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV6_UDP) {
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
- I40E_FILTER_PCTYPE_NONF_IPV6_UDP), reg);
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
- I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP),
- reg);
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
- I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP),
- reg);
- } else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV6_TCP) {
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
- I40E_FILTER_PCTYPE_NONF_IPV6_TCP), reg);
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(
- I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK),
- reg);
- } else {
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(pctype),
- reg);
+ for (i = RTE_ETH_FLOW_UNKNOWN + 1; mask0 && i < UINT32_BIT; i++) {
+ if (mask0 & (1UL << i)) {
+ reg = (g_cfg->sym_hash_enable_mask[0] & (1UL << i)) ?
+ I40E_GLQF_HSYM_SYMH_ENA_MASK : 0;
+
+ for (j = I40E_FILTER_PCTYPE_INVALID + 1;
+ j < I40E_FILTER_PCTYPE_MAX; j++) {
+ if (adapter->pctypes_tbl[i] & (1ULL << j))
+ i40e_write_rx_ctl(hw,
+ I40E_GLQF_HSYM(j),
+ reg);
}
- } else {
- i40e_write_rx_ctl(hw, I40E_GLQF_HSYM(pctype), reg);
}
}
@@ -8551,16 +8708,14 @@ i40e_filter_input_set_init(struct i40e_pf *pf)
uint64_t input_set, inset_reg;
uint32_t mask_reg[I40E_INSET_MASK_NUM_REG] = {0};
int num, i;
+ uint16_t flow_type;
for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
pctype <= I40E_FILTER_PCTYPE_L2_PAYLOAD; pctype++) {
- if (hw->mac.type == I40E_MAC_X722) {
- if (!I40E_VALID_PCTYPE_X722(pctype))
- continue;
- } else {
- if (!I40E_VALID_PCTYPE(pctype))
- continue;
- }
+ flow_type = i40e_pctype_to_flowtype(pf->adapter, pctype);
+
+ if (flow_type == RTE_ETH_FLOW_UNKNOWN)
+ continue;
input_set = i40e_get_default_input_set(pctype);
@@ -8623,7 +8778,8 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw,
return -EINVAL;
}
- if (!I40E_VALID_FLOW(conf->flow_type)) {
+ pctype = i40e_flowtype_to_pctype(pf->adapter, conf->flow_type);
+ if (pctype == I40E_FILTER_PCTYPE_INVALID) {
PMD_DRV_LOG(ERR, "invalid flow_type input.");
return -EINVAL;
}
@@ -8631,10 +8787,8 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw,
if (hw->mac.type == I40E_MAC_X722) {
/* get translated pctype value in fd pctype register */
pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl(hw,
- I40E_GLQF_FD_PCTYPES((int)i40e_flowtype_to_pctype(
- conf->flow_type)));
- } else
- pctype = i40e_flowtype_to_pctype(conf->flow_type);
+ I40E_GLQF_FD_PCTYPES((int)pctype));
+ }
ret = i40e_parse_input_set(&input_set, pctype, conf->field,
conf->inset_size);
@@ -8642,11 +8796,7 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw,
PMD_DRV_LOG(ERR, "Failed to parse input set");
return -EINVAL;
}
- if (i40e_validate_input_set(pctype, RTE_ETH_FILTER_HASH,
- input_set) != 0) {
- PMD_DRV_LOG(ERR, "Invalid input set");
- return -EINVAL;
- }
+
if (conf->op == RTE_ETH_INPUT_SET_ADD) {
/* get inset value in register */
inset_reg = i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, pctype));
@@ -8700,24 +8850,19 @@ i40e_fdir_filter_inset_select(struct i40e_pf *pf,
return -EINVAL;
}
- if (!I40E_VALID_FLOW(conf->flow_type)) {
+ pctype = i40e_flowtype_to_pctype(pf->adapter, conf->flow_type);
+
+ if (pctype == I40E_FILTER_PCTYPE_INVALID) {
PMD_DRV_LOG(ERR, "invalid flow_type input.");
return -EINVAL;
}
- pctype = i40e_flowtype_to_pctype(conf->flow_type);
-
ret = i40e_parse_input_set(&input_set, pctype, conf->field,
conf->inset_size);
if (ret) {
PMD_DRV_LOG(ERR, "Failed to parse input set");
return -EINVAL;
}
- if (i40e_validate_input_set(pctype, RTE_ETH_FILTER_FDIR,
- input_set) != 0) {
- PMD_DRV_LOG(ERR, "Invalid input set");
- return -EINVAL;
- }
/* get inset value in register */
inset_reg = i40e_read_rx_ctl(hw, I40E_PRTQF_FD_INSET(pctype, 1));
@@ -9156,72 +9301,42 @@ i40e_hw_init(struct rte_eth_dev *dev)
i40e_set_symmetric_hash_enable_per_port(hw, 0);
}
+/*
+ * For X722 it is possible to have multiple pctypes mapped to the same
+ * flowtype; however, this function returns only the single highest pctype
+ * index, which is not quite correct. This is a known problem of the i40e
+ * driver and needs to be fixed later.
+ */
enum i40e_filter_pctype
-i40e_flowtype_to_pctype(uint16_t flow_type)
-{
- static const enum i40e_filter_pctype pctype_table[] = {
- [RTE_ETH_FLOW_FRAG_IPV4] = I40E_FILTER_PCTYPE_FRAG_IPV4,
- [RTE_ETH_FLOW_NONFRAG_IPV4_UDP] =
- I40E_FILTER_PCTYPE_NONF_IPV4_UDP,
- [RTE_ETH_FLOW_NONFRAG_IPV4_TCP] =
- I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
- [RTE_ETH_FLOW_NONFRAG_IPV4_SCTP] =
- I40E_FILTER_PCTYPE_NONF_IPV4_SCTP,
- [RTE_ETH_FLOW_NONFRAG_IPV4_OTHER] =
- I40E_FILTER_PCTYPE_NONF_IPV4_OTHER,
- [RTE_ETH_FLOW_FRAG_IPV6] = I40E_FILTER_PCTYPE_FRAG_IPV6,
- [RTE_ETH_FLOW_NONFRAG_IPV6_UDP] =
- I40E_FILTER_PCTYPE_NONF_IPV6_UDP,
- [RTE_ETH_FLOW_NONFRAG_IPV6_TCP] =
- I40E_FILTER_PCTYPE_NONF_IPV6_TCP,
- [RTE_ETH_FLOW_NONFRAG_IPV6_SCTP] =
- I40E_FILTER_PCTYPE_NONF_IPV6_SCTP,
- [RTE_ETH_FLOW_NONFRAG_IPV6_OTHER] =
- I40E_FILTER_PCTYPE_NONF_IPV6_OTHER,
- [RTE_ETH_FLOW_L2_PAYLOAD] = I40E_FILTER_PCTYPE_L2_PAYLOAD,
- };
+i40e_flowtype_to_pctype(const struct i40e_adapter *adapter, uint16_t flow_type)
+{
+ int i;
+ uint64_t pctype_mask;
- return pctype_table[flow_type];
+ if (flow_type < I40E_FLOW_TYPE_MAX) {
+ pctype_mask = adapter->pctypes_tbl[flow_type];
+ for (i = I40E_FILTER_PCTYPE_MAX - 1; i > 0; i--) {
+ if (pctype_mask & (1ULL << i))
+ return (enum i40e_filter_pctype)i;
+ }
+ }
+ return I40E_FILTER_PCTYPE_INVALID;
}
uint16_t
-i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype)
+i40e_pctype_to_flowtype(const struct i40e_adapter *adapter,
+ enum i40e_filter_pctype pctype)
{
- static const uint16_t flowtype_table[] = {
- [I40E_FILTER_PCTYPE_FRAG_IPV4] = RTE_ETH_FLOW_FRAG_IPV4,
- [I40E_FILTER_PCTYPE_NONF_IPV4_UDP] =
- RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
- [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] =
- RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
- [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] =
- RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
- [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] =
- RTE_ETH_FLOW_NONFRAG_IPV4_TCP,
- [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] =
- RTE_ETH_FLOW_NONFRAG_IPV4_TCP,
- [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] =
- RTE_ETH_FLOW_NONFRAG_IPV4_SCTP,
- [I40E_FILTER_PCTYPE_NONF_IPV4_OTHER] =
- RTE_ETH_FLOW_NONFRAG_IPV4_OTHER,
- [I40E_FILTER_PCTYPE_FRAG_IPV6] = RTE_ETH_FLOW_FRAG_IPV6,
- [I40E_FILTER_PCTYPE_NONF_IPV6_UDP] =
- RTE_ETH_FLOW_NONFRAG_IPV6_UDP,
- [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] =
- RTE_ETH_FLOW_NONFRAG_IPV6_UDP,
- [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] =
- RTE_ETH_FLOW_NONFRAG_IPV6_UDP,
- [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] =
- RTE_ETH_FLOW_NONFRAG_IPV6_TCP,
- [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] =
- RTE_ETH_FLOW_NONFRAG_IPV6_TCP,
- [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] =
- RTE_ETH_FLOW_NONFRAG_IPV6_SCTP,
- [I40E_FILTER_PCTYPE_NONF_IPV6_OTHER] =
- RTE_ETH_FLOW_NONFRAG_IPV6_OTHER,
- [I40E_FILTER_PCTYPE_L2_PAYLOAD] = RTE_ETH_FLOW_L2_PAYLOAD,
- };
+ uint16_t flowtype;
+ uint64_t pctype_mask = 1ULL << pctype;
+
+ for (flowtype = RTE_ETH_FLOW_UNKNOWN + 1; flowtype < I40E_FLOW_TYPE_MAX;
+ flowtype++) {
+ if (adapter->pctypes_tbl[flowtype] & pctype_mask)
+ return flowtype;
+ }
- return flowtype_table[pctype];
+ return RTE_ETH_FLOW_UNKNOWN;
}
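
Because several pctypes can share one flow type (see the comment above i40e_flowtype_to_pctype()), the pctype -> flowtype -> pctype round trip is lossy: the reverse lookup returns only the highest-numbered pctype in the table entry. A driver-internal sketch, assuming the default X722 table keeps the previous IPv4/TCP mapping:

	/* Uses driver-internal types from i40e_ethdev.h; illustration only. */
	static void
	pctype_roundtrip_example(const struct i40e_adapter *adapter)
	{
		uint16_t ft;
		enum i40e_filter_pctype pc;

		/* Both IPv4/TCP pctypes map to RTE_ETH_FLOW_NONFRAG_IPV4_TCP... */
		ft = i40e_pctype_to_flowtype(adapter,
				I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
		/* ...but the reverse lookup yields only one pctype for that flow
		 * type, which may differ from the pctype we started with. */
		pc = i40e_flowtype_to_pctype(adapter, ft);
		(void)pc;
	}
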
/*
@@ -9238,7 +9353,7 @@ i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype)
/* For both X710 and XL710 */
#define I40E_GL_SWR_PRI_JOIN_MAP_0_VALUE_1 0x10000200
-#define I40E_GL_SWR_PRI_JOIN_MAP_0_VALUE_2 0x20000200
+#define I40E_GL_SWR_PRI_JOIN_MAP_0_VALUE_2 0x203F0200
#define I40E_GL_SWR_PRI_JOIN_MAP_0 0x26CE00
#define I40E_GL_SWR_PRI_JOIN_MAP_2_VALUE 0x011f0200
@@ -10248,9 +10363,9 @@ i40e_vsi_config_tc(struct i40e_vsi *vsi, uint8_t tc_map)
goto out;
}
/* update the local VSI info with updated queue map */
- (void)rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,
+ rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,
sizeof(vsi->info.tc_mapping));
- (void)rte_memcpy(&vsi->info.queue_mapping,
+ rte_memcpy(&vsi->info.queue_mapping,
&ctxt.info.queue_mapping,
sizeof(vsi->info.queue_mapping));
vsi->info.mapping_flags = ctxt.info.mapping_flags;
@@ -10377,7 +10492,7 @@ i40e_dcb_hw_configure(struct i40e_pf *pf,
*
* Returns 0 on success, negative value on failure
*/
-static int
+int
i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb)
{
struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
@@ -10809,14 +10924,14 @@ i40e_tunnel_filter_restore(struct i40e_pf *pf)
sizeof(f->input.general_fields));
if (((f->input.flags &
- I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoUDP) ==
- I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoUDP) ||
+ I40E_AQC_ADD_CLOUD_FILTER_0X11) ==
+ I40E_AQC_ADD_CLOUD_FILTER_0X11) ||
((f->input.flags &
- I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoGRE) ==
- I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoGRE) ||
+ I40E_AQC_ADD_CLOUD_FILTER_0X12) ==
+ I40E_AQC_ADD_CLOUD_FILTER_0X12) ||
((f->input.flags &
- I40E_AQC_ADD_CLOUD_FILTER_CUSTOM_QINQ) ==
- I40E_AQC_ADD_CLOUD_FILTER_CUSTOM_QINQ))
+ I40E_AQC_ADD_CLOUD_FILTER_0X10) ==
+ I40E_AQC_ADD_CLOUD_FILTER_0X10))
big_buffer = 1;
if (big_buffer)
@@ -10851,6 +10966,301 @@ is_i40e_supported(struct rte_eth_dev *dev)
return is_device_supported(dev, &rte_i40e_pmd);
}
+struct i40e_customized_pctype*
+i40e_find_customized_pctype(struct i40e_pf *pf, uint8_t index)
+{
+ int i;
+
+ for (i = 0; i < I40E_CUSTOMIZED_MAX; i++) {
+ if (pf->customized_pctype[i].index == index)
+ return &pf->customized_pctype[i];
+ }
+ return NULL;
+}
+
+static int
+i40e_update_customized_pctype(struct rte_eth_dev *dev, uint8_t *pkg,
+ uint32_t pkg_size, uint32_t proto_num,
+ struct rte_pmd_i40e_proto_info *proto)
+{
+ struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+ uint32_t pctype_num;
+ struct rte_pmd_i40e_ptype_info *pctype;
+ uint32_t buff_size;
+ struct i40e_customized_pctype *new_pctype = NULL;
+ uint8_t proto_id;
+ uint8_t pctype_value;
+ char name[64];
+ uint32_t i, j, n;
+ int ret;
+
+ ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
+ (uint8_t *)&pctype_num, sizeof(pctype_num),
+ RTE_PMD_I40E_PKG_INFO_PCTYPE_NUM);
+ if (ret) {
+ PMD_DRV_LOG(ERR, "Failed to get pctype number");
+ return -1;
+ }
+ if (!pctype_num) {
+ PMD_DRV_LOG(INFO, "No new pctype added");
+ return -1;
+ }
+
+ buff_size = pctype_num * sizeof(struct rte_pmd_i40e_proto_info);
+ pctype = rte_zmalloc("new_pctype", buff_size, 0);
+ if (!pctype) {
+ PMD_DRV_LOG(ERR, "Failed to allocate memory");
+ return -1;
+ }
+ /* get information about new pctype list */
+ ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
+ (uint8_t *)pctype, buff_size,
+ RTE_PMD_I40E_PKG_INFO_PCTYPE_LIST);
+ if (ret) {
+ PMD_DRV_LOG(ERR, "Failed to get pctype list");
+ rte_free(pctype);
+ return -1;
+ }
+
+ /* Update customized pctype. */
+ for (i = 0; i < pctype_num; i++) {
+ pctype_value = pctype[i].ptype_id;
+ memset(name, 0, sizeof(name));
+ for (j = 0; j < RTE_PMD_I40E_PROTO_NUM; j++) {
+ proto_id = pctype[i].protocols[j];
+ if (proto_id == RTE_PMD_I40E_PROTO_UNUSED)
+ continue;
+ for (n = 0; n < proto_num; n++) {
+ if (proto[n].proto_id != proto_id)
+ continue;
+ strcat(name, proto[n].name);
+ strcat(name, "_");
+ break;
+ }
+ }
+ name[strlen(name) - 1] = '\0';
+ if (!strcmp(name, "GTPC"))
+ new_pctype =
+ i40e_find_customized_pctype(pf,
+ I40E_CUSTOMIZED_GTPC);
+ else if (!strcmp(name, "GTPU_IPV4"))
+ new_pctype =
+ i40e_find_customized_pctype(pf,
+ I40E_CUSTOMIZED_GTPU_IPV4);
+ else if (!strcmp(name, "GTPU_IPV6"))
+ new_pctype =
+ i40e_find_customized_pctype(pf,
+ I40E_CUSTOMIZED_GTPU_IPV6);
+ else if (!strcmp(name, "GTPU"))
+ new_pctype =
+ i40e_find_customized_pctype(pf,
+ I40E_CUSTOMIZED_GTPU);
+ if (new_pctype) {
+ new_pctype->pctype = pctype_value;
+ new_pctype->valid = true;
+ }
+ }
+
+ rte_free(pctype);
+ return 0;
+}
+
+static int
+i40e_update_customized_ptype(struct rte_eth_dev *dev, uint8_t *pkg,
+ uint32_t pkg_size, uint32_t proto_num,
+ struct rte_pmd_i40e_proto_info *proto)
+{
+ struct rte_pmd_i40e_ptype_mapping *ptype_mapping;
+ uint16_t port_id = dev->data->port_id;
+ uint32_t ptype_num;
+ struct rte_pmd_i40e_ptype_info *ptype;
+ uint32_t buff_size;
+ uint8_t proto_id;
+ char name[RTE_PMD_I40E_DDP_NAME_SIZE];
+ uint32_t i, j, n;
+ bool inner_ip;
+ int ret;
+
+ /* get information about new ptype num */
+ ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
+ (uint8_t *)&ptype_num, sizeof(ptype_num),
+ RTE_PMD_I40E_PKG_INFO_PTYPE_NUM);
+ if (ret) {
+ PMD_DRV_LOG(ERR, "Failed to get ptype number");
+ return ret;
+ }
+ if (!ptype_num) {
+ PMD_DRV_LOG(INFO, "No new ptype added");
+ return -1;
+ }
+
+ buff_size = ptype_num * sizeof(struct rte_pmd_i40e_ptype_info);
+ ptype = rte_zmalloc("new_ptype", buff_size, 0);
+ if (!ptype) {
+ PMD_DRV_LOG(ERR, "Failed to allocate memory");
+ return -1;
+ }
+
+ /* get information about new ptype list */
+ ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
+ (uint8_t *)ptype, buff_size,
+ RTE_PMD_I40E_PKG_INFO_PTYPE_LIST);
+ if (ret) {
+ PMD_DRV_LOG(ERR, "Failed to get ptype list");
+ rte_free(ptype);
+ return ret;
+ }
+
+ buff_size = ptype_num * sizeof(struct rte_pmd_i40e_ptype_mapping);
+ ptype_mapping = rte_zmalloc("ptype_mapping", buff_size, 0);
+ if (!ptype_mapping) {
+ PMD_DRV_LOG(ERR, "Failed to allocate memory");
+ rte_free(ptype);
+ return -1;
+ }
+
+ /* Update ptype mapping table. */
+ for (i = 0; i < ptype_num; i++) {
+ ptype_mapping[i].hw_ptype = ptype[i].ptype_id;
+ ptype_mapping[i].sw_ptype = 0;
+ inner_ip = false;
+ for (j = 0; j < RTE_PMD_I40E_PROTO_NUM; j++) {
+ proto_id = ptype[i].protocols[j];
+ if (proto_id == RTE_PMD_I40E_PROTO_UNUSED)
+ continue;
+ for (n = 0; n < proto_num; n++) {
+ if (proto[n].proto_id != proto_id)
+ continue;
+ memset(name, 0, sizeof(name));
+ strcpy(name, proto[n].name);
+ if (!strncmp(name, "IPV4", 4) && !inner_ip) {
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+ inner_ip = true;
+ } else if (!strncmp(name, "IPV4FRAG", 8) &&
+ inner_ip) {
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_INNER_L4_FRAG;
+ } else if (!strncmp(name, "IPV4", 4) &&
+ inner_ip)
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
+ else if (!strncmp(name, "IPV6", 4) &&
+ !inner_ip) {
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+ inner_ip = true;
+ } else if (!strncmp(name, "IPV6FRAG", 8) &&
+ inner_ip) {
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_INNER_L4_FRAG;
+ } else if (!strncmp(name, "IPV6", 4) &&
+ inner_ip)
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
+ else if (!strncmp(name, "GTPC", 4))
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_TUNNEL_GTPC;
+ else if (!strncmp(name, "GTPU", 4))
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_TUNNEL_GTPU;
+ else if (!strncmp(name, "UDP", 3))
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_INNER_L4_UDP;
+ else if (!strncmp(name, "TCP", 3))
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_INNER_L4_TCP;
+ else if (!strncmp(name, "SCTP", 4))
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_INNER_L4_SCTP;
+ else if (!strncmp(name, "ICMP", 4) ||
+ !strncmp(name, "ICMPV6", 6))
+ ptype_mapping[i].sw_ptype |=
+ RTE_PTYPE_INNER_L4_ICMP;
+
+ break;
+ }
+ }
+ }
+
+ ret = rte_pmd_i40e_ptype_mapping_update(port_id, ptype_mapping,
+ ptype_num, 0);
+ if (ret)
+ PMD_DRV_LOG(ERR, "Failed to update mapping table.");
+
+ rte_free(ptype_mapping);
+ rte_free(ptype);
+ return ret;
+}
+
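As a concrete illustration of the mapping loop above (not part of the patch; the protocol ordering is an assumed example): a DDP ptype whose protocol list reads outer IPV4, GTPU, inner IPV4, UDP would translate as sketched below.

/* Sketch: expected software ptype for a DDP entry whose protocol sequence
 * is outer IPV4 -> GTPU -> inner IPV4 -> UDP. Per the loop above, the first
 * IP protocol hit is treated as the outer header, later ones as inner.
 */
static uint32_t
example_gtpu_ipv4_udp_sw_ptype(void)
{
	return RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
	       RTE_PTYPE_TUNNEL_GTPU |
	       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
	       RTE_PTYPE_INNER_L4_UDP;
}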
+void
+i40e_update_customized_info(struct rte_eth_dev *dev, uint8_t *pkg,
+ uint32_t pkg_size)
+{
+ struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+ uint32_t proto_num;
+ struct rte_pmd_i40e_proto_info *proto;
+ uint32_t buff_size;
+ uint32_t i;
+ int ret;
+
+ /* get information about protocol number */
+ ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
+ (uint8_t *)&proto_num, sizeof(proto_num),
+ RTE_PMD_I40E_PKG_INFO_PROTOCOL_NUM);
+ if (ret) {
+ PMD_DRV_LOG(ERR, "Failed to get protocol number");
+ return;
+ }
+ if (!proto_num) {
+ PMD_DRV_LOG(INFO, "No new protocol added");
+ return;
+ }
+
+ buff_size = proto_num * sizeof(struct rte_pmd_i40e_proto_info);
+ proto = rte_zmalloc("new_proto", buff_size, 0);
+ if (!proto) {
+ PMD_DRV_LOG(ERR, "Failed to allocate memory");
+ return;
+ }
+
+ /* get information about protocol list */
+ ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
+ (uint8_t *)proto, buff_size,
+ RTE_PMD_I40E_PKG_INFO_PROTOCOL_LIST);
+ if (ret) {
+ PMD_DRV_LOG(ERR, "Failed to get protocol list");
+ rte_free(proto);
+ return;
+ }
+
+ /* Check if GTP is supported. */
+ for (i = 0; i < proto_num; i++) {
+ if (!strncmp(proto[i].name, "GTP", 3)) {
+ pf->gtp_support = true;
+ break;
+ }
+ }
+
+ /* Update customized pctype info */
+ ret = i40e_update_customized_pctype(dev, pkg, pkg_size,
+ proto_num, proto);
+ if (ret)
+ PMD_DRV_LOG(INFO, "No pctype is updated.");
+
+ /* Update customized ptype info */
+ ret = i40e_update_customized_ptype(dev, pkg, pkg_size,
+ proto_num, proto);
+ if (ret)
+ PMD_DRV_LOG(INFO, "No ptype is updated.");
+
+ rte_free(proto);
+}
+
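For context, a rough application-side sketch (not part of the patch) of how this path is reached: a DDP profile is written to the NIC through the rte_pmd_i40e API, after which the PMD is expected to refresh its customized pctype/ptype information via the function above. Buffer handling and error treatment are illustrative assumptions.

/* Sketch: write (add) a DDP profile buffer so the PMD can pick up any
 * GTP pctypes/ptypes the profile defines. The buffer is assumed to hold
 * the raw package contents read from disk by the caller.
 */
static int
load_gtp_profile(uint16_t port_id, uint8_t *pkg_buf, uint32_t pkg_size)
{
	int ret;

	ret = rte_pmd_i40e_process_ddp_package(port_id, pkg_buf, pkg_size,
					       RTE_PMD_I40E_PKG_OP_WR_ADD);
	if (ret < 0)
		return ret;

	return 0;
}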
/* Create a QinQ cloud filter
*
* The Fortville NIC has limited resources for tunnel filters,
@@ -10911,7 +11321,7 @@ i40e_cloud_filter_qinq_create(struct i40e_pf *pf)
/* create L1 filter */
filter_replace.old_filter_type =
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_IVLAN;
- filter_replace.new_filter_type = I40E_AQC_ADD_CLOUD_FILTER_CUSTOM_QINQ;
+ filter_replace.new_filter_type = I40E_AQC_ADD_CLOUD_FILTER_0X10;
filter_replace.tr_bit = 0;
/* Prepare the buffer, 2 entries */
@@ -10942,13 +11352,13 @@ i40e_cloud_filter_qinq_create(struct i40e_pf *pf)
/* create L2 filter, input for L2 filter will be L1 filter */
filter_replace.valid_flags = I40E_AQC_REPLACE_CLOUD_FILTER;
filter_replace.old_filter_type = I40E_AQC_ADD_CLOUD_FILTER_OIP;
- filter_replace.new_filter_type = I40E_AQC_ADD_CLOUD_FILTER_CUSTOM_QINQ;
+ filter_replace.new_filter_type = I40E_AQC_ADD_CLOUD_FILTER_0X10;
/* Prepare the buffer, 2 entries */
filter_replace_buf.data[0] = I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG;
filter_replace_buf.data[0] |=
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
- filter_replace_buf.data[4] = I40E_AQC_ADD_CLOUD_FILTER_CUSTOM_QINQ;
+ filter_replace_buf.data[4] = I40E_AQC_ADD_CLOUD_FILTER_0X10;
filter_replace_buf.data[4] |=
I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED;
ret = i40e_aq_replace_cloud_filters(hw, &filter_replace,
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 48abc05a..cd67453d 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -186,9 +186,9 @@ enum i40e_flxpld_layer_idx {
/* Default queue interrupt throttling time in microseconds */
#define I40E_ITR_INDEX_DEFAULT 0
+#define I40E_ITR_INDEX_NONE 3
#define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
#define I40E_QUEUE_ITR_INTERVAL_MAX 8160 /* 8160 us */
-
/* Special FW support this floating VEB feature */
#define FLOATING_VEB_SUPPORTED_FW_MAJ 5
#define FLOATING_VEB_SUPPORTED_FW_MIN 0
@@ -260,6 +260,12 @@ enum i40e_flxpld_layer_idx {
#define I40E_QOS_BW_WEIGHT_MIN 1
/* The max bandwidth weight is 127. */
#define I40E_QOS_BW_WEIGHT_MAX 127
+/* The max queue region index is 7. */
+#define I40E_REGION_MAX_INDEX 7
+
+#define I40E_MAX_PERCENT 100
+#define I40E_DEFAULT_DCB_APP_NUM 1
+#define I40E_DEFAULT_DCB_APP_PRIO 3
/**
* The overhead from MTU to max frame size.
@@ -460,6 +466,119 @@ struct i40e_vmdq_info {
#define I40E_FLEX_WORD_MASK(off) (0x80 >> (off))
#define I40E_FDIR_IPv6_TC_OFFSET 20
+/* A structure used to define the input for GTP flow */
+struct i40e_gtp_flow {
+ struct rte_eth_udpv4_flow udp; /* IPv4 UDP fields to match. */
+ uint8_t msg_type; /* Message type. */
+ uint32_t teid; /* TEID in big endian. */
+};
+
+/* A structure used to define the input for GTP IPV4 flow */
+struct i40e_gtp_ipv4_flow {
+ struct i40e_gtp_flow gtp;
+ struct rte_eth_ipv4_flow ip4;
+};
+
+/* A structure used to define the input for GTP IPV6 flow */
+struct i40e_gtp_ipv6_flow {
+ struct i40e_gtp_flow gtp;
+ struct rte_eth_ipv6_flow ip6;
+};
+
+/* A structure used to define the input for raw type flow */
+struct i40e_raw_flow {
+ uint16_t pctype;
+ void *packet;
+ uint32_t length;
+};
+
+/*
+ * A union that contains the inputs for all types of flow.
+ * Items in a flow need to be in big endian.
+ */
+union i40e_fdir_flow {
+ struct rte_eth_l2_flow l2_flow;
+ struct rte_eth_udpv4_flow udp4_flow;
+ struct rte_eth_tcpv4_flow tcp4_flow;
+ struct rte_eth_sctpv4_flow sctp4_flow;
+ struct rte_eth_ipv4_flow ip4_flow;
+ struct rte_eth_udpv6_flow udp6_flow;
+ struct rte_eth_tcpv6_flow tcp6_flow;
+ struct rte_eth_sctpv6_flow sctp6_flow;
+ struct rte_eth_ipv6_flow ipv6_flow;
+ struct i40e_gtp_flow gtp_flow;
+ struct i40e_gtp_ipv4_flow gtp_ipv4_flow;
+ struct i40e_gtp_ipv6_flow gtp_ipv6_flow;
+ struct i40e_raw_flow raw_flow;
+};
+
+enum i40e_fdir_ip_type {
+ I40E_FDIR_IPTYPE_IPV4,
+ I40E_FDIR_IPTYPE_IPV6,
+};
+
+/* A structure used to contain the extended input of a flow */
+struct i40e_fdir_flow_ext {
+ uint16_t vlan_tci;
+ uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN];
+ /* Flexible payload bytes to match. */
+ uint8_t is_vf; /* 1 for VF, 0 for port dev */
+ uint16_t dst_id; /* VF ID, available when is_vf is 1 */
+ bool inner_ip; /* Whether there is an inner IP header */
+ enum i40e_fdir_ip_type iip_type; /* IP type of the inner IP header */
+ bool customized_pctype; /* Whether a customized pctype is used */
+ bool pkt_template; /* Whether a raw packet template is used */
+};
+
+/* A structure used to define the input for a flow director filter entry */
+struct i40e_fdir_input {
+ enum i40e_filter_pctype pctype;
+ union i40e_fdir_flow flow;
+ /* Flow fields to match, dependent on flow_type */
+ struct i40e_fdir_flow_ext flow_ext;
+ /* Additional fields to match */
+};
+
+/* Behavior to be taken if an FDIR filter matches */
+enum i40e_fdir_behavior {
+ I40E_FDIR_ACCEPT = 0,
+ I40E_FDIR_REJECT,
+ I40E_FDIR_PASSTHRU,
+};
+
+/* Flow director report status
+ * It defines what will be reported if an FDIR entry is matched.
+ */
+enum i40e_fdir_status {
+ I40E_FDIR_NO_REPORT_STATUS = 0, /* Report nothing. */
+ I40E_FDIR_REPORT_ID, /* Only report FD ID. */
+ I40E_FDIR_REPORT_ID_FLEX_4, /* Report FD ID and 4 flex bytes. */
+ I40E_FDIR_REPORT_FLEX_8, /* Report 8 flex bytes. */
+};
+
+/* A structure used to define the action taken when an FDIR packet filter matches. */
+struct i40e_fdir_action {
+ uint16_t rx_queue; /* Queue the packet is assigned to if the filter matches. */
+ enum i40e_fdir_behavior behavior; /* Behavior to be taken */
+ enum i40e_fdir_status report_status; /* Status report option */
+ /* If report_status is I40E_FDIR_REPORT_ID_FLEX_4 or
+ * I40E_FDIR_REPORT_FLEX_8, flex_off specifies where the reported
+ * flex bytes start in the flexible payload.
+ */
+ uint8_t flex_off;
+};
+
+/* A structure used to define the flow director filter entry by filter_ctrl API
+ * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_ADD and
+ * RTE_ETH_FILTER_DELETE operations.
+ */
+struct i40e_fdir_filter_conf {
+ uint32_t soft_id;
+ /* ID; a unique value is required when dealing with an FDIR entry */
+ struct i40e_fdir_input input; /* Input set */
+ struct i40e_fdir_action action; /* Action taken on match */
+};
+
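A minimal usage sketch (not part of the patch) of the new driver-private FDIR structures defined above: populate an i40e_fdir_filter_conf for an IPv4/UDP flow and pass it to i40e_flow_add_del_fdir_filter(), declared further down in this header. Addresses, ports and the soft ID are illustrative values.

/* Sketch: add an IPv4/UDP flow director rule through the new
 * i40e_fdir_filter_conf structure. Values are examples only.
 */
static int
add_udp4_fdir_rule(struct rte_eth_dev *dev, uint16_t rx_queue)
{
	struct i40e_fdir_filter_conf conf;

	memset(&conf, 0, sizeof(conf));
	conf.soft_id = 1;
	conf.input.pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
	/* Flow fields must be in big endian, as noted above. */
	conf.input.flow.udp4_flow.ip.src_ip = rte_cpu_to_be_32(0xC0A80001); /* 192.168.0.1 */
	conf.input.flow.udp4_flow.ip.dst_ip = rte_cpu_to_be_32(0xC0A80002); /* 192.168.0.2 */
	conf.input.flow.udp4_flow.src_port = rte_cpu_to_be_16(1024);
	conf.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789);
	conf.action.rx_queue = rx_queue;
	conf.action.behavior = I40E_FDIR_ACCEPT;
	conf.action.report_status = I40E_FDIR_REPORT_ID;

	return i40e_flow_add_del_fdir_filter(dev, &conf, true);
}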
/*
* Structure to store flex pit for flow director.
*/
@@ -478,12 +597,13 @@ struct i40e_fdir_flex_mask {
} bitmask[I40E_FDIR_BITMASK_NUM_WORD];
};
-#define I40E_FILTER_PCTYPE_MAX 64
-#define I40E_MAX_FDIR_FILTER_NUM (1024 * 8)
+#define I40E_FILTER_PCTYPE_INVALID 0
+#define I40E_FILTER_PCTYPE_MAX 64
+#define I40E_MAX_FDIR_FILTER_NUM (1024 * 8)
struct i40e_fdir_filter {
TAILQ_ENTRY(i40e_fdir_filter) rules;
- struct rte_eth_fdir_filter fdir;
+ struct i40e_fdir_filter_conf fdir;
};
TAILQ_HEAD(i40e_fdir_filter_list, i40e_fdir_filter);
@@ -541,17 +661,49 @@ struct i40e_ethertype_rule {
struct rte_hash *hash_table;
};
+/* queue region info */
+struct i40e_queue_region_info {
+ /* the region id for this configuration */
+ uint8_t region_id;
+ /* the start queue index for this region */
+ uint8_t queue_start_index;
+ /* the total queue number of this queue region */
+ uint8_t queue_num;
+ /* the total number of user priority for this region */
+ uint8_t user_priority_num;
+ /* the packet's user priority for this region */
+ uint8_t user_priority[I40E_MAX_USER_PRIORITY];
+ /* the total number of flowtype for this region */
+ uint8_t flowtype_num;
+ /**
+ * the pctype or hardware flow type of the packet;
+ * the specific index for each type is defined
+ * in i40e_type.h as enum i40e_filter_pctype.
+ */
+ uint8_t hw_flowtype[I40E_FILTER_PCTYPE_MAX];
+};
+
+struct i40e_queue_regions {
+ /* the total number of queue region for this port */
+ uint16_t queue_region_number;
+ struct i40e_queue_region_info region[I40E_REGION_MAX_INDEX + 1];
+};
+
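For illustration (not part of the patch), a sketch of how one entry of the structure above could be filled in; the values are arbitrary examples, and the external API that applies such a configuration is not shown in this hunk.

/* Sketch: describe one queue region covering queues 0-3, bound to
 * user priority 0 and the IPv4/UDP hardware flow type.
 */
static void
fill_example_region(struct i40e_queue_regions *regions)
{
	struct i40e_queue_region_info *r = &regions->region[0];

	regions->queue_region_number = 1;
	r->region_id = 0;
	r->queue_start_index = 0;
	r->queue_num = 4;
	r->user_priority_num = 1;
	r->user_priority[0] = 0;
	r->flowtype_num = 1;
	r->hw_flowtype[0] = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
}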
/* Tunnel filter number HW supports */
#define I40E_MAX_TUNNEL_FILTER_NUM 400
#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TEID_WORD0 44
#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TEID_WORD1 45
-#define I40E_AQC_ADD_CLOUD_TNL_TYPE_MPLSoUDP 8
-#define I40E_AQC_ADD_CLOUD_TNL_TYPE_MPLSoGRE 9
-#define I40E_AQC_ADD_CLOUD_FILTER_CUSTOM_QINQ 0x10
-#define I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoUDP 0x11
-#define I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoGRE 0x12
-#define I40E_AQC_ADD_L1_FILTER_TEID_MPLS 0x11
+#define I40E_AQC_ADD_CLOUD_TNL_TYPE_MPLSOUDP 8
+#define I40E_AQC_ADD_CLOUD_TNL_TYPE_MPLSOGRE 9
+#define I40E_AQC_ADD_CLOUD_FILTER_0X10 0x10
+#define I40E_AQC_ADD_CLOUD_FILTER_0X11 0x11
+#define I40E_AQC_ADD_CLOUD_FILTER_0X12 0x12
+#define I40E_AQC_ADD_L1_FILTER_0X11 0x11
+#define I40E_AQC_ADD_L1_FILTER_0X12 0x12
+#define I40E_AQC_ADD_L1_FILTER_0X13 0x13
+#define I40E_AQC_NEW_TR_21 21
+#define I40E_AQC_NEW_TR_22 22
enum i40e_tunnel_iptype {
I40E_TUNNEL_IPTYPE_IPV4,
@@ -599,6 +751,8 @@ enum i40e_tunnel_type {
I40E_TUNNEL_TYPE_MPLSoUDP,
I40E_TUNNEL_TYPE_MPLSoGRE,
I40E_TUNNEL_TYPE_QINQ,
+ I40E_TUNNEL_TYPE_GTPC,
+ I40E_TUNNEL_TYPE_GTPU,
I40E_TUNNEL_TYPE_MAX,
};
@@ -722,6 +876,21 @@ struct i40e_tm_conf {
bool committed;
};
+enum i40e_new_pctype {
+ I40E_CUSTOMIZED_GTPC = 0,
+ I40E_CUSTOMIZED_GTPU_IPV4,
+ I40E_CUSTOMIZED_GTPU_IPV6,
+ I40E_CUSTOMIZED_GTPU,
+ I40E_CUSTOMIZED_MAX,
+};
+
+#define I40E_FILTER_PCTYPE_INVALID 0
+struct i40e_customized_pctype {
+ enum i40e_new_pctype index; /* Indicates which customized pctype */
+ uint8_t pctype; /* New pctype value */
+ bool valid; /* Whether the entry is valid */
+};
+
/*
* Structure to store private data specific for PF instance.
*/
@@ -776,6 +945,7 @@ struct i40e_pf {
struct i40e_fdir_info fdir; /* flow director info */
struct i40e_ethertype_rule ethertype; /* Ethertype filter rule */
struct i40e_tunnel_rule tunnel; /* Tunnel filter rule */
+ struct i40e_queue_regions queue_region; /* queue region info */
struct i40e_fc_conf fc_conf; /* Flow control conf */
struct i40e_mirror_rule_list mirror_list;
uint16_t nb_mirror_rule; /* The number of mirror rules */
@@ -784,8 +954,14 @@ struct i40e_pf {
bool floating_veb_list[I40E_MAX_VF];
struct i40e_flow_list flow_list;
bool mpls_replace_flag; /* 1 - MPLS filter replace is done */
+ bool gtp_replace_flag; /* 1 - GTP-C/U filter replace is done */
bool qinq_replace_flag; /* QINQ filter replace is done */
struct i40e_tm_conf tm_conf;
+
+ /* Dynamic Device Personalization */
+ bool gtp_support; /* 1 - support GTP-C and GTP-U */
+ /* user-customized pctypes */
+ struct i40e_customized_pctype customized_pctype[I40E_CUSTOMIZED_MAX];
};
enum pending_msg {
@@ -852,7 +1028,8 @@ struct i40e_vf {
uint64_t flags;
};
-#define I40E_MAX_PKT_TYPE 256
+#define I40E_MAX_PKT_TYPE 256
+#define I40E_FLOW_TYPE_MAX 64
/*
* Structure to store private data for each PF/VF instance.
@@ -881,13 +1058,17 @@ struct i40e_adapter {
/* ptype mapping table */
uint32_t ptype_tbl[I40E_MAX_PKT_TYPE] __rte_cache_min_aligned;
+ /* flow type to pctype mapping table */
+ uint64_t pctypes_tbl[I40E_FLOW_TYPE_MAX] __rte_cache_min_aligned;
+ uint64_t flow_types_mask;
+ uint64_t pctypes_mask;
};
extern const struct rte_flow_ops i40e_flow_ops;
union i40e_filter_t {
struct rte_eth_ethertype_filter ethertype_filter;
- struct rte_eth_fdir_filter fdir_filter;
+ struct i40e_fdir_filter_conf fdir_filter;
struct rte_eth_tunnel_filter_conf tunnel_filter;
struct i40e_tunnel_filter_conf consistent_tunnel_filter;
};
@@ -919,14 +1100,14 @@ void i40e_update_vsi_stats(struct i40e_vsi *vsi);
void i40e_pf_disable_irq0(struct i40e_hw *hw);
void i40e_pf_enable_irq0(struct i40e_hw *hw);
int i40e_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
-void i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi);
+void i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t itr_idx);
void i40e_vsi_queues_unbind_intr(struct i40e_vsi *vsi);
int i40e_vsi_vlan_pvid_set(struct i40e_vsi *vsi,
struct i40e_vsi_vlan_pvid_info *info);
int i40e_vsi_config_vlan_stripping(struct i40e_vsi *vsi, bool on);
int i40e_vsi_config_vlan_filter(struct i40e_vsi *vsi, bool on);
-uint64_t i40e_config_hena(uint64_t flags, enum i40e_mac_type type);
-uint64_t i40e_parse_hena(uint64_t flags);
+uint64_t i40e_config_hena(const struct i40e_adapter *adapter, uint64_t flags);
+uint64_t i40e_parse_hena(const struct i40e_adapter *adapter, uint64_t flags);
enum i40e_status_code i40e_fdir_setup_tx_resources(struct i40e_pf *pf);
enum i40e_status_code i40e_fdir_setup_rx_resources(struct i40e_pf *pf);
int i40e_fdir_setup(struct i40e_pf *pf);
@@ -935,8 +1116,11 @@ const struct rte_memzone *i40e_memzone_reserve(const char *name,
int socket_id);
int i40e_fdir_configure(struct rte_eth_dev *dev);
void i40e_fdir_teardown(struct i40e_pf *pf);
-enum i40e_filter_pctype i40e_flowtype_to_pctype(uint16_t flow_type);
-uint16_t i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype);
+enum i40e_filter_pctype
+ i40e_flowtype_to_pctype(const struct i40e_adapter *adapter,
+ uint16_t flow_type);
+uint16_t i40e_pctype_to_flowtype(const struct i40e_adapter *adapter,
+ enum i40e_filter_pctype pctype);
int i40e_fdir_ctrl_func(struct rte_eth_dev *dev,
enum rte_filter_op filter_op,
void *arg);
@@ -961,7 +1145,7 @@ i40e_sw_ethertype_filter_lookup(struct i40e_ethertype_rule *ethertype_rule,
int i40e_sw_ethertype_filter_del(struct i40e_pf *pf,
struct i40e_ethertype_filter_input *input);
int i40e_sw_fdir_filter_del(struct i40e_pf *pf,
- struct rte_eth_fdir_input *input);
+ struct i40e_fdir_input *input);
struct i40e_tunnel_filter *
i40e_sw_tunnel_filter_lookup(struct i40e_tunnel_rule *tunnel_rule,
const struct i40e_tunnel_filter_input *input);
@@ -974,6 +1158,9 @@ int i40e_ethertype_filter_set(struct i40e_pf *pf,
int i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
const struct rte_eth_fdir_filter *filter,
bool add);
+int i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev,
+ const struct i40e_fdir_filter_conf *filter,
+ bool add);
int i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
struct rte_eth_tunnel_filter_conf *tunnel_filter,
uint8_t add);
@@ -1003,6 +1190,14 @@ void i40e_check_write_reg(struct i40e_hw *hw, uint32_t addr, uint32_t val);
int i40e_tm_ops_get(struct rte_eth_dev *dev, void *ops);
void i40e_tm_conf_init(struct rte_eth_dev *dev);
void i40e_tm_conf_uninit(struct rte_eth_dev *dev);
+struct i40e_customized_pctype*
+i40e_find_customized_pctype(struct i40e_pf *pf, uint8_t index);
+void i40e_update_customized_info(struct rte_eth_dev *dev, uint8_t *pkg,
+ uint32_t pkg_size);
+int i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb);
+int i40e_flush_queue_region_all_conf(struct rte_eth_dev *dev,
+ struct i40e_hw *hw, struct i40e_pf *pf, uint16_t on);
+void i40e_init_queue_region_conf(struct rte_eth_dev *dev);
#define I40E_DEV_TO_PCI(eth_dev) \
RTE_DEV_TO_PCI((eth_dev)->device)
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index f6d82934..02d9e579 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -47,10 +47,10 @@
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_alarm.h>
#include <rte_ether.h>
@@ -67,8 +67,6 @@
#include "i40e_rxtx.h"
#include "i40e_ethdev.h"
#include "i40e_pf.h"
-#define I40EVF_VSI_DEFAULT_MSIX_INTR 1
-#define I40EVF_VSI_DEFAULT_MSIX_INTR_LNX 0
/* busy wait delay in msec */
#define I40EVF_BUSY_WAIT_DELAY 10
@@ -108,7 +106,7 @@ static void i40evf_dev_info_get(struct rte_eth_dev *dev,
struct rte_eth_dev_info *dev_info);
static int i40evf_dev_link_update(struct rte_eth_dev *dev,
int wait_to_complete);
-static void i40evf_dev_stats_get(struct rte_eth_dev *dev,
+static int i40evf_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static int i40evf_dev_xstats_get(struct rte_eth_dev *dev,
struct rte_eth_xstat *xstats, unsigned n);
@@ -118,10 +116,9 @@ static int i40evf_dev_xstats_get_names(struct rte_eth_dev *dev,
static void i40evf_dev_xstats_reset(struct rte_eth_dev *dev);
static int i40evf_vlan_filter_set(struct rte_eth_dev *dev,
uint16_t vlan_id, int on);
-static void i40evf_vlan_offload_set(struct rte_eth_dev *dev, int mask);
-static int i40evf_vlan_pvid_set(struct rte_eth_dev *dev, uint16_t pvid,
- int on);
+static int i40evf_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static void i40evf_dev_close(struct rte_eth_dev *dev);
+static int i40evf_dev_reset(struct rte_eth_dev *dev);
static void i40evf_dev_promiscuous_enable(struct rte_eth_dev *dev);
static void i40evf_dev_promiscuous_disable(struct rte_eth_dev *dev);
static void i40evf_dev_allmulticast_enable(struct rte_eth_dev *dev);
@@ -199,15 +196,16 @@ static const struct eth_dev_ops i40evf_eth_dev_ops = {
.allmulticast_disable = i40evf_dev_allmulticast_disable,
.link_update = i40evf_dev_link_update,
.stats_get = i40evf_dev_stats_get,
+ .stats_reset = i40evf_dev_xstats_reset,
.xstats_get = i40evf_dev_xstats_get,
.xstats_get_names = i40evf_dev_xstats_get_names,
.xstats_reset = i40evf_dev_xstats_reset,
.dev_close = i40evf_dev_close,
+ .dev_reset = i40evf_dev_reset,
.dev_infos_get = i40evf_dev_info_get,
.dev_supported_ptypes_get = i40e_dev_supported_ptypes_get,
.vlan_filter_set = i40evf_vlan_filter_set,
.vlan_offload_set = i40evf_vlan_offload_set,
- .vlan_pvid_set = i40evf_vlan_pvid_set,
.rx_queue_start = i40evf_dev_rx_queue_start,
.rx_queue_stop = i40evf_dev_rx_queue_stop,
.tx_queue_start = i40evf_dev_tx_queue_start,
@@ -431,9 +429,7 @@ i40evf_check_api_version(struct rte_eth_dev *dev)
pver = (struct virtchnl_version_info *)args.out_buffer;
vf->version_major = pver->major;
vf->version_minor = pver->minor;
- if (vf->version_major == I40E_DPDK_VERSION_MAJOR)
- PMD_DRV_LOG(INFO, "Peer is DPDK PF host");
- else if ((vf->version_major == VIRTCHNL_VERSION_MAJOR) &&
+ if ((vf->version_major == VIRTCHNL_VERSION_MAJOR) &&
(vf->version_minor <= VIRTCHNL_VERSION_MINOR))
PMD_DRV_LOG(INFO, "Peer is Linux PF host");
else {
@@ -481,7 +477,7 @@ i40evf_get_vf_resource(struct rte_eth_dev *dev)
len = sizeof(struct virtchnl_vf_resource) +
I40E_MAX_VF_VSI * sizeof(struct virtchnl_vsi_resource);
- (void)rte_memcpy(vf->vf_res, args.out_buffer,
+ rte_memcpy(vf->vf_res, args.out_buffer,
RTE_MIN(args.out_size, len));
i40e_vf_parse_hw_config(hw, vf->vf_res);
@@ -563,37 +559,6 @@ i40evf_disable_vlan_strip(struct rte_eth_dev *dev)
return ret;
}
-static int
-i40evf_config_vlan_pvid(struct rte_eth_dev *dev,
- struct i40e_vsi_vlan_pvid_info *info)
-{
- struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
- int err;
- struct vf_cmd_info args;
- struct virtchnl_pvid_info tpid_info;
-
- if (info == NULL) {
- PMD_DRV_LOG(ERR, "invalid parameters");
- return I40E_ERR_PARAM;
- }
-
- memset(&tpid_info, 0, sizeof(tpid_info));
- tpid_info.vsi_id = vf->vsi_res->vsi_id;
- (void)rte_memcpy(&tpid_info.info, info, sizeof(*info));
-
- args.ops = (enum virtchnl_ops)I40E_VIRTCHNL_OP_CFG_VLAN_PVID;
- args.in_args = (uint8_t *)&tpid_info;
- args.in_args_size = sizeof(tpid_info);
- args.out_buffer = vf->aq_resp;
- args.out_size = I40E_AQ_BUF_SZ;
-
- err = i40evf_execute_vf_cmd(dev, &args);
- if (err)
- PMD_DRV_LOG(ERR, "fail to execute command CFG_VLAN_PVID");
-
- return err;
-}
-
static void
i40evf_fill_virtchnl_vsi_txq_info(struct virtchnl_txq_info *txq_info,
uint16_t vsi_id,
@@ -629,7 +594,6 @@ i40evf_fill_virtchnl_vsi_rxq_info(struct virtchnl_rxq_info *rxq_info,
}
}
-/* It configures VSI queues to co-work with Linux PF host */
static int
i40evf_configure_vsi_queues(struct rte_eth_dev *dev)
{
@@ -673,72 +637,6 @@ i40evf_configure_vsi_queues(struct rte_eth_dev *dev)
return ret;
}
-/* It configures VSI queues to co-work with DPDK PF host */
-static int
-i40evf_configure_vsi_queues_ext(struct rte_eth_dev *dev)
-{
- struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
- struct i40e_rx_queue **rxq =
- (struct i40e_rx_queue **)dev->data->rx_queues;
- struct i40e_tx_queue **txq =
- (struct i40e_tx_queue **)dev->data->tx_queues;
- struct virtchnl_vsi_queue_config_ext_info *vc_vqcei;
- struct virtchnl_queue_pair_ext_info *vc_qpei;
- struct vf_cmd_info args;
- uint16_t i, nb_qp = vf->num_queue_pairs;
- const uint32_t size =
- I40E_VIRTCHNL_CONFIG_VSI_QUEUES_SIZE(vc_vqcei, nb_qp);
- uint8_t buff[size];
- int ret;
-
- memset(buff, 0, sizeof(buff));
- vc_vqcei = (struct virtchnl_vsi_queue_config_ext_info *)buff;
- vc_vqcei->vsi_id = vf->vsi_res->vsi_id;
- vc_vqcei->num_queue_pairs = nb_qp;
- vc_qpei = vc_vqcei->qpair;
- for (i = 0; i < nb_qp; i++, vc_qpei++) {
- i40evf_fill_virtchnl_vsi_txq_info(&vc_qpei->txq,
- vc_vqcei->vsi_id, i, dev->data->nb_tx_queues, txq[i]);
- i40evf_fill_virtchnl_vsi_rxq_info(&vc_qpei->rxq,
- vc_vqcei->vsi_id, i, dev->data->nb_rx_queues,
- vf->max_pkt_len, rxq[i]);
- if (i < dev->data->nb_rx_queues)
- /*
- * It adds extra info for configuring VSI queues, which
- * is needed to enable the configurable crc stripping
- * in VF.
- */
- vc_qpei->rxq_ext.crcstrip =
- dev->data->dev_conf.rxmode.hw_strip_crc;
- }
- memset(&args, 0, sizeof(args));
- args.ops =
- (enum virtchnl_ops)VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT;
- args.in_args = (uint8_t *)vc_vqcei;
- args.in_args_size = size;
- args.out_buffer = vf->aq_resp;
- args.out_size = I40E_AQ_BUF_SZ;
- ret = i40evf_execute_vf_cmd(dev, &args);
- if (ret)
- PMD_DRV_LOG(ERR, "Failed to execute command of "
- "VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT");
-
- return ret;
-}
-
-static int
-i40evf_configure_queues(struct rte_eth_dev *dev)
-{
- struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-
- if (vf->version_major == I40E_DPDK_VERSION_MAJOR)
- /* To support DPDK PF host */
- return i40evf_configure_vsi_queues_ext(dev);
- else
- /* To support Linux PF host */
- return i40evf_configure_vsi_queues(dev);
-}
-
static int
i40evf_config_irq_map(struct rte_eth_dev *dev)
{
@@ -752,14 +650,10 @@ i40evf_config_irq_map(struct rte_eth_dev *dev)
uint32_t vector_id;
int i, err;
- if (rte_intr_allow_others(intr_handle)) {
- if (vf->version_major == I40E_DPDK_VERSION_MAJOR)
- vector_id = I40EVF_VSI_DEFAULT_MSIX_INTR;
- else
- vector_id = I40EVF_VSI_DEFAULT_MSIX_INTR_LNX;
- } else {
+ if (rte_intr_allow_others(intr_handle))
+ vector_id = I40E_RX_VEC_START;
+ else
vector_id = I40E_MISC_VEC_ID;
- }
map_info = (struct virtchnl_irq_map_info *)cmd_buffer;
map_info->num_vectors = 1;
@@ -888,7 +782,7 @@ i40evf_add_mac_addr(struct rte_eth_dev *dev,
int err;
struct vf_cmd_info args;
- if (i40e_validate_mac_addr(addr->addr_bytes) != I40E_SUCCESS) {
+ if (is_zero_ether_addr(addr)) {
PMD_DRV_LOG(ERR, "Invalid mac:%x:%x:%x:%x:%x:%x",
addr->addr_bytes[0], addr->addr_bytes[1],
addr->addr_bytes[2], addr->addr_bytes[3],
@@ -899,7 +793,7 @@ i40evf_add_mac_addr(struct rte_eth_dev *dev,
list = (struct virtchnl_ether_addr_list *)cmd_buffer;
list->vsi_id = vf->vsi_res->vsi_id;
list->num_elements = 1;
- (void)rte_memcpy(list->list[0].addr, addr->addr_bytes,
+ rte_memcpy(list->list[0].addr, addr->addr_bytes,
sizeof(addr->addr_bytes));
args.ops = VIRTCHNL_OP_ADD_ETH_ADDR;
@@ -939,7 +833,7 @@ i40evf_del_mac_addr_by_addr(struct rte_eth_dev *dev,
list = (struct virtchnl_ether_addr_list *)cmd_buffer;
list->vsi_id = vf->vsi_res->vsi_id;
list->num_elements = 1;
- (void)rte_memcpy(list->list[0].addr, addr->addr_bytes,
+ rte_memcpy(list->list[0].addr, addr->addr_bytes,
sizeof(addr->addr_bytes));
args.ops = VIRTCHNL_OP_DEL_ETH_ADDR;
@@ -968,7 +862,7 @@ i40evf_del_mac_addr(struct rte_eth_dev *dev, uint32_t index)
}
static int
-i40evf_update_stats(struct rte_eth_dev *dev, struct i40e_eth_stats **pstats)
+i40evf_query_stats(struct rte_eth_dev *dev, struct i40e_eth_stats **pstats)
{
struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
struct virtchnl_queue_select q_stats;
@@ -993,26 +887,58 @@ i40evf_update_stats(struct rte_eth_dev *dev, struct i40e_eth_stats **pstats)
return 0;
}
-static int
-i40evf_get_statistics(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+static void
+i40evf_stat_update_48(uint64_t *offset,
+ uint64_t *stat)
{
- int ret;
- struct i40e_eth_stats *pstats = NULL;
+ if (*stat >= *offset)
+ *stat = *stat - *offset;
+ else
+ *stat = (uint64_t)((*stat +
+ ((uint64_t)1 << I40E_48_BIT_WIDTH)) - *offset);
- ret = i40evf_update_stats(dev, &pstats);
- if (ret != 0)
- return 0;
+ *stat &= I40E_48_BIT_MASK;
+}
- stats->ipackets = pstats->rx_unicast + pstats->rx_multicast +
- pstats->rx_broadcast;
- stats->opackets = pstats->tx_broadcast + pstats->tx_multicast +
- pstats->tx_unicast;
- stats->imissed = pstats->rx_discards;
- stats->oerrors = pstats->tx_errors + pstats->tx_discards;
- stats->ibytes = pstats->rx_bytes;
- stats->obytes = pstats->tx_bytes;
+static void
+i40evf_stat_update_32(uint64_t *offset,
+ uint64_t *stat)
+{
+ if (*stat >= *offset)
+ *stat = (uint64_t)(*stat - *offset);
+ else
+ *stat = (uint64_t)((*stat +
+ ((uint64_t)1 << I40E_32_BIT_WIDTH)) - *offset);
+}
- return 0;
+static void
+i40evf_update_stats(struct i40e_vsi *vsi,
+ struct i40e_eth_stats *nes)
+{
+ struct i40e_eth_stats *oes = &vsi->eth_stats_offset;
+
+ i40evf_stat_update_48(&oes->rx_bytes,
+ &nes->rx_bytes);
+ i40evf_stat_update_48(&oes->rx_unicast,
+ &nes->rx_unicast);
+ i40evf_stat_update_48(&oes->rx_multicast,
+ &nes->rx_multicast);
+ i40evf_stat_update_48(&oes->rx_broadcast,
+ &nes->rx_broadcast);
+ i40evf_stat_update_32(&oes->rx_discards,
+ &nes->rx_discards);
+ i40evf_stat_update_32(&oes->rx_unknown_protocol,
+ &nes->rx_unknown_protocol);
+ i40evf_stat_update_48(&oes->tx_bytes,
+ &nes->tx_bytes);
+ i40evf_stat_update_48(&oes->tx_unicast,
+ &nes->tx_unicast);
+ i40evf_stat_update_48(&oes->tx_multicast,
+ &nes->tx_multicast);
+ i40evf_stat_update_48(&oes->tx_broadcast,
+ &nes->tx_broadcast);
+ i40evf_stat_update_32(&oes->tx_errors, &nes->tx_errors);
+ i40evf_stat_update_32(&oes->tx_discards, &nes->tx_discards);
}
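A small worked example of the rollover handling above (not part of the patch): when the 48-bit hardware counter wraps past the stored offset, the helper reconstructs the delta modulo 2^48.

/* Sketch: offset snapshot taken just before a 48-bit counter wrap.
 * A new reading of 0x10 with offset 0xFFFFFFFFFFF0 yields a delta of 0x20:
 * (0x10 + 2^48) - 0xFFFFFFFFFFF0 = 0x20, then masked to 48 bits.
 */
static void
example_stat_update_48(void)
{
	uint64_t offset = 0xFFFFFFFFFFF0ULL;
	uint64_t stat = 0x10ULL;

	i40evf_stat_update_48(&offset, &stat);
	/* stat == 0x20 here */
}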
static void
@@ -1022,10 +948,10 @@ i40evf_dev_xstats_reset(struct rte_eth_dev *dev)
struct i40e_eth_stats *pstats = NULL;
/* read stat values to clear hardware registers */
- i40evf_update_stats(dev, &pstats);
+ i40evf_query_stats(dev, &pstats);
/* set stats offset base on current values */
- vf->vsi.eth_stats_offset = vf->vsi.eth_stats;
+ vf->vsi.eth_stats_offset = *pstats;
}
static int i40evf_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
@@ -1049,17 +975,21 @@ static int i40evf_dev_xstats_get(struct rte_eth_dev *dev,
int ret;
unsigned i;
struct i40e_eth_stats *pstats = NULL;
+ struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+ struct i40e_vsi *vsi = &vf->vsi;
if (n < I40EVF_NB_XSTATS)
return I40EVF_NB_XSTATS;
- ret = i40evf_update_stats(dev, &pstats);
+ ret = i40evf_query_stats(dev, &pstats);
if (ret != 0)
return 0;
if (!xstats)
return 0;
+ i40evf_update_stats(vsi, pstats);
+
/* loop over xstats array and values from pstats */
for (i = 0; i < I40EVF_NB_XSTATS; i++) {
xstats[i].id = i;
@@ -1179,10 +1109,30 @@ i40evf_enable_irq0(struct i40e_hw *hw)
}
static int
-i40evf_reset_vf(struct i40e_hw *hw)
+i40evf_check_vf_reset_done(struct i40e_hw *hw)
{
int i, reset;
+ for (i = 0; i < MAX_RESET_WAIT_CNT; i++) {
+ reset = I40E_READ_REG(hw, I40E_VFGEN_RSTAT) &
+ I40E_VFGEN_RSTAT_VFR_STATE_MASK;
+ reset = reset >> I40E_VFGEN_RSTAT_VFR_STATE_SHIFT;
+ if (reset == VIRTCHNL_VFR_VFACTIVE ||
+ reset == VIRTCHNL_VFR_COMPLETED)
+ break;
+ rte_delay_ms(50);
+ }
+
+ if (i >= MAX_RESET_WAIT_CNT)
+ return -1;
+
+ return 0;
+}
+static int
+i40evf_reset_vf(struct i40e_hw *hw)
+{
+ int ret;
+
if (i40e_vf_reset(hw) != I40E_SUCCESS) {
PMD_INIT_LOG(ERR, "Reset VF NIC failed");
return -1;
@@ -1198,19 +1148,10 @@ i40evf_reset_vf(struct i40e_hw *hw)
*/
rte_delay_ms(200);
- for (i = 0; i < MAX_RESET_WAIT_CNT; i++) {
- reset = rd32(hw, I40E_VFGEN_RSTAT) &
- I40E_VFGEN_RSTAT_VFR_STATE_MASK;
- reset = reset >> I40E_VFGEN_RSTAT_VFR_STATE_SHIFT;
- if (VIRTCHNL_VFR_COMPLETED == reset || VIRTCHNL_VFR_VFACTIVE == reset)
- break;
- else
- rte_delay_ms(50);
- }
-
- if (i >= MAX_RESET_WAIT_CNT) {
- PMD_INIT_LOG(ERR, "Reset VF NIC failed");
- return -1;
+ ret = i40evf_check_vf_reset_done(hw);
+ if (ret) {
+ PMD_INIT_LOG(ERR, "VF is still resetting");
+ return ret;
}
return 0;
@@ -1233,6 +1174,10 @@ i40evf_init_vf(struct rte_eth_dev *dev)
goto err;
}
+ err = i40evf_check_vf_reset_done(hw);
+ if (err)
+ goto err;
+
i40e_init_adminq_parameter(hw);
err = i40e_init_adminq(hw);
if (err) {
@@ -1249,29 +1194,30 @@ i40evf_init_vf(struct rte_eth_dev *dev)
/* VF reset, shutdown admin queue and initialize again */
if (i40e_shutdown_adminq(hw) != I40E_SUCCESS) {
PMD_INIT_LOG(ERR, "i40e_shutdown_adminq failed");
- return -1;
+ goto err;
}
i40e_init_adminq_parameter(hw);
if (i40e_init_adminq(hw) != I40E_SUCCESS) {
PMD_INIT_LOG(ERR, "init_adminq failed");
- return -1;
+ goto err;
}
+
vf->aq_resp = rte_zmalloc("vf_aq_resp", I40E_AQ_BUF_SZ, 0);
if (!vf->aq_resp) {
PMD_INIT_LOG(ERR, "unable to allocate vf_aq_resp memory");
- goto err_aq;
+ goto err_aq;
}
if (i40evf_check_api_version(dev) != 0) {
PMD_INIT_LOG(ERR, "check_api version failed");
- goto err_aq;
+ goto err_api;
}
bufsz = sizeof(struct virtchnl_vf_resource) +
(I40E_MAX_VF_VSI * sizeof(struct virtchnl_vsi_resource));
vf->vf_res = rte_zmalloc("vf_res", bufsz, 0);
if (!vf->vf_res) {
PMD_INIT_LOG(ERR, "unable to allocate vf_res memory");
- goto err_aq;
+ goto err_api;
}
if (i40evf_get_vf_resource(dev) != 0) {
@@ -1293,7 +1239,15 @@ i40evf_init_vf(struct rte_eth_dev *dev)
if (hw->mac.type == I40E_MAC_X722_VF)
vf->flags = I40E_FLAG_RSS_AQ_CAPABLE;
vf->vsi.vsi_id = vf->vsi_res->vsi_id;
- vf->vsi.type = (enum i40e_vsi_type)vf->vsi_res->vsi_type;
+
+ switch (vf->vsi_res->vsi_type) {
+ case VIRTCHNL_VSI_SRIOV:
+ vf->vsi.type = I40E_VSI_SRIOV;
+ break;
+ default:
+ vf->vsi.type = I40E_VSI_TYPE_UNKNOWN;
+ break;
+ }
vf->vsi.nb_qps = vf->vsi_res->num_queue_pairs;
vf->vsi.adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
@@ -1303,20 +1257,20 @@ i40evf_init_vf(struct rte_eth_dev *dev)
else
eth_random_addr(hw->mac.addr); /* Generate a random one */
- /* If the PF host is not DPDK, set the interval of ITR0 to max*/
- if (vf->version_major != I40E_DPDK_VERSION_MAJOR) {
- I40E_WRITE_REG(hw, I40E_VFINT_DYN_CTL01,
- (I40E_ITR_INDEX_DEFAULT <<
- I40E_VFINT_DYN_CTL0_ITR_INDX_SHIFT) |
- (interval <<
- I40E_VFINT_DYN_CTL0_INTERVAL_SHIFT));
- I40EVF_WRITE_FLUSH(hw);
- }
+ I40E_WRITE_REG(hw, I40E_VFINT_DYN_CTL01,
+ (I40E_ITR_INDEX_DEFAULT <<
+ I40E_VFINT_DYN_CTL0_ITR_INDX_SHIFT) |
+ (interval <<
+ I40E_VFINT_DYN_CTL0_INTERVAL_SHIFT));
+ I40EVF_WRITE_FLUSH(hw);
return 0;
err_alloc:
rte_free(vf->vf_res);
+ vf->vsi_res = NULL;
+err_api:
+ rte_free(vf->aq_resp);
err_aq:
i40e_shutdown_adminq(hw); /* ignore error */
err:
@@ -1476,7 +1430,6 @@ i40evf_dev_interrupt_handler(void *param)
done:
i40evf_enable_irq0(hw);
- rte_intr_enable(dev->intr_handle);
}
static int
@@ -1503,8 +1456,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
return 0;
}
i40e_set_default_ptype_table(eth_dev);
+ i40e_set_default_pctype_table(eth_dev);
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
hw->vendor_id = pci_dev->id.vendor_id;
hw->device_id = pci_dev->id.device_id;
@@ -1586,7 +1539,7 @@ static int eth_i40evf_pci_remove(struct rte_pci_device *pci_dev)
*/
static struct rte_pci_driver rte_i40evf_pmd = {
.id_table = pci_id_i40evf_map,
- .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_IOVA_AS_VA,
.probe = eth_i40evf_pci_probe,
.remove = eth_i40evf_pci_remove,
};
@@ -1630,19 +1583,11 @@ i40evf_dev_configure(struct rte_eth_dev *dev)
static int
i40evf_init_vlan(struct rte_eth_dev *dev)
{
- struct rte_eth_dev_data *data = dev->data;
- int ret;
-
/* Apply vlan offload setting */
- i40evf_vlan_offload_set(dev, ETH_VLAN_STRIP_MASK);
-
- /* Apply pvid setting */
- ret = i40evf_vlan_pvid_set(dev, data->dev_conf.txmode.pvid,
- data->dev_conf.txmode.hw_vlan_insert_pvid);
- return ret;
+ return i40evf_vlan_offload_set(dev, ETH_VLAN_STRIP_MASK);
}
-static void
+static int
i40evf_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
@@ -1655,30 +1600,6 @@ i40evf_vlan_offload_set(struct rte_eth_dev *dev, int mask)
else
i40evf_disable_vlan_strip(dev);
}
-}
-
-static int
-i40evf_vlan_pvid_set(struct rte_eth_dev *dev, uint16_t pvid, int on)
-{
- struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
- struct i40e_vsi_vlan_pvid_info info;
- struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-
- memset(&info, 0, sizeof(info));
- info.on = on;
-
- /* Linux pf host don't support vlan offload yet */
- if (vf->version_major == I40E_DPDK_VERSION_MAJOR) {
- if (info.on)
- info.config.pvid = pvid;
- else {
- info.config.reject.tagged =
- dev_conf->txmode.hw_vlan_reject_tagged;
- info.config.reject.untagged =
- dev_conf->txmode.hw_vlan_reject_untagged;
- }
- return i40evf_config_vlan_pvid(dev, &info);
- }
return 0;
}
@@ -1899,7 +1820,6 @@ i40evf_tx_init(struct rte_eth_dev *dev)
static inline void
i40evf_enable_queues_intr(struct rte_eth_dev *dev)
{
- struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
@@ -1914,25 +1834,12 @@ i40evf_enable_queues_intr(struct rte_eth_dev *dev)
return;
}
- if (vf->version_major == I40E_DPDK_VERSION_MAJOR)
- /* To support DPDK PF host */
- I40E_WRITE_REG(hw,
- I40E_VFINT_DYN_CTLN1(I40EVF_VSI_DEFAULT_MSIX_INTR - 1),
- I40E_VFINT_DYN_CTLN1_INTENA_MASK |
- I40E_VFINT_DYN_CTLN_CLEARPBA_MASK);
- /* If host driver is kernel driver, do nothing.
- * Interrupt 0 is used for rx packets, but don't set
- * I40E_VFINT_DYN_CTL01,
- * because it is already done in i40evf_enable_irq0.
- */
-
I40EVF_WRITE_FLUSH(hw);
}
static inline void
i40evf_disable_queues_intr(struct rte_eth_dev *dev)
{
- struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
@@ -1944,17 +1851,6 @@ i40evf_disable_queues_intr(struct rte_eth_dev *dev)
return;
}
- if (vf->version_major == I40E_DPDK_VERSION_MAJOR)
- I40E_WRITE_REG(hw,
- I40E_VFINT_DYN_CTLN1(I40EVF_VSI_DEFAULT_MSIX_INTR
- - 1),
- 0);
- /* If host driver is kernel driver, do nothing.
- * Interrupt 0 is used for rx packets, but don't zero
- * I40E_VFINT_DYN_CTL01,
- * because interrupt 0 is also used for adminq processing.
- */
-
I40EVF_WRITE_FLUSH(hw);
}
@@ -2050,7 +1946,7 @@ i40evf_add_del_all_mac_addr(struct rte_eth_dev *dev, bool add)
addr = &dev->data->mac_addrs[i];
if (is_zero_ether_addr(addr))
continue;
- (void)rte_memcpy(list->list[j].addr, addr->addr_bytes,
+ rte_memcpy(list->list[j].addr, addr->addr_bytes,
sizeof(addr->addr_bytes));
PMD_DRV_LOG(DEBUG, "add/rm mac:%x:%x:%x:%x:%x:%x",
addr->addr_bytes[0], addr->addr_bytes[1],
@@ -2124,7 +2020,7 @@ i40evf_dev_start(struct rte_eth_dev *dev)
i40evf_tx_init(dev);
- if (i40evf_configure_queues(dev) != 0) {
+ if (i40evf_configure_vsi_queues(dev) != 0) {
PMD_DRV_LOG(ERR, "configure queues failed");
goto err_queue;
}
@@ -2141,7 +2037,20 @@ i40evf_dev_start(struct rte_eth_dev *dev)
goto err_mac;
}
+ /* When a VF port is bound to VFIO-PCI, only the miscellaneous interrupt
+ * is mapped to VFIO vector 0 in i40evf_dev_init().
+ * If the VFIO interrupt mapping set up in i40evf_dev_init() is not
+ * cleared first, rte_intr_enable() will fail when it tries to map the
+ * Rx queue interrupts to other VFIO vectors.
+ * So clear the uio/vfio intr/eventfd first to avoid the failure.
+ */
+ if (dev->data->dev_conf.intr_conf.rxq != 0) {
+ rte_intr_disable(intr_handle);
+ rte_intr_enable(intr_handle);
+ }
+
i40evf_enable_queues_intr(dev);
+
return 0;
err_mac:
@@ -2155,7 +2064,7 @@ i40evf_dev_stop(struct rte_eth_dev *dev)
{
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
- struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev);
+ struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
PMD_INIT_FUNC_TRACE();
@@ -2216,6 +2125,8 @@ i40evf_dev_link_update(struct rte_eth_dev *dev,
new_link.link_duplex = ETH_LINK_FULL_DUPLEX;
new_link.link_status = vf->link_up ? ETH_LINK_UP :
ETH_LINK_DOWN;
+ new_link.link_autoneg =
+ dev->data->dev_conf.link_speeds & ETH_LINK_SPEED_FIXED;
i40evf_dev_atomic_write_link_status(dev, &new_link);
@@ -2295,7 +2206,7 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
dev_info->max_rx_pktlen = I40E_FRAME_SIZE_MAX;
dev_info->hash_key_size = (I40E_VFQF_HKEY_MAX_INDEX + 1) * sizeof(uint32_t);
dev_info->reta_size = ETH_RSS_RETA_SIZE_64;
- dev_info->flow_type_rss_offloads = I40E_RSS_OFFLOAD_ALL;
+ dev_info->flow_type_rss_offloads = vf->adapter->flow_types_mask;
dev_info->max_mac_addrs = I40E_NUM_MACADDR_MAX;
dev_info->rx_offload_capa =
DEV_RX_OFFLOAD_VLAN_STRIP |
@@ -2346,11 +2257,30 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
};
}
-static void
+static int
i40evf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
- if (i40evf_get_statistics(dev, stats))
+ int ret;
+ struct i40e_eth_stats *pstats = NULL;
+ struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+ struct i40e_vsi *vsi = &vf->vsi;
+
+ ret = i40evf_query_stats(dev, &pstats);
+ if (ret == 0) {
+ i40evf_update_stats(vsi, pstats);
+
+ stats->ipackets = pstats->rx_unicast + pstats->rx_multicast +
+ pstats->rx_broadcast;
+ stats->opackets = pstats->tx_broadcast + pstats->tx_multicast +
+ pstats->tx_unicast;
+ stats->imissed = pstats->rx_discards;
+ stats->oerrors = pstats->tx_errors + pstats->tx_discards;
+ stats->ibytes = pstats->rx_bytes;
+ stats->obytes = pstats->tx_bytes;
+ } else {
PMD_DRV_LOG(ERR, "Get statistics failed");
+ }
+ return ret;
}
static void
@@ -2373,6 +2303,23 @@ i40evf_dev_close(struct rte_eth_dev *dev)
i40evf_disable_irq0(hw);
}
+/*
+ * Reset the VF device only to re-initialize resources in the PMD layer
+ */
+static int
+i40evf_dev_reset(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ ret = i40evf_dev_uninit(dev);
+ if (ret)
+ return ret;
+
+ ret = i40evf_dev_init(dev);
+
+ return ret;
+}
+
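For context (not part of the patch), the new dev_reset callback is reached through the generic rte_eth_dev_reset() API; a minimal application-side sketch, assuming the caller reconfigures and restarts the port afterwards:

/* Sketch: application-side recovery using the generic reset entry point,
 * which ends up in i40evf_dev_reset() for this PMD.
 */
static int
recover_port(uint16_t port_id, const struct rte_eth_conf *conf,
	     uint16_t nb_rxq, uint16_t nb_txq)
{
	int ret;

	rte_eth_dev_stop(port_id);

	ret = rte_eth_dev_reset(port_id);
	if (ret != 0)
		return ret;

	/* The port must be reconfigured after a reset. */
	ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, conf);
	if (ret != 0)
		return ret;

	/* Rx/Tx queue setup and rte_eth_dev_start() would follow here. */
	return 0;
}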
static int
i40evf_get_rss_lut(struct i40e_vsi *vsi, uint8_t *lut, uint16_t lut_size)
{
@@ -2580,7 +2527,7 @@ static int
i40evf_hw_rss_hash_set(struct i40e_vf *vf, struct rte_eth_rss_conf *rss_conf)
{
struct i40e_hw *hw = I40E_VF_TO_HW(vf);
- uint64_t rss_hf, hena;
+ uint64_t hena;
int ret;
ret = i40evf_set_rss_key(&vf->vsi, rss_conf->rss_key,
@@ -2588,14 +2535,7 @@ i40evf_hw_rss_hash_set(struct i40e_vf *vf, struct rte_eth_rss_conf *rss_conf)
if (ret)
return ret;
- rss_hf = rss_conf->rss_hf;
- hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0));
- hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32;
- if (hw->mac.type == I40E_MAC_X722)
- hena &= ~I40E_RSS_HENA_ALL_X722;
- else
- hena &= ~I40E_RSS_HENA_ALL;
- hena |= i40e_config_hena(rss_hf, hw->mac.type);
+ hena = i40e_config_hena(vf->adapter, rss_conf->rss_hf);
i40e_write_rx_ctl(hw, I40E_VFQF_HENA(0), (uint32_t)hena);
i40e_write_rx_ctl(hw, I40E_VFQF_HENA(1), (uint32_t)(hena >> 32));
I40EVF_WRITE_FLUSH(hw);
@@ -2607,16 +2547,9 @@ static void
i40evf_disable_rss(struct i40e_vf *vf)
{
struct i40e_hw *hw = I40E_VF_TO_HW(vf);
- uint64_t hena;
- hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0));
- hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32;
- if (hw->mac.type == I40E_MAC_X722)
- hena &= ~I40E_RSS_HENA_ALL_X722;
- else
- hena &= ~I40E_RSS_HENA_ALL;
- i40e_write_rx_ctl(hw, I40E_VFQF_HENA(0), (uint32_t)hena);
- i40e_write_rx_ctl(hw, I40E_VFQF_HENA(1), (uint32_t)(hena >> 32));
+ i40e_write_rx_ctl(hw, I40E_VFQF_HENA(0), 0);
+ i40e_write_rx_ctl(hw, I40E_VFQF_HENA(1), 0);
I40EVF_WRITE_FLUSH(hw);
}
@@ -2645,7 +2578,7 @@ i40evf_config_rss(struct i40e_vf *vf)
}
rss_conf = vf->dev_data->dev_conf.rx_adv_conf.rss_conf;
- if ((rss_conf.rss_hf & I40E_RSS_OFFLOAD_ALL) == 0) {
+ if ((rss_conf.rss_hf & vf->adapter->flow_types_mask) == 0) {
i40evf_disable_rss(vf);
PMD_DRV_LOG(DEBUG, "No hash flag is set");
return 0;
@@ -2670,14 +2603,13 @@ i40evf_dev_rss_hash_update(struct rte_eth_dev *dev,
{
struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
- uint64_t rss_hf = rss_conf->rss_hf & I40E_RSS_OFFLOAD_ALL;
+ uint64_t rss_hf = rss_conf->rss_hf & vf->adapter->flow_types_mask;
uint64_t hena;
hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0));
hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32;
- if (!(hena & ((hw->mac.type == I40E_MAC_X722)
- ? I40E_RSS_HENA_ALL_X722
- : I40E_RSS_HENA_ALL))) { /* RSS disabled */
+
+ if (!(hena & vf->adapter->pctypes_mask)) { /* RSS disabled */
if (rss_hf != 0) /* Enable RSS */
return -EINVAL;
return 0;
@@ -2703,7 +2635,7 @@ i40evf_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0));
hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32;
- rss_conf->rss_hf = i40e_parse_hena(hena);
+ rss_conf->rss_hf = i40e_parse_hena(vf->adapter, hena);
return 0;
}
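The VF RSS path now relies entirely on the adapter's per-flow-type pctype masks. The body of i40e_config_hena() is not shown in this hunk; a plausible sketch of the conversion, under the assumption that it simply ORs together the mask of every requested flow type:

/* Sketch (assumption): build the HENA bitmap by OR-ing the pctype mask
 * of every flow type requested in rss_hf.
 */
static uint64_t
example_config_hena(const struct i40e_adapter *ad, uint64_t rss_hf)
{
	uint64_t hena = 0;
	uint16_t ft;

	for (ft = RTE_ETH_FLOW_UNKNOWN + 1; ft < I40E_FLOW_TYPE_MAX; ft++) {
		if (rss_hf & (1ULL << ft))
			hena |= ad->pctypes_tbl[ft];
	}
	return hena;
}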
diff --git a/drivers/net/i40e/i40e_fdir.c b/drivers/net/i40e/i40e_fdir.c
index 8013add4..3d7170d5 100644
--- a/drivers/net/i40e/i40e_fdir.c
+++ b/drivers/net/i40e/i40e_fdir.c
@@ -49,6 +49,7 @@
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
+#include <rte_hash_crc.h>
#include "i40e_logs.h"
#include "base/i40e_type.h"
@@ -71,6 +72,16 @@
#define I40E_FDIR_IPv6_DEFAULT_HOP_LIMITS 0xFF
#define I40E_FDIR_IPv6_PAYLOAD_LEN 380
#define I40E_FDIR_UDP_DEFAULT_LEN 400
+#define I40E_FDIR_GTP_DEFAULT_LEN 384
+#define I40E_FDIR_INNER_IP_DEFAULT_LEN 384
+#define I40E_FDIR_INNER_IPV6_DEFAULT_LEN 344
+
+#define I40E_FDIR_GTPC_DST_PORT 2123
+#define I40E_FDIR_GTPU_DST_PORT 2152
+#define I40E_FDIR_GTP_VER_FLAG_0X30 0x30
+#define I40E_FDIR_GTP_VER_FLAG_0X32 0x32
+#define I40E_FDIR_GTP_MSG_TYPE_0X01 0x01
+#define I40E_FDIR_GTP_MSG_TYPE_0XFF 0xFF
/* Wait time for fdir filter programming */
#define I40E_FDIR_MAX_WAIT_US 10000
@@ -100,13 +111,18 @@ static int i40e_fdir_filter_programming(struct i40e_pf *pf,
enum i40e_filter_pctype pctype,
const struct rte_eth_fdir_filter *filter,
bool add);
-static int i40e_fdir_filter_convert(const struct rte_eth_fdir_filter *input,
+static int i40e_fdir_filter_convert(const struct i40e_fdir_filter_conf *input,
struct i40e_fdir_filter *filter);
static struct i40e_fdir_filter *
i40e_sw_fdir_filter_lookup(struct i40e_fdir_info *fdir_info,
- const struct rte_eth_fdir_input *input);
+ const struct i40e_fdir_input *input);
static int i40e_sw_fdir_filter_insert(struct i40e_pf *pf,
struct i40e_fdir_filter *filter);
+static int
+i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
+ enum i40e_filter_pctype pctype,
+ const struct i40e_fdir_filter_conf *filter,
+ bool add);
static int
i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq)
@@ -249,7 +265,7 @@ i40e_fdir_setup(struct i40e_pf *pf)
goto fail_mem;
}
pf->fdir.prg_pkt = mz->addr;
- pf->fdir.dma_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
+ pf->fdir.dma_addr = mz->iova;
pf->fdir.match_counter_index = I40E_COUNTER_INDEX_FDIR(hw->pf_id);
PMD_DRV_LOG(INFO, "FDIR setup successfully, with programming queue %u.",
@@ -323,6 +339,7 @@ i40e_init_flx_pld(struct i40e_pf *pf)
struct i40e_hw *hw = I40E_PF_TO_HW(pf);
uint8_t pctype;
int i, index;
+ uint16_t flow_type;
/*
* Define the bytes stream extracted as flexible payload in
@@ -344,15 +361,10 @@ i40e_init_flx_pld(struct i40e_pf *pf)
/* initialize the masks */
for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
pctype <= I40E_FILTER_PCTYPE_L2_PAYLOAD; pctype++) {
- if (hw->mac.type == I40E_MAC_X722) {
- if (!I40E_VALID_PCTYPE_X722(
- (enum i40e_filter_pctype)pctype))
- continue;
- } else {
- if (!I40E_VALID_PCTYPE(
- (enum i40e_filter_pctype)pctype))
- continue;
- }
+ flow_type = i40e_pctype_to_flowtype(pf->adapter, pctype);
+
+ if (flow_type == RTE_ETH_FLOW_UNKNOWN)
+ continue;
pf->fdir.flex_mask[pctype].word_mask = 0;
i40e_write_rx_ctl(hw, I40E_PRTQF_FD_FLXINSET(pctype), 0);
for (i = 0; i < I40E_FDIR_BITMASK_NUM_WORD; i++) {
@@ -449,7 +461,8 @@ i40e_check_fdir_flex_payload(const struct rte_eth_flex_payload_cfg *flex_cfg)
* arguments are valid
*/
static int
-i40e_check_fdir_flex_conf(const struct rte_eth_fdir_flex_conf *conf)
+i40e_check_fdir_flex_conf(const struct i40e_adapter *adapter,
+ const struct rte_eth_fdir_flex_conf *conf)
{
const struct rte_eth_flex_payload_cfg *flex_cfg;
const struct rte_eth_fdir_flex_mask *flex_mask;
@@ -457,6 +470,7 @@ i40e_check_fdir_flex_conf(const struct rte_eth_fdir_flex_conf *conf)
uint8_t nb_bitmask;
uint16_t i, j;
int ret = 0;
+ enum i40e_filter_pctype pctype;
if (conf == NULL) {
PMD_DRV_LOG(INFO, "NULL pointer.");
@@ -487,7 +501,8 @@ i40e_check_fdir_flex_conf(const struct rte_eth_fdir_flex_conf *conf)
}
for (i = 0; i < conf->nb_flexmasks; i++) {
flex_mask = &conf->flex_mask[i];
- if (!I40E_VALID_FLOW(flex_mask->flow_type)) {
+ pctype = i40e_flowtype_to_pctype(adapter, flex_mask->flow_type);
+ if (pctype == I40E_FILTER_PCTYPE_INVALID) {
PMD_DRV_LOG(WARNING, "invalid flow type.");
return -EINVAL;
}
@@ -650,7 +665,7 @@ i40e_fdir_configure(struct rte_eth_dev *dev)
i40e_init_flx_pld(pf); /* set flex config to default value */
conf = &dev->data->dev_conf.fdir_conf.flex_conf;
- ret = i40e_check_fdir_flex_conf(conf);
+ ret = i40e_check_fdir_flex_conf(pf->adapter, conf);
if (ret < 0) {
PMD_DRV_LOG(ERR, " invalid configuration arguments.");
return -EINVAL;
@@ -664,11 +679,11 @@ i40e_fdir_configure(struct rte_eth_dev *dev)
/* get translated pctype value in fd pctype register */
pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl(
hw, I40E_GLQF_FD_PCTYPES(
- (int)i40e_flowtype_to_pctype(
+ (int)i40e_flowtype_to_pctype(pf->adapter,
conf->flex_mask[i].flow_type)));
} else
- pctype = i40e_flowtype_to_pctype(
- conf->flex_mask[i].flow_type);
+ pctype = i40e_flowtype_to_pctype(pf->adapter,
+ conf->flex_mask[i].flow_type);
i40e_set_flex_mask_on_pctype(pf, pctype, &conf->flex_mask[i]);
}
@@ -926,6 +941,358 @@ i40e_fdir_construct_pkt(struct i40e_pf *pf,
dst = pf->fdir.flex_set[pit_idx].dst_offset * sizeof(uint16_t);
ptr = payload +
pf->fdir.flex_set[pit_idx].src_offset * sizeof(uint16_t);
+ rte_memcpy(ptr,
+ &fdir_input->flow_ext.flexbytes[dst],
+ size * sizeof(uint16_t));
+ }
+
+ return 0;
+}
+
+static struct i40e_customized_pctype *
+i40e_flow_fdir_find_customized_pctype(struct i40e_pf *pf, uint8_t pctype)
+{
+ struct i40e_customized_pctype *cus_pctype;
+ enum i40e_new_pctype i = I40E_CUSTOMIZED_GTPC;
+
+ for (; i < I40E_CUSTOMIZED_MAX; i++) {
+ cus_pctype = &pf->customized_pctype[i];
+ if (pctype == cus_pctype->pctype)
+ return cus_pctype;
+ }
+ return NULL;
+}
+
+static inline int
+i40e_flow_fdir_fill_eth_ip_head(struct i40e_pf *pf,
+ const struct i40e_fdir_input *fdir_input,
+ unsigned char *raw_pkt,
+ bool vlan)
+{
+ struct i40e_customized_pctype *cus_pctype = NULL;
+ static uint8_t vlan_frame[] = {0x81, 0, 0, 0};
+ uint16_t *ether_type;
+ uint8_t len = 2 * sizeof(struct ether_addr);
+ struct ipv4_hdr *ip;
+ struct ipv6_hdr *ip6;
+ uint8_t pctype = fdir_input->pctype;
+ bool is_customized_pctype = fdir_input->flow_ext.customized_pctype;
+ static const uint8_t next_proto[] = {
+ [I40E_FILTER_PCTYPE_FRAG_IPV4] = IPPROTO_IP,
+ [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] = IPPROTO_TCP,
+ [I40E_FILTER_PCTYPE_NONF_IPV4_UDP] = IPPROTO_UDP,
+ [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] = IPPROTO_SCTP,
+ [I40E_FILTER_PCTYPE_NONF_IPV4_OTHER] = IPPROTO_IP,
+ [I40E_FILTER_PCTYPE_FRAG_IPV6] = IPPROTO_NONE,
+ [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] = IPPROTO_TCP,
+ [I40E_FILTER_PCTYPE_NONF_IPV6_UDP] = IPPROTO_UDP,
+ [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] = IPPROTO_SCTP,
+ [I40E_FILTER_PCTYPE_NONF_IPV6_OTHER] = IPPROTO_NONE,
+ };
+
+ raw_pkt += 2 * sizeof(struct ether_addr);
+ if (vlan && fdir_input->flow_ext.vlan_tci) {
+ rte_memcpy(raw_pkt, vlan_frame, sizeof(vlan_frame));
+ rte_memcpy(raw_pkt + sizeof(uint16_t),
+ &fdir_input->flow_ext.vlan_tci,
+ sizeof(uint16_t));
+ raw_pkt += sizeof(vlan_frame);
+ len += sizeof(vlan_frame);
+ }
+ ether_type = (uint16_t *)raw_pkt;
+ raw_pkt += sizeof(uint16_t);
+ len += sizeof(uint16_t);
+
+ if (is_customized_pctype) {
+ cus_pctype = i40e_flow_fdir_find_customized_pctype(pf, pctype);
+ if (!cus_pctype) {
+ PMD_DRV_LOG(ERR, "unknown pctype %u.",
+ fdir_input->pctype);
+ return -1;
+ }
+ }
+
+ if (pctype == I40E_FILTER_PCTYPE_L2_PAYLOAD)
+ *ether_type = fdir_input->flow.l2_flow.ether_type;
+ else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV4_TCP ||
+ pctype == I40E_FILTER_PCTYPE_NONF_IPV4_UDP ||
+ pctype == I40E_FILTER_PCTYPE_NONF_IPV4_SCTP ||
+ pctype == I40E_FILTER_PCTYPE_NONF_IPV4_OTHER ||
+ pctype == I40E_FILTER_PCTYPE_FRAG_IPV4 ||
+ is_customized_pctype) {
+ ip = (struct ipv4_hdr *)raw_pkt;
+
+ *ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+ ip->version_ihl = I40E_FDIR_IP_DEFAULT_VERSION_IHL;
+ /* set len to default value */
+ ip->total_length = rte_cpu_to_be_16(I40E_FDIR_IP_DEFAULT_LEN);
+ ip->time_to_live = fdir_input->flow.ip4_flow.ttl ?
+ fdir_input->flow.ip4_flow.ttl :
+ I40E_FDIR_IP_DEFAULT_TTL;
+ ip->type_of_service = fdir_input->flow.ip4_flow.tos;
+ /**
+ * The source and destination fields in the transmitted packet
+ * need to be presented in a reversed order with respect
+ * to the expected received packets.
+ */
+ ip->src_addr = fdir_input->flow.ip4_flow.dst_ip;
+ ip->dst_addr = fdir_input->flow.ip4_flow.src_ip;
+
+ if (!is_customized_pctype)
+ ip->next_proto_id = fdir_input->flow.ip4_flow.proto ?
+ fdir_input->flow.ip4_flow.proto :
+ next_proto[fdir_input->pctype];
+ else if (cus_pctype->index == I40E_CUSTOMIZED_GTPC ||
+ cus_pctype->index == I40E_CUSTOMIZED_GTPU_IPV4 ||
+ cus_pctype->index == I40E_CUSTOMIZED_GTPU_IPV6 ||
+ cus_pctype->index == I40E_CUSTOMIZED_GTPU)
+ ip->next_proto_id = IPPROTO_UDP;
+ len += sizeof(struct ipv4_hdr);
+ } else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV6_TCP ||
+ pctype == I40E_FILTER_PCTYPE_NONF_IPV6_UDP ||
+ pctype == I40E_FILTER_PCTYPE_NONF_IPV6_SCTP ||
+ pctype == I40E_FILTER_PCTYPE_NONF_IPV6_OTHER ||
+ pctype == I40E_FILTER_PCTYPE_FRAG_IPV6) {
+ ip6 = (struct ipv6_hdr *)raw_pkt;
+
+ *ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+ ip6->vtc_flow =
+ rte_cpu_to_be_32(I40E_FDIR_IPv6_DEFAULT_VTC_FLOW |
+ (fdir_input->flow.ipv6_flow.tc <<
+ I40E_FDIR_IPv6_TC_OFFSET));
+ ip6->payload_len =
+ rte_cpu_to_be_16(I40E_FDIR_IPv6_PAYLOAD_LEN);
+ ip6->proto = fdir_input->flow.ipv6_flow.proto ?
+ fdir_input->flow.ipv6_flow.proto :
+ next_proto[fdir_input->pctype];
+ ip6->hop_limits = fdir_input->flow.ipv6_flow.hop_limits ?
+ fdir_input->flow.ipv6_flow.hop_limits :
+ I40E_FDIR_IPv6_DEFAULT_HOP_LIMITS;
+ /**
+ * The source and destination fields in the transmitted packet
+ * need to be presented in a reversed order with respect
+ * to the expected received packets.
+ */
+ rte_memcpy(&ip6->src_addr,
+ &fdir_input->flow.ipv6_flow.dst_ip,
+ IPV6_ADDR_LEN);
+ rte_memcpy(&ip6->dst_addr,
+ &fdir_input->flow.ipv6_flow.src_ip,
+ IPV6_ADDR_LEN);
+ len += sizeof(struct ipv6_hdr);
+ } else {
+ PMD_DRV_LOG(ERR, "unknown pctype %u.",
+ fdir_input->pctype);
+ return -1;
+ }
+
+ return len;
+}
+
+/**
+ * i40e_flow_fdir_construct_pkt - construct packet based on fields in input
+ * @pf: board private structure
+ * @fdir_input: input set of the flow director entry
+ * @raw_pkt: a packet to be constructed
+ */
+static int
+i40e_flow_fdir_construct_pkt(struct i40e_pf *pf,
+ const struct i40e_fdir_input *fdir_input,
+ unsigned char *raw_pkt)
+{
+ unsigned char *payload = NULL;
+ unsigned char *ptr;
+ struct udp_hdr *udp;
+ struct tcp_hdr *tcp;
+ struct sctp_hdr *sctp;
+ struct rte_flow_item_gtp *gtp;
+ struct ipv4_hdr *gtp_ipv4;
+ struct ipv6_hdr *gtp_ipv6;
+ uint8_t size, dst = 0;
+ uint8_t i, pit_idx, set_idx = I40E_FLXPLD_L4_IDX; /* use l4 by default */
+ int len;
+ uint8_t pctype = fdir_input->pctype;
+ struct i40e_customized_pctype *cus_pctype;
+
+ /* raw packet template - just copy contents of the raw packet */
+ if (fdir_input->flow_ext.pkt_template) {
+ memcpy(raw_pkt, fdir_input->flow.raw_flow.packet,
+ fdir_input->flow.raw_flow.length);
+ return 0;
+ }
+
+ /* fill the ethernet and IP head */
+ len = i40e_flow_fdir_fill_eth_ip_head(pf, fdir_input, raw_pkt,
+ !!fdir_input->flow_ext.vlan_tci);
+ if (len < 0)
+ return -EINVAL;
+
+ /* fill the L4 head */
+ if (pctype == I40E_FILTER_PCTYPE_NONF_IPV4_UDP) {
+ udp = (struct udp_hdr *)(raw_pkt + len);
+ payload = (unsigned char *)udp + sizeof(struct udp_hdr);
+ /**
+ * The source and destination fields in the transmitted packet
+ * need to be presented in a reversed order with respect
+ * to the expected received packets.
+ */
+ udp->src_port = fdir_input->flow.udp4_flow.dst_port;
+ udp->dst_port = fdir_input->flow.udp4_flow.src_port;
+ udp->dgram_len = rte_cpu_to_be_16(I40E_FDIR_UDP_DEFAULT_LEN);
+ } else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV4_TCP) {
+ tcp = (struct tcp_hdr *)(raw_pkt + len);
+ payload = (unsigned char *)tcp + sizeof(struct tcp_hdr);
+ /**
+ * The source and destination fields in the transmitted packet
+ * need to be presented in a reversed order with respect
+ * to the expected received packets.
+ */
+ tcp->src_port = fdir_input->flow.tcp4_flow.dst_port;
+ tcp->dst_port = fdir_input->flow.tcp4_flow.src_port;
+ tcp->data_off = I40E_FDIR_TCP_DEFAULT_DATAOFF;
+ } else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) {
+ sctp = (struct sctp_hdr *)(raw_pkt + len);
+ payload = (unsigned char *)sctp + sizeof(struct sctp_hdr);
+ /**
+ * The source and destination fields in the transmitted packet
+ * need to be presented in a reversed order with respect
+ * to the expected received packets.
+ */
+ sctp->src_port = fdir_input->flow.sctp4_flow.dst_port;
+ sctp->dst_port = fdir_input->flow.sctp4_flow.src_port;
+ sctp->tag = fdir_input->flow.sctp4_flow.verify_tag;
+ } else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV4_OTHER ||
+ pctype == I40E_FILTER_PCTYPE_FRAG_IPV4) {
+ payload = raw_pkt + len;
+ set_idx = I40E_FLXPLD_L3_IDX;
+ } else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV6_UDP) {
+ udp = (struct udp_hdr *)(raw_pkt + len);
+ payload = (unsigned char *)udp + sizeof(struct udp_hdr);
+ /**
+ * The source and destination fields in the transmitted packet
+ * need to be presented in a reversed order with respect
+ * to the expected received packets.
+ */
+ udp->src_port = fdir_input->flow.udp6_flow.dst_port;
+ udp->dst_port = fdir_input->flow.udp6_flow.src_port;
+ udp->dgram_len = rte_cpu_to_be_16(I40E_FDIR_IPv6_PAYLOAD_LEN);
+ } else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV6_TCP) {
+ tcp = (struct tcp_hdr *)(raw_pkt + len);
+ payload = (unsigned char *)tcp + sizeof(struct tcp_hdr);
+ /**
+ * The source and destination fields in the transmitted packet
+ * need to be presented in a reversed order with respect
+ * to the expected received packets.
+ */
+ tcp->data_off = I40E_FDIR_TCP_DEFAULT_DATAOFF;
+ tcp->src_port = fdir_input->flow.udp6_flow.dst_port;
+ tcp->dst_port = fdir_input->flow.udp6_flow.src_port;
+ } else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) {
+ sctp = (struct sctp_hdr *)(raw_pkt + len);
+ payload = (unsigned char *)sctp + sizeof(struct sctp_hdr);
+ /**
+ * The source and destination fields in the transmitted packet
+ * need to be presented in a reversed order with respect
+ * to the expected received packets.
+ */
+ sctp->src_port = fdir_input->flow.sctp6_flow.dst_port;
+ sctp->dst_port = fdir_input->flow.sctp6_flow.src_port;
+ sctp->tag = fdir_input->flow.sctp6_flow.verify_tag;
+ } else if (pctype == I40E_FILTER_PCTYPE_NONF_IPV6_OTHER ||
+ pctype == I40E_FILTER_PCTYPE_FRAG_IPV6) {
+ payload = raw_pkt + len;
+ set_idx = I40E_FLXPLD_L3_IDX;
+ } else if (pctype == I40E_FILTER_PCTYPE_L2_PAYLOAD) {
+ payload = raw_pkt + len;
+ /**
+ * An ARP packet is a special case in which the payload
+ * starts after the whole ARP header
+ */
+ if (fdir_input->flow.l2_flow.ether_type ==
+ rte_cpu_to_be_16(ETHER_TYPE_ARP))
+ payload += sizeof(struct arp_hdr);
+ set_idx = I40E_FLXPLD_L2_IDX;
+ } else if (fdir_input->flow_ext.customized_pctype) {
+ /* If customized pctype is used */
+ cus_pctype = i40e_flow_fdir_find_customized_pctype(pf, pctype);
+ if (cus_pctype->index == I40E_CUSTOMIZED_GTPC ||
+ cus_pctype->index == I40E_CUSTOMIZED_GTPU_IPV4 ||
+ cus_pctype->index == I40E_CUSTOMIZED_GTPU_IPV6 ||
+ cus_pctype->index == I40E_CUSTOMIZED_GTPU) {
+ udp = (struct udp_hdr *)(raw_pkt + len);
+ udp->dgram_len =
+ rte_cpu_to_be_16(I40E_FDIR_UDP_DEFAULT_LEN);
+
+ gtp = (struct rte_flow_item_gtp *)
+ ((unsigned char *)udp + sizeof(struct udp_hdr));
+ gtp->msg_len =
+ rte_cpu_to_be_16(I40E_FDIR_GTP_DEFAULT_LEN);
+ gtp->teid = fdir_input->flow.gtp_flow.teid;
+ gtp->msg_type = I40E_FDIR_GTP_MSG_TYPE_0X01;
+
+ /* GTP-C message type is not supported. */
+ if (cus_pctype->index == I40E_CUSTOMIZED_GTPC) {
+ udp->dst_port =
+ rte_cpu_to_be_16(I40E_FDIR_GTPC_DST_PORT);
+ gtp->v_pt_rsv_flags =
+ I40E_FDIR_GTP_VER_FLAG_0X32;
+ } else {
+ udp->dst_port =
+ rte_cpu_to_be_16(I40E_FDIR_GTPU_DST_PORT);
+ gtp->v_pt_rsv_flags =
+ I40E_FDIR_GTP_VER_FLAG_0X30;
+ }
+
+ if (cus_pctype->index == I40E_CUSTOMIZED_GTPU_IPV4) {
+ gtp->msg_type = I40E_FDIR_GTP_MSG_TYPE_0XFF;
+ gtp_ipv4 = (struct ipv4_hdr *)
+ ((unsigned char *)gtp +
+ sizeof(struct rte_flow_item_gtp));
+ gtp_ipv4->version_ihl =
+ I40E_FDIR_IP_DEFAULT_VERSION_IHL;
+ gtp_ipv4->next_proto_id = IPPROTO_IP;
+ gtp_ipv4->total_length =
+ rte_cpu_to_be_16(
+ I40E_FDIR_INNER_IP_DEFAULT_LEN);
+ payload = (unsigned char *)gtp_ipv4 +
+ sizeof(struct ipv4_hdr);
+ } else if (cus_pctype->index ==
+ I40E_CUSTOMIZED_GTPU_IPV6) {
+ gtp->msg_type = I40E_FDIR_GTP_MSG_TYPE_0XFF;
+ gtp_ipv6 = (struct ipv6_hdr *)
+ ((unsigned char *)gtp +
+ sizeof(struct rte_flow_item_gtp));
+ gtp_ipv6->vtc_flow =
+ rte_cpu_to_be_32(
+ I40E_FDIR_IPv6_DEFAULT_VTC_FLOW |
+ (0 << I40E_FDIR_IPv6_TC_OFFSET));
+ gtp_ipv6->proto = IPPROTO_NONE;
+ gtp_ipv6->payload_len =
+ rte_cpu_to_be_16(
+ I40E_FDIR_INNER_IPV6_DEFAULT_LEN);
+ gtp_ipv6->hop_limits =
+ I40E_FDIR_IPv6_DEFAULT_HOP_LIMITS;
+ payload = (unsigned char *)gtp_ipv6 +
+ sizeof(struct ipv6_hdr);
+ } else
+ payload = (unsigned char *)gtp +
+ sizeof(struct rte_flow_item_gtp);
+ }
+ } else {
+ PMD_DRV_LOG(ERR, "unknown pctype %u.",
+ fdir_input->pctype);
+ return -1;
+ }
+
+ /* fill the flexbytes to payload */
+ for (i = 0; i < I40E_MAX_FLXPLD_FIED; i++) {
+ pit_idx = set_idx * I40E_MAX_FLXPLD_FIED + i;
+ size = pf->fdir.flex_set[pit_idx].size;
+ if (size == 0)
+ continue;
+ dst = pf->fdir.flex_set[pit_idx].dst_offset * sizeof(uint16_t);
+ ptr = payload +
+ pf->fdir.flex_set[pit_idx].src_offset * sizeof(uint16_t);
(void)rte_memcpy(ptr,
&fdir_input->flow_ext.flexbytes[dst],
size * sizeof(uint16_t));
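
A note on the GTP branch above: the constructor overlays struct rte_flow_item_gtp directly on the raw packet to form the 8-byte mandatory GTPv1 header (flags, message type, message length, TEID). The sketch below only illustrates that overlay; the helper name and the literal 0x30/0xff values are not driver symbols (they are the standard GTPv1 "version 1, PT=1" flags byte and the G-PDU message type), and the static assert spells out the size assumption.

#include <stdint.h>
#include <rte_byteorder.h>
#include <rte_flow.h>

/* rte_flow_item_gtp is reused as the on-wire GTPv1 header, so it must be 8 bytes. */
_Static_assert(sizeof(struct rte_flow_item_gtp) == 8,
	       "rte_flow_item_gtp must match the 8-byte GTPv1 mandatory header");

/* Hypothetical helper mirroring the overlay done by the packet constructor. */
static inline void
fill_gtpu_header(uint8_t *buf, rte_be32_t teid, uint16_t payload_len)
{
	struct rte_flow_item_gtp *gtp = (struct rte_flow_item_gtp *)buf;

	gtp->v_pt_rsv_flags = 0x30;            /* version 1, protocol type GTP */
	gtp->msg_type = 0xff;                  /* G-PDU (encapsulated user data) */
	gtp->msg_len = rte_cpu_to_be_16(payload_len);
	gtp->teid = teid;                      /* caller passes a big-endian TEID */
}
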
@@ -1007,21 +1374,34 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq)
}
static int
-i40e_fdir_filter_convert(const struct rte_eth_fdir_filter *input,
+i40e_fdir_filter_convert(const struct i40e_fdir_filter_conf *input,
struct i40e_fdir_filter *filter)
{
- rte_memcpy(&filter->fdir, input, sizeof(struct rte_eth_fdir_filter));
+ rte_memcpy(&filter->fdir, input, sizeof(struct i40e_fdir_filter_conf));
+ if (input->input.flow_ext.pkt_template) {
+ filter->fdir.input.flow.raw_flow.packet = NULL;
+ filter->fdir.input.flow.raw_flow.length =
+ rte_hash_crc(input->input.flow.raw_flow.packet,
+ input->input.flow.raw_flow.length,
+ input->input.flow.raw_flow.pctype);
+ }
return 0;
}
/* Check if there exists the flow director filter */
static struct i40e_fdir_filter *
i40e_sw_fdir_filter_lookup(struct i40e_fdir_info *fdir_info,
- const struct rte_eth_fdir_input *input)
+ const struct i40e_fdir_input *input)
{
int ret;
- ret = rte_hash_lookup(fdir_info->hash_table, (const void *)input);
+ if (input->flow_ext.pkt_template)
+ ret = rte_hash_lookup_with_hash(fdir_info->hash_table,
+ (const void *)input,
+ input->flow.raw_flow.length);
+ else
+ ret = rte_hash_lookup(fdir_info->hash_table,
+ (const void *)input);
if (ret < 0)
return NULL;
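
Raw packet templates do not fit the fixed-size hash key, so the convert step above replaces the packet pointer with a CRC of the template (stored in raw_flow.length) and the lookup/insert/delete paths pass that value as a precomputed signature. Below is a minimal standalone sketch of the librte_hash pattern being used; the table name, sizes and the stand-in key type are assumptions, not driver values.

#include <errno.h>
#include <stdint.h>
#include <rte_hash.h>
#include <rte_hash_crc.h>

struct example_key {
	uint32_t template_crc;	/* plays the role of raw_flow.length */
	uint8_t pad[28];
};

static struct rte_hash *
example_table_create(void)
{
	struct rte_hash_parameters params = {
		.name = "fdir_sig_example",
		.entries = 1024,
		.key_len = sizeof(struct example_key),
		.hash_func = rte_hash_crc,
		.hash_func_init_val = 0,
		.socket_id = 0,
	};

	return rte_hash_create(&params);
}

static int
example_add_template(struct rte_hash *h, struct example_key *key,
		     const void *pkt, uint32_t pkt_len, uint32_t seed)
{
	/* Hash the variable-length template once, reuse the signature everywhere. */
	hash_sig_t sig = rte_hash_crc(pkt, pkt_len, seed);

	key->template_crc = sig;
	if (rte_hash_lookup_with_hash(h, key, sig) >= 0)
		return -EEXIST;

	return rte_hash_add_key_with_hash(h, key, sig);
}
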
@@ -1035,8 +1415,13 @@ i40e_sw_fdir_filter_insert(struct i40e_pf *pf, struct i40e_fdir_filter *filter)
struct i40e_fdir_info *fdir_info = &pf->fdir;
int ret;
- ret = rte_hash_add_key(fdir_info->hash_table,
- &filter->fdir.input);
+ if (filter->fdir.input.flow_ext.pkt_template)
+ ret = rte_hash_add_key_with_hash(fdir_info->hash_table,
+ &filter->fdir.input,
+ filter->fdir.input.flow.raw_flow.length);
+ else
+ ret = rte_hash_add_key(fdir_info->hash_table,
+ &filter->fdir.input);
if (ret < 0) {
PMD_DRV_LOG(ERR,
"Failed to insert fdir filter to hash table %d!",
@@ -1052,13 +1437,18 @@ i40e_sw_fdir_filter_insert(struct i40e_pf *pf, struct i40e_fdir_filter *filter)
/* Delete a flow director filter from the SW list */
int
-i40e_sw_fdir_filter_del(struct i40e_pf *pf, struct rte_eth_fdir_input *input)
+i40e_sw_fdir_filter_del(struct i40e_pf *pf, struct i40e_fdir_input *input)
{
struct i40e_fdir_info *fdir_info = &pf->fdir;
struct i40e_fdir_filter *filter;
int ret;
- ret = rte_hash_del_key(fdir_info->hash_table, input);
+ if (input->flow_ext.pkt_template)
+ ret = rte_hash_del_key_with_hash(fdir_info->hash_table,
+ input,
+ input->flow.raw_flow.length);
+ else
+ ret = rte_hash_del_key(fdir_info->hash_table, input);
if (ret < 0) {
PMD_DRV_LOG(ERR,
"Failed to delete fdir filter to hash table %d!",
@@ -1082,16 +1472,13 @@ i40e_sw_fdir_filter_del(struct i40e_pf *pf, struct rte_eth_fdir_input *input)
*/
int
i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
- const struct rte_eth_fdir_filter *filter,
- bool add)
+ const struct rte_eth_fdir_filter *filter,
+ bool add)
{
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
unsigned char *pkt = (unsigned char *)pf->fdir.prg_pkt;
enum i40e_filter_pctype pctype;
- struct i40e_fdir_info *fdir_info = &pf->fdir;
- struct i40e_fdir_filter *fdir_filter, *node;
- struct i40e_fdir_filter check_filter; /* Check if the filter exists */
int ret = 0;
if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_PERFECT) {
@@ -1100,7 +1487,8 @@ i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
return -ENOTSUP;
}
- if (!I40E_VALID_FLOW(filter->input.flow_type)) {
+ pctype = i40e_flowtype_to_pctype(pf->adapter, filter->input.flow_type);
+ if (pctype == I40E_FILTER_PCTYPE_INVALID) {
PMD_DRV_LOG(ERR, "invalid flow_type input.");
return -EINVAL;
}
@@ -1114,6 +1502,76 @@ i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
return -EINVAL;
}
+ memset(pkt, 0, I40E_FDIR_PKT_LEN);
+
+ ret = i40e_fdir_construct_pkt(pf, &filter->input, pkt);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "construct packet for fdir fails.");
+ return ret;
+ }
+
+ if (hw->mac.type == I40E_MAC_X722) {
+ /* get translated pctype value in fd pctype register */
+ pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl(
+ hw, I40E_GLQF_FD_PCTYPES((int)pctype));
+ }
+
+ ret = i40e_fdir_filter_programming(pf, pctype, filter, add);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "fdir programming fails for PCTYPE(%u).",
+ pctype);
+ return ret;
+ }
+
+ return ret;
+}
+
+/**
+ * i40e_flow_add_del_fdir_filter - add or remove a flow director filter.
+ * @dev: pointer to the ethernet device
+ * @filter: fdir filter entry
+ * @add: 0 - delete, 1 - add
+ */
+int
+i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev,
+ const struct i40e_fdir_filter_conf *filter,
+ bool add)
+{
+ struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+ unsigned char *pkt = (unsigned char *)pf->fdir.prg_pkt;
+ enum i40e_filter_pctype pctype;
+ struct i40e_fdir_info *fdir_info = &pf->fdir;
+ struct i40e_fdir_filter *fdir_filter, *node;
+ struct i40e_fdir_filter check_filter; /* Check if the filter exists */
+ int ret = 0;
+
+ if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_PERFECT) {
+ PMD_DRV_LOG(ERR, "FDIR is not enabled, please check the mode in fdir_conf.");
+ return -ENOTSUP;
+ }
+
+ if (filter->action.rx_queue >= pf->dev_data->nb_rx_queues) {
+ PMD_DRV_LOG(ERR, "Invalid queue ID");
+ return -EINVAL;
+ }
+ if (filter->input.flow_ext.is_vf &&
+ filter->input.flow_ext.dst_id >= pf->vf_num) {
+ PMD_DRV_LOG(ERR, "Invalid VF ID");
+ return -EINVAL;
+ }
+ if (filter->input.flow_ext.pkt_template) {
+ if (filter->input.flow.raw_flow.length > I40E_FDIR_PKT_LEN ||
+ !filter->input.flow.raw_flow.packet) {
+ PMD_DRV_LOG(ERR, "Invalid raw packet template"
+ " flow filter parameters!");
+ return -EINVAL;
+ }
+ pctype = filter->input.flow.raw_flow.pctype;
+ } else {
+ pctype = filter->input.pctype;
+ }
+
/* Check if there is the filter in SW list */
memset(&check_filter, 0, sizeof(check_filter));
i40e_fdir_filter_convert(filter, &check_filter);
@@ -1132,7 +1590,7 @@ i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
memset(pkt, 0, I40E_FDIR_PKT_LEN);
- ret = i40e_fdir_construct_pkt(pf, &filter->input, pkt);
+ ret = i40e_flow_fdir_construct_pkt(pf, &filter->input, pkt);
if (ret < 0) {
PMD_DRV_LOG(ERR, "construct packet for fdir fails.");
return ret;
@@ -1141,13 +1599,10 @@ i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
if (hw->mac.type == I40E_MAC_X722) {
/* get translated pctype value in fd pctype register */
pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl(
- hw, I40E_GLQF_FD_PCTYPES(
- (int)i40e_flowtype_to_pctype(
- filter->input.flow_type)));
- } else
- pctype = i40e_flowtype_to_pctype(filter->input.flow_type);
+ hw, I40E_GLQF_FD_PCTYPES((int)pctype));
+ }
- ret = i40e_fdir_filter_programming(pf, pctype, filter, add);
+ ret = i40e_flow_fdir_filter_programming(pf, pctype, filter, add);
if (ret < 0) {
PMD_DRV_LOG(ERR, "fdir programming fails for PCTYPE(%u).",
pctype);
@@ -1302,6 +1757,140 @@ i40e_fdir_filter_programming(struct i40e_pf *pf,
}
/*
+ * i40e_flow_fdir_filter_programming - Program a flow director filter rule.
+ * This is done with a Flow Director Programming Descriptor followed by a
+ * packet structure that contains the filter fields that need to match.
+ * @pf: board private structure
+ * @pctype: pctype
+ * @filter: fdir filter entry
+ * @add: 0 - delete, 1 - add
+ */
+static int
+i40e_flow_fdir_filter_programming(struct i40e_pf *pf,
+ enum i40e_filter_pctype pctype,
+ const struct i40e_fdir_filter_conf *filter,
+ bool add)
+{
+ struct i40e_tx_queue *txq = pf->fdir.txq;
+ struct i40e_rx_queue *rxq = pf->fdir.rxq;
+ const struct i40e_fdir_action *fdir_action = &filter->action;
+ volatile struct i40e_tx_desc *txdp;
+ volatile struct i40e_filter_program_desc *fdirdp;
+ uint32_t td_cmd;
+ uint16_t vsi_id, i;
+ uint8_t dest;
+
+ PMD_DRV_LOG(INFO, "filling filter programming descriptor.");
+ fdirdp = (volatile struct i40e_filter_program_desc *)
+ (&txq->tx_ring[txq->tx_tail]);
+
+ fdirdp->qindex_flex_ptype_vsi =
+ rte_cpu_to_le_32((fdir_action->rx_queue <<
+ I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
+ I40E_TXD_FLTR_QW0_QINDEX_MASK);
+
+ fdirdp->qindex_flex_ptype_vsi |=
+ rte_cpu_to_le_32((fdir_action->flex_off <<
+ I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
+ I40E_TXD_FLTR_QW0_FLEXOFF_MASK);
+
+ fdirdp->qindex_flex_ptype_vsi |=
+ rte_cpu_to_le_32((pctype <<
+ I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
+ I40E_TXD_FLTR_QW0_PCTYPE_MASK);
+
+ if (filter->input.flow_ext.is_vf)
+ vsi_id = pf->vfs[filter->input.flow_ext.dst_id].vsi->vsi_id;
+ else
+ /* Use LAN VSI Id by default */
+ vsi_id = pf->main_vsi->vsi_id;
+ fdirdp->qindex_flex_ptype_vsi |=
+ rte_cpu_to_le_32(((uint32_t)vsi_id <<
+ I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
+ I40E_TXD_FLTR_QW0_DEST_VSI_MASK);
+
+ fdirdp->dtype_cmd_cntindex =
+ rte_cpu_to_le_32(I40E_TX_DESC_DTYPE_FILTER_PROG);
+
+ if (add)
+ fdirdp->dtype_cmd_cntindex |= rte_cpu_to_le_32(
+ I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
+ I40E_TXD_FLTR_QW1_PCMD_SHIFT);
+ else
+ fdirdp->dtype_cmd_cntindex |= rte_cpu_to_le_32(
+ I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
+ I40E_TXD_FLTR_QW1_PCMD_SHIFT);
+
+ if (fdir_action->behavior == I40E_FDIR_REJECT)
+ dest = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET;
+ else if (fdir_action->behavior == I40E_FDIR_ACCEPT)
+ dest = I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX;
+ else if (fdir_action->behavior == I40E_FDIR_PASSTHRU)
+ dest = I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_OTHER;
+ else {
+ PMD_DRV_LOG(ERR, "Failed to program FDIR filter: unsupported fdir behavior.");
+ return -EINVAL;
+ }
+
+ fdirdp->dtype_cmd_cntindex |= rte_cpu_to_le_32((dest <<
+ I40E_TXD_FLTR_QW1_DEST_SHIFT) &
+ I40E_TXD_FLTR_QW1_DEST_MASK);
+
+ fdirdp->dtype_cmd_cntindex |=
+ rte_cpu_to_le_32((fdir_action->report_status <<
+ I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
+ I40E_TXD_FLTR_QW1_FD_STATUS_MASK);
+
+ fdirdp->dtype_cmd_cntindex |=
+ rte_cpu_to_le_32(I40E_TXD_FLTR_QW1_CNT_ENA_MASK);
+ fdirdp->dtype_cmd_cntindex |=
+ rte_cpu_to_le_32(
+ ((uint32_t)pf->fdir.match_counter_index <<
+ I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
+ I40E_TXD_FLTR_QW1_CNTINDEX_MASK);
+
+ fdirdp->fd_id = rte_cpu_to_le_32(filter->soft_id);
+
+ PMD_DRV_LOG(INFO, "filling transmit descriptor.");
+ txdp = &txq->tx_ring[txq->tx_tail + 1];
+ txdp->buffer_addr = rte_cpu_to_le_64(pf->fdir.dma_addr);
+ td_cmd = I40E_TX_DESC_CMD_EOP |
+ I40E_TX_DESC_CMD_RS |
+ I40E_TX_DESC_CMD_DUMMY;
+
+ txdp->cmd_type_offset_bsz =
+ i40e_build_ctob(td_cmd, 0, I40E_FDIR_PKT_LEN, 0);
+
+ txq->tx_tail += 2; /* set 2 descriptors above, fdirdp and txdp */
+ if (txq->tx_tail >= txq->nb_tx_desc)
+ txq->tx_tail = 0;
+ /* Update the tx tail register */
+ rte_wmb();
+ I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+ for (i = 0; i < I40E_FDIR_MAX_WAIT_US; i++) {
+ if ((txdp->cmd_type_offset_bsz &
+ rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) ==
+ rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+ break;
+ rte_delay_us(1);
+ }
+ if (i >= I40E_FDIR_MAX_WAIT_US) {
+ PMD_DRV_LOG(ERR,
+ "Failed to program FDIR filter: time out to get DD on tx queue.");
+ return -ETIMEDOUT;
+ }
+ /* delay 10 ms in total before checking the programming status */
+ rte_delay_us(I40E_FDIR_MAX_WAIT_US);
+ if (i40e_check_fdir_programming_status(rxq) < 0) {
+ PMD_DRV_LOG(ERR,
+ "Failed to program FDIR filter: programming status reported.");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
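
The programming path above follows the usual i40e pattern for synchronous descriptor completion: fill the two descriptors, bump the tail register, then busy-poll the transmit descriptor's DTYPE bits until they read back DESC_DONE or the wait budget expires. A distilled sketch of that polling loop follows; the helper and its parameters are illustrative, not an existing driver function.

#include <errno.h>
#include <stdint.h>
#include <rte_byteorder.h>
#include <rte_cycles.h>

/* Poll a little-endian descriptor qword until (qword & mask) == done, or time out. */
static int
poll_desc_done(volatile uint64_t *qword, uint64_t mask, uint64_t done,
	       unsigned int max_wait_us)
{
	unsigned int i;

	for (i = 0; i < max_wait_us; i++) {
		if ((*qword & rte_cpu_to_le_64(mask)) == rte_cpu_to_le_64(done))
			return 0;
		rte_delay_us(1);	/* ~1 us per iteration, like the loop above */
	}

	return -ETIMEDOUT;
}
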
+/*
* i40e_fdir_flush - clear all filters of Flow Director table
* @pf: board private structure
*/
@@ -1384,7 +1973,6 @@ i40e_fdir_info_get_flex_mask(struct i40e_pf *pf,
{
struct i40e_fdir_flex_mask *mask;
struct rte_eth_fdir_flex_mask *ptr = flex_mask;
- struct i40e_hw *hw = I40E_PF_TO_HW(pf);
uint16_t flow_type;
uint8_t i, j;
uint16_t off_bytes, mask_tmp;
@@ -1393,14 +1981,11 @@ i40e_fdir_info_get_flex_mask(struct i40e_pf *pf,
i <= I40E_FILTER_PCTYPE_L2_PAYLOAD;
i++) {
mask = &pf->fdir.flex_mask[i];
- if (hw->mac.type == I40E_MAC_X722) {
- if (!I40E_VALID_PCTYPE_X722((enum i40e_filter_pctype)i))
- continue;
- } else {
- if (!I40E_VALID_PCTYPE((enum i40e_filter_pctype)i))
- continue;
- }
- flow_type = i40e_pctype_to_flowtype((enum i40e_filter_pctype)i);
+ flow_type = i40e_pctype_to_flowtype(pf->adapter,
+ (enum i40e_filter_pctype)i);
+ if (flow_type == RTE_ETH_FLOW_UNKNOWN)
+ continue;
+
for (j = 0; j < I40E_FDIR_MAX_FLEXWORD_NUM; j++) {
if (mask->word_mask & I40E_FLEX_WORD_MASK(j)) {
ptr->mask[j * sizeof(uint16_t)] = UINT8_MAX;
@@ -1580,7 +2165,7 @@ i40e_fdir_filter_restore(struct i40e_pf *pf)
uint32_t best_cnt; /**< Number of filters in best effort spaces. */
TAILQ_FOREACH(f, fdir_list, rules)
- i40e_add_del_fdir_filter(dev, &f->fdir, TRUE);
+ i40e_flow_add_del_fdir_filter(dev, &f->fdir, TRUE);
fdstat = I40E_READ_REG(hw, I40E_PFQF_FDSTAT);
guarant_cnt =
diff --git a/drivers/net/i40e/i40e_flow.c b/drivers/net/i40e/i40e_flow.c
index b92719a3..7e4936e3 100644
--- a/drivers/net/i40e/i40e_flow.c
+++ b/drivers/net/i40e/i40e_flow.c
@@ -41,7 +41,6 @@
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_log.h>
-#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_eth_ctrl.h>
#include <rte_tailq.h>
@@ -84,11 +83,11 @@ static int i40e_flow_parse_ethertype_action(struct rte_eth_dev *dev,
static int i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
const struct rte_flow_item *pattern,
struct rte_flow_error *error,
- struct rte_eth_fdir_filter *filter);
+ struct i40e_fdir_filter_conf *filter);
static int i40e_flow_parse_fdir_action(struct rte_eth_dev *dev,
const struct rte_flow_action *actions,
struct rte_flow_error *error,
- struct rte_eth_fdir_filter *filter);
+ struct i40e_fdir_filter_conf *filter);
static int i40e_flow_parse_tunnel_action(struct rte_eth_dev *dev,
const struct rte_flow_action *actions,
struct rte_flow_error *error,
@@ -125,6 +124,12 @@ static int i40e_flow_parse_mpls_filter(struct rte_eth_dev *dev,
const struct rte_flow_action actions[],
struct rte_flow_error *error,
union i40e_filter_t *filter);
+static int i40e_flow_parse_gtp_filter(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error,
+ union i40e_filter_t *filter);
static int i40e_flow_destroy_ethertype_filter(struct i40e_pf *pf,
struct i40e_ethertype_filter *filter);
static int i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf,
@@ -189,6 +194,40 @@ static enum rte_flow_item_type pattern_fdir_ipv4_sctp[] = {
RTE_FLOW_ITEM_TYPE_END,
};
+static enum rte_flow_item_type pattern_fdir_ipv4_gtpc[] = {
+ RTE_FLOW_ITEM_TYPE_ETH,
+ RTE_FLOW_ITEM_TYPE_IPV4,
+ RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_GTPC,
+ RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv4_gtpu[] = {
+ RTE_FLOW_ITEM_TYPE_ETH,
+ RTE_FLOW_ITEM_TYPE_IPV4,
+ RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_GTPU,
+ RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv4_gtpu_ipv4[] = {
+ RTE_FLOW_ITEM_TYPE_ETH,
+ RTE_FLOW_ITEM_TYPE_IPV4,
+ RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_GTPU,
+ RTE_FLOW_ITEM_TYPE_IPV4,
+ RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv4_gtpu_ipv6[] = {
+ RTE_FLOW_ITEM_TYPE_ETH,
+ RTE_FLOW_ITEM_TYPE_IPV4,
+ RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_GTPU,
+ RTE_FLOW_ITEM_TYPE_IPV6,
+ RTE_FLOW_ITEM_TYPE_END,
+};
+
static enum rte_flow_item_type pattern_fdir_ipv6[] = {
RTE_FLOW_ITEM_TYPE_ETH,
RTE_FLOW_ITEM_TYPE_IPV6,
@@ -216,6 +255,40 @@ static enum rte_flow_item_type pattern_fdir_ipv6_sctp[] = {
RTE_FLOW_ITEM_TYPE_END,
};
+static enum rte_flow_item_type pattern_fdir_ipv6_gtpc[] = {
+ RTE_FLOW_ITEM_TYPE_ETH,
+ RTE_FLOW_ITEM_TYPE_IPV6,
+ RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_GTPC,
+ RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv6_gtpu[] = {
+ RTE_FLOW_ITEM_TYPE_ETH,
+ RTE_FLOW_ITEM_TYPE_IPV6,
+ RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_GTPU,
+ RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv6_gtpu_ipv4[] = {
+ RTE_FLOW_ITEM_TYPE_ETH,
+ RTE_FLOW_ITEM_TYPE_IPV6,
+ RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_GTPU,
+ RTE_FLOW_ITEM_TYPE_IPV4,
+ RTE_FLOW_ITEM_TYPE_END,
+};
+
+static enum rte_flow_item_type pattern_fdir_ipv6_gtpu_ipv6[] = {
+ RTE_FLOW_ITEM_TYPE_ETH,
+ RTE_FLOW_ITEM_TYPE_IPV6,
+ RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_GTPU,
+ RTE_FLOW_ITEM_TYPE_IPV6,
+ RTE_FLOW_ITEM_TYPE_END,
+};
+
static enum rte_flow_item_type pattern_fdir_ethertype_raw_1[] = {
RTE_FLOW_ITEM_TYPE_ETH,
RTE_FLOW_ITEM_TYPE_RAW,
@@ -1576,10 +1649,18 @@ static struct i40e_valid_pattern i40e_supported_patterns[] = {
{ pattern_fdir_ipv4_udp, i40e_flow_parse_fdir_filter },
{ pattern_fdir_ipv4_tcp, i40e_flow_parse_fdir_filter },
{ pattern_fdir_ipv4_sctp, i40e_flow_parse_fdir_filter },
+ { pattern_fdir_ipv4_gtpc, i40e_flow_parse_fdir_filter },
+ { pattern_fdir_ipv4_gtpu, i40e_flow_parse_fdir_filter },
+ { pattern_fdir_ipv4_gtpu_ipv4, i40e_flow_parse_fdir_filter },
+ { pattern_fdir_ipv4_gtpu_ipv6, i40e_flow_parse_fdir_filter },
{ pattern_fdir_ipv6, i40e_flow_parse_fdir_filter },
{ pattern_fdir_ipv6_udp, i40e_flow_parse_fdir_filter },
{ pattern_fdir_ipv6_tcp, i40e_flow_parse_fdir_filter },
{ pattern_fdir_ipv6_sctp, i40e_flow_parse_fdir_filter },
+ { pattern_fdir_ipv6_gtpc, i40e_flow_parse_fdir_filter },
+ { pattern_fdir_ipv6_gtpu, i40e_flow_parse_fdir_filter },
+ { pattern_fdir_ipv6_gtpu_ipv4, i40e_flow_parse_fdir_filter },
+ { pattern_fdir_ipv6_gtpu_ipv6, i40e_flow_parse_fdir_filter },
/* FDIR - support default flow type with flexible payload */
{ pattern_fdir_ethertype_raw_1, i40e_flow_parse_fdir_filter },
{ pattern_fdir_ethertype_raw_2, i40e_flow_parse_fdir_filter },
@@ -1732,6 +1813,11 @@ static struct i40e_valid_pattern i40e_supported_patterns[] = {
{ pattern_mpls_2, i40e_flow_parse_mpls_filter },
{ pattern_mpls_3, i40e_flow_parse_mpls_filter },
{ pattern_mpls_4, i40e_flow_parse_mpls_filter },
+ /* GTP-C & GTP-U */
+ { pattern_fdir_ipv4_gtpc, i40e_flow_parse_gtp_filter },
+ { pattern_fdir_ipv4_gtpu, i40e_flow_parse_gtp_filter },
+ { pattern_fdir_ipv6_gtpc, i40e_flow_parse_gtp_filter },
+ { pattern_fdir_ipv6_gtpu, i40e_flow_parse_gtp_filter },
/* QINQ */
{ pattern_qinq_1, i40e_flow_parse_qinq_filter },
};
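
With the GTP patterns registered here, an application can steer GTP-U traffic by TEID through the generic rte_flow API, provided the GTP DDP profile has been loaded so pf->gtp_support is set. A hedged usage sketch follows; the port, queue and TEID values are examples and the function is not part of the driver.

#include <stdint.h>
#include <rte_byteorder.h>
#include <rte_flow.h>

/* Example: direct GTP-U packets carrying TEID 0x1234 on a port to RX queue 3. */
static struct rte_flow *
gtpu_teid_to_queue(uint16_t port_id, struct rte_flow_error *err)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_gtp spec = { .teid = rte_cpu_to_be_32(0x1234) };
	struct rte_flow_item_gtp mask = { .teid = rte_cpu_to_be_32(UINT32_MAX) };
	struct rte_flow_action_queue queue = { .index = 3 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
		{ .type = RTE_FLOW_ITEM_TYPE_GTPU, .spec = &spec, .mask = &mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	/* The parser requires a full TEID mask; other GTP fields stay unmatched. */
	return rte_flow_create(port_id, &attr, pattern, actions, err);
}
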
@@ -2302,20 +2388,58 @@ i40e_flow_set_fdir_inset(struct i40e_pf *pf,
return 0;
}
+static uint8_t
+i40e_flow_fdir_get_pctype_value(struct i40e_pf *pf,
+ enum rte_flow_item_type item_type,
+ struct i40e_fdir_filter_conf *filter)
+{
+ struct i40e_customized_pctype *cus_pctype = NULL;
+
+ switch (item_type) {
+ case RTE_FLOW_ITEM_TYPE_GTPC:
+ cus_pctype = i40e_find_customized_pctype(pf,
+ I40E_CUSTOMIZED_GTPC);
+ break;
+ case RTE_FLOW_ITEM_TYPE_GTPU:
+ if (!filter->input.flow_ext.inner_ip)
+ cus_pctype = i40e_find_customized_pctype(pf,
+ I40E_CUSTOMIZED_GTPU);
+ else if (filter->input.flow_ext.iip_type ==
+ I40E_FDIR_IPTYPE_IPV4)
+ cus_pctype = i40e_find_customized_pctype(pf,
+ I40E_CUSTOMIZED_GTPU_IPV4);
+ else if (filter->input.flow_ext.iip_type ==
+ I40E_FDIR_IPTYPE_IPV6)
+ cus_pctype = i40e_find_customized_pctype(pf,
+ I40E_CUSTOMIZED_GTPU_IPV6);
+ break;
+ default:
+ PMD_DRV_LOG(ERR, "Unsupported item type");
+ break;
+ }
+
+ if (cus_pctype)
+ return cus_pctype->pctype;
+
+ return I40E_FILTER_PCTYPE_INVALID;
+}
+
/* 1. Last in item should be NULL as range is not supported.
* 2. Supported patterns: refer to array i40e_supported_patterns.
- * 3. Supported flow type and input set: refer to array
+ * 3. Default supported flow type and input set: refer to array
* valid_fdir_inset_table in i40e_ethdev.c.
* 4. Mask of fields which need to be matched should be
* filled with 1.
 * 5. Mask of fields which need not be matched should be
* filled with 0.
+ * 6. GTP profile supports GTPv1 only.
+ * 7. GTP-C response message ('source_port' = 2123) is not supported.
*/
static int
i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
const struct rte_flow_item *pattern,
struct rte_flow_error *error,
- struct rte_eth_fdir_filter *filter)
+ struct i40e_fdir_filter_conf *filter)
{
struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
const struct rte_flow_item *item = pattern;
@@ -2326,15 +2450,16 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
const struct rte_flow_item_tcp *tcp_spec, *tcp_mask;
const struct rte_flow_item_udp *udp_spec, *udp_mask;
const struct rte_flow_item_sctp *sctp_spec, *sctp_mask;
+ const struct rte_flow_item_gtp *gtp_spec, *gtp_mask;
const struct rte_flow_item_raw *raw_spec, *raw_mask;
const struct rte_flow_item_vf *vf_spec;
- uint32_t flow_type = RTE_ETH_FLOW_UNKNOWN;
- enum i40e_filter_pctype pctype;
+ uint8_t pctype = 0;
uint64_t input_set = I40E_INSET_NONE;
uint16_t frag_off;
enum rte_flow_item_type item_type;
enum rte_flow_item_type l3 = RTE_FLOW_ITEM_TYPE_END;
+ enum rte_flow_item_type cus_proto = RTE_FLOW_ITEM_TYPE_END;
uint32_t i, j;
uint8_t ipv6_addr_mask[16] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
@@ -2352,12 +2477,14 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
uint16_t outer_tpid;
uint16_t ether_type;
uint32_t vtc_flow_cpu;
+ bool outer_ip = true;
int ret;
memset(off_arr, 0, sizeof(off_arr));
memset(len_arr, 0, sizeof(len_arr));
memset(flex_mask, 0, I40E_FDIR_MAX_FLEX_LEN);
outer_tpid = i40e_get_outer_vlan(dev);
+ filter->input.flow_ext.customized_pctype = false;
for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
if (item->last) {
rte_flow_error_set(error, EINVAL,
@@ -2402,7 +2529,7 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
}
}
- flow_type = RTE_ETH_FLOW_L2_PAYLOAD;
+ pctype = I40E_FILTER_PCTYPE_L2_PAYLOAD;
layer_idx = I40E_FLXPLD_L2_IDX;
break;
@@ -2420,7 +2547,7 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
}
}
- flow_type = RTE_ETH_FLOW_L2_PAYLOAD;
+ pctype = I40E_FILTER_PCTYPE_L2_PAYLOAD;
layer_idx = I40E_FLXPLD_L2_IDX;
break;
@@ -2430,8 +2557,10 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
(const struct rte_flow_item_ipv4 *)item->spec;
ipv4_mask =
(const struct rte_flow_item_ipv4 *)item->mask;
+ pctype = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
+ layer_idx = I40E_FLXPLD_L3_IDX;
- if (ipv4_spec && ipv4_mask) {
+ if (ipv4_spec && ipv4_mask && outer_ip) {
/* Check IPv4 mask and update input set */
if (ipv4_mask->hdr.version_ihl ||
ipv4_mask->hdr.total_length ||
@@ -2456,14 +2585,12 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
if (ipv4_mask->hdr.next_proto_id == UINT8_MAX)
input_set |= I40E_INSET_IPV4_PROTO;
- /* Get filter info */
- flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_OTHER;
/* Check if it is fragment. */
frag_off = ipv4_spec->hdr.fragment_offset;
frag_off = rte_be_to_cpu_16(frag_off);
if (frag_off & IPV4_HDR_OFFSET_MASK ||
frag_off & IPV4_HDR_MF_FLAG)
- flow_type = RTE_ETH_FLOW_FRAG_IPV4;
+ pctype = I40E_FILTER_PCTYPE_FRAG_IPV4;
/* Get the filter info */
filter->input.flow.ip4_flow.proto =
@@ -2476,9 +2603,20 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
ipv4_spec->hdr.src_addr;
filter->input.flow.ip4_flow.dst_ip =
ipv4_spec->hdr.dst_addr;
+ } else if (!ipv4_spec && !ipv4_mask && !outer_ip) {
+ filter->input.flow_ext.inner_ip = true;
+ filter->input.flow_ext.iip_type =
+ I40E_FDIR_IPTYPE_IPV4;
+ } else if ((ipv4_spec || ipv4_mask) && !outer_ip) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid inner IPv4 mask.");
+ return -rte_errno;
}
- layer_idx = I40E_FLXPLD_L3_IDX;
+ if (outer_ip)
+ outer_ip = false;
break;
case RTE_FLOW_ITEM_TYPE_IPV6:
@@ -2487,8 +2625,10 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
(const struct rte_flow_item_ipv6 *)item->spec;
ipv6_mask =
(const struct rte_flow_item_ipv6 *)item->mask;
+ pctype = I40E_FILTER_PCTYPE_NONF_IPV6_OTHER;
+ layer_idx = I40E_FLXPLD_L3_IDX;
- if (ipv6_spec && ipv6_mask) {
+ if (ipv6_spec && ipv6_mask && outer_ip) {
/* Check IPv6 mask and update input set */
if (ipv6_mask->hdr.payload_len) {
rte_flow_error_set(error, EINVAL,
@@ -2535,20 +2675,32 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
/* Check if it is fragment. */
if (ipv6_spec->hdr.proto ==
I40E_IPV6_FRAG_HEADER)
- flow_type =
- RTE_ETH_FLOW_FRAG_IPV6;
- else
- flow_type =
- RTE_ETH_FLOW_NONFRAG_IPV6_OTHER;
+ pctype = I40E_FILTER_PCTYPE_FRAG_IPV6;
+ } else if (!ipv6_spec && !ipv6_mask && !outer_ip) {
+ filter->input.flow_ext.inner_ip = true;
+ filter->input.flow_ext.iip_type =
+ I40E_FDIR_IPTYPE_IPV6;
+ } else if ((ipv6_spec || ipv6_mask) && !outer_ip) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid inner IPv6 mask");
+ return -rte_errno;
}
- layer_idx = I40E_FLXPLD_L3_IDX;
-
+ if (outer_ip)
+ outer_ip = false;
break;
case RTE_FLOW_ITEM_TYPE_TCP:
tcp_spec = (const struct rte_flow_item_tcp *)item->spec;
tcp_mask = (const struct rte_flow_item_tcp *)item->mask;
+ if (l3 == RTE_FLOW_ITEM_TYPE_IPV4)
+ pctype =
+ I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+ else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6)
+ pctype =
+ I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
if (tcp_spec && tcp_mask) {
/* Check TCP mask and update input set */
if (tcp_mask->hdr.sent_seq ||
@@ -2571,13 +2723,6 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
input_set |= I40E_INSET_DST_PORT;
/* Get filter info */
- if (l3 == RTE_FLOW_ITEM_TYPE_IPV4)
- flow_type =
- RTE_ETH_FLOW_NONFRAG_IPV4_TCP;
- else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6)
- flow_type =
- RTE_ETH_FLOW_NONFRAG_IPV6_TCP;
-
if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) {
filter->input.flow.tcp4_flow.src_port =
tcp_spec->hdr.src_port;
@@ -2598,6 +2743,13 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
udp_spec = (const struct rte_flow_item_udp *)item->spec;
udp_mask = (const struct rte_flow_item_udp *)item->mask;
+ if (l3 == RTE_FLOW_ITEM_TYPE_IPV4)
+ pctype =
+ I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+ else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6)
+ pctype =
+ I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+
if (udp_spec && udp_mask) {
/* Check UDP mask and update input set*/
if (udp_mask->hdr.dgram_len ||
@@ -2615,13 +2767,6 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
input_set |= I40E_INSET_DST_PORT;
/* Get filter info */
- if (l3 == RTE_FLOW_ITEM_TYPE_IPV4)
- flow_type =
- RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
- else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6)
- flow_type =
- RTE_ETH_FLOW_NONFRAG_IPV6_UDP;
-
if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) {
filter->input.flow.udp4_flow.src_port =
udp_spec->hdr.src_port;
@@ -2638,12 +2783,50 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
layer_idx = I40E_FLXPLD_L4_IDX;
break;
+ case RTE_FLOW_ITEM_TYPE_GTPC:
+ case RTE_FLOW_ITEM_TYPE_GTPU:
+ if (!pf->gtp_support) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Unsupported protocol");
+ return -rte_errno;
+ }
+
+ gtp_spec = (const struct rte_flow_item_gtp *)item->spec;
+ gtp_mask = (const struct rte_flow_item_gtp *)item->mask;
+
+ if (gtp_spec && gtp_mask) {
+ if (gtp_mask->v_pt_rsv_flags ||
+ gtp_mask->msg_type ||
+ gtp_mask->msg_len ||
+ gtp_mask->teid != UINT32_MAX) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid GTP mask");
+ return -rte_errno;
+ }
+
+ filter->input.flow.gtp_flow.teid =
+ gtp_spec->teid;
+ filter->input.flow_ext.customized_pctype = true;
+ cus_proto = item_type;
+ }
+ break;
case RTE_FLOW_ITEM_TYPE_SCTP:
sctp_spec =
(const struct rte_flow_item_sctp *)item->spec;
sctp_mask =
(const struct rte_flow_item_sctp *)item->mask;
+ if (l3 == RTE_FLOW_ITEM_TYPE_IPV4)
+ pctype =
+ I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
+ else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6)
+ pctype =
+ I40E_FILTER_PCTYPE_NONF_IPV6_SCTP;
+
if (sctp_spec && sctp_mask) {
/* Check SCTP mask and update input set */
if (sctp_mask->hdr.cksum) {
@@ -2662,13 +2845,6 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
input_set |= I40E_INSET_SCTP_VT;
/* Get filter info */
- if (l3 == RTE_FLOW_ITEM_TYPE_IPV4)
- flow_type =
- RTE_ETH_FLOW_NONFRAG_IPV4_SCTP;
- else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6)
- flow_type =
- RTE_ETH_FLOW_NONFRAG_IPV6_SCTP;
-
if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) {
filter->input.flow.sctp4_flow.src_port =
sctp_spec->hdr.src_port;
@@ -2776,51 +2952,58 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
}
}
- pctype = i40e_flowtype_to_pctype(flow_type);
- if (pctype == 0 || pctype > I40E_FILTER_PCTYPE_L2_PAYLOAD) {
- rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ITEM, item,
- "Unsupported flow type");
- return -rte_errno;
+ /* Get customized pctype value */
+ if (filter->input.flow_ext.customized_pctype) {
+ pctype = i40e_flow_fdir_get_pctype_value(pf, cus_proto, filter);
+ if (pctype == I40E_FILTER_PCTYPE_INVALID) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Unsupported pctype");
+ return -rte_errno;
+ }
}
- ret = i40e_flow_set_fdir_inset(pf, pctype, input_set);
- if (ret == -1) {
- rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ITEM, item,
- "Conflict with the first rule's input set.");
- return -rte_errno;
- } else if (ret == -EINVAL) {
- rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ITEM, item,
- "Invalid pattern mask.");
- return -rte_errno;
- }
+ /* If customized pctype is not used, set fdir configuration.*/
+ if (!filter->input.flow_ext.customized_pctype) {
+ ret = i40e_flow_set_fdir_inset(pf, pctype, input_set);
+ if (ret == -1) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "Conflict with the first rule's input set.");
+ return -rte_errno;
+ } else if (ret == -EINVAL) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "Invalid pattern mask.");
+ return -rte_errno;
+ }
- filter->input.flow_type = flow_type;
+ /* Store flex mask to SW */
+ ret = i40e_flow_store_flex_mask(pf, pctype, flex_mask);
+ if (ret == -1) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Exceed maximal number of bitmasks");
+ return -rte_errno;
+ } else if (ret == -2) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Conflict with the first flexible rule");
+ return -rte_errno;
+ } else if (ret > 0)
+ cfg_flex_msk = false;
- /* Store flex mask to SW */
- ret = i40e_flow_store_flex_mask(pf, pctype, flex_mask);
- if (ret == -1) {
- rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
- "Exceed maximal number of bitmasks");
- return -rte_errno;
- } else if (ret == -2) {
- rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
- "Conflict with the first flexible rule");
- return -rte_errno;
- } else if (ret > 0)
- cfg_flex_msk = false;
+ if (cfg_flex_pit)
+ i40e_flow_set_fdir_flex_pit(pf, layer_idx, raw_id);
- if (cfg_flex_pit)
- i40e_flow_set_fdir_flex_pit(pf, layer_idx, raw_id);
+ if (cfg_flex_msk)
+ i40e_flow_set_fdir_flex_msk(pf, pctype);
+ }
- if (cfg_flex_msk)
- i40e_flow_set_fdir_flex_msk(pf, pctype);
+ filter->input.pctype = pctype;
return 0;
}
@@ -2832,7 +3015,7 @@ static int
i40e_flow_parse_fdir_action(struct rte_eth_dev *dev,
const struct rte_flow_action *actions,
struct rte_flow_error *error,
- struct rte_eth_fdir_filter *filter)
+ struct i40e_fdir_filter_conf *filter)
{
struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
const struct rte_flow_action *act;
@@ -2855,13 +3038,13 @@ i40e_flow_parse_fdir_action(struct rte_eth_dev *dev,
"Invalid queue ID for FDIR.");
return -rte_errno;
}
- filter->action.behavior = RTE_ETH_FDIR_ACCEPT;
+ filter->action.behavior = I40E_FDIR_ACCEPT;
break;
case RTE_FLOW_ACTION_TYPE_DROP:
- filter->action.behavior = RTE_ETH_FDIR_REJECT;
+ filter->action.behavior = I40E_FDIR_REJECT;
break;
case RTE_FLOW_ACTION_TYPE_PASSTHRU:
- filter->action.behavior = RTE_ETH_FDIR_PASSTHRU;
+ filter->action.behavior = I40E_FDIR_PASSTHRU;
break;
default:
rte_flow_error_set(error, EINVAL,
@@ -2876,11 +3059,11 @@ i40e_flow_parse_fdir_action(struct rte_eth_dev *dev,
switch (act->type) {
case RTE_FLOW_ACTION_TYPE_MARK:
mark_spec = (const struct rte_flow_action_mark *)act->conf;
- filter->action.report_status = RTE_ETH_FDIR_REPORT_ID;
+ filter->action.report_status = I40E_FDIR_REPORT_ID;
filter->soft_id = mark_spec->id;
break;
case RTE_FLOW_ACTION_TYPE_FLAG:
- filter->action.report_status = RTE_ETH_FDIR_NO_REPORT_STATUS;
+ filter->action.report_status = I40E_FDIR_NO_REPORT_STATUS;
break;
case RTE_FLOW_ACTION_TYPE_END:
return 0;
@@ -2911,7 +3094,7 @@ i40e_flow_parse_fdir_filter(struct rte_eth_dev *dev,
struct rte_flow_error *error,
union i40e_filter_t *filter)
{
- struct rte_eth_fdir_filter *fdir_filter =
+ struct i40e_fdir_filter_conf *fdir_filter =
&filter->fdir_filter;
int ret;
@@ -3646,6 +3829,148 @@ i40e_flow_parse_mpls_filter(struct rte_eth_dev *dev,
}
/* 1. Last in item should be NULL as range is not supported.
+ * 2. Supported filter types: GTP TEID.
+ * 3. Mask of fields which need to be matched should be
+ * filled with 1.
+ * 4. Mask of fields which need not be matched should be
+ * filled with 0.
+ * 5. GTP profile supports GTPv1 only.
+ * 6. GTP-C response message ('source_port' = 2123) is not supported.
+ */
+static int
+i40e_flow_parse_gtp_pattern(struct rte_eth_dev *dev,
+ const struct rte_flow_item *pattern,
+ struct rte_flow_error *error,
+ struct i40e_tunnel_filter_conf *filter)
+{
+ struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+ const struct rte_flow_item *item = pattern;
+ const struct rte_flow_item_gtp *gtp_spec;
+ const struct rte_flow_item_gtp *gtp_mask;
+ enum rte_flow_item_type item_type;
+
+ if (!pf->gtp_support) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "GTP is not supported by default.");
+ return -rte_errno;
+ }
+
+ for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+ if (item->last) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Not support range");
+ return -rte_errno;
+ }
+ item_type = item->type;
+ switch (item_type) {
+ case RTE_FLOW_ITEM_TYPE_ETH:
+ if (item->spec || item->mask) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid ETH item");
+ return -rte_errno;
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV4:
+ filter->ip_type = I40E_TUNNEL_IPTYPE_IPV4;
+ /* IPv4 is used to describe protocol,
+ * spec and mask should be NULL.
+ */
+ if (item->spec || item->mask) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid IPv4 item");
+ return -rte_errno;
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_UDP:
+ if (item->spec || item->mask) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid UDP item");
+ return -rte_errno;
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_GTPC:
+ case RTE_FLOW_ITEM_TYPE_GTPU:
+ gtp_spec =
+ (const struct rte_flow_item_gtp *)item->spec;
+ gtp_mask =
+ (const struct rte_flow_item_gtp *)item->mask;
+
+ if (!gtp_spec || !gtp_mask) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid GTP item");
+ return -rte_errno;
+ }
+
+ if (gtp_mask->v_pt_rsv_flags ||
+ gtp_mask->msg_type ||
+ gtp_mask->msg_len ||
+ gtp_mask->teid != UINT32_MAX) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "Invalid GTP mask");
+ return -rte_errno;
+ }
+
+ if (item_type == RTE_FLOW_ITEM_TYPE_GTPC)
+ filter->tunnel_type = I40E_TUNNEL_TYPE_GTPC;
+ else if (item_type == RTE_FLOW_ITEM_TYPE_GTPU)
+ filter->tunnel_type = I40E_TUNNEL_TYPE_GTPU;
+
+ filter->tenant_id = rte_be_to_cpu_32(gtp_spec->teid);
+
+ break;
+ default:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+i40e_flow_parse_gtp_filter(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error,
+ union i40e_filter_t *filter)
+{
+ struct i40e_tunnel_filter_conf *tunnel_filter =
+ &filter->consistent_tunnel_filter;
+ int ret;
+
+ ret = i40e_flow_parse_gtp_pattern(dev, pattern,
+ error, tunnel_filter);
+ if (ret)
+ return ret;
+
+ ret = i40e_flow_parse_tunnel_action(dev, actions, error, tunnel_filter);
+ if (ret)
+ return ret;
+
+ ret = i40e_flow_parse_attr(attr, error);
+ if (ret)
+ return ret;
+
+ cons_filter_type = RTE_ETH_FILTER_TUNNEL;
+
+ return ret;
+}
+
+/* 1. Last in item should be NULL as range is not supported.
* 2. Supported filter types: QINQ.
* 3. Mask of fields which need to be matched should be
* filled with 1.
@@ -3877,7 +4202,7 @@ i40e_flow_create(struct rte_eth_dev *dev,
i40e_ethertype_filter_list);
break;
case RTE_ETH_FILTER_FDIR:
- ret = i40e_add_del_fdir_filter(dev,
+ ret = i40e_flow_add_del_fdir_filter(dev,
&cons_filter.fdir_filter, 1);
if (ret)
goto free_flow;
@@ -3927,7 +4252,7 @@ i40e_flow_destroy(struct rte_eth_dev *dev,
(struct i40e_tunnel_filter *)flow->rule);
break;
case RTE_ETH_FILTER_FDIR:
- ret = i40e_add_del_fdir_filter(dev,
+ ret = i40e_flow_add_del_fdir_filter(dev,
&((struct i40e_fdir_filter *)flow->rule)->fdir, 0);
break;
default:
@@ -4016,12 +4341,12 @@ i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf,
vsi = vf->vsi;
}
- if (((filter->input.flags & I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoUDP) ==
- I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoUDP) ||
- ((filter->input.flags & I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoGRE) ==
- I40E_AQC_ADD_CLOUD_FILTER_TEID_MPLSoGRE) ||
- ((filter->input.flags & I40E_AQC_ADD_CLOUD_FILTER_CUSTOM_QINQ) ==
- I40E_AQC_ADD_CLOUD_FILTER_CUSTOM_QINQ))
+ if (((filter->input.flags & I40E_AQC_ADD_CLOUD_FILTER_0X11) ==
+ I40E_AQC_ADD_CLOUD_FILTER_0X11) ||
+ ((filter->input.flags & I40E_AQC_ADD_CLOUD_FILTER_0X12) ==
+ I40E_AQC_ADD_CLOUD_FILTER_0X12) ||
+ ((filter->input.flags & I40E_AQC_ADD_CLOUD_FILTER_0X10) ==
+ I40E_AQC_ADD_CLOUD_FILTER_0X10))
big_buffer = 1;
if (big_buffer)
diff --git a/drivers/net/i40e/i40e_pf.c b/drivers/net/i40e/i40e_pf.c
index 100f8dc2..94bb0cfd 100644
--- a/drivers/net/i40e/i40e_pf.c
+++ b/drivers/net/i40e/i40e_pf.c
@@ -44,7 +44,6 @@
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
-#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
@@ -538,73 +537,6 @@ send_msg:
return ret;
}
-static int
-i40e_pf_host_process_cmd_config_vsi_queues_ext(struct i40e_pf_vf *vf,
- uint8_t *msg,
- uint16_t msglen,
- bool b_op)
-{
- struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf);
- struct i40e_vsi *vsi = vf->vsi;
- struct virtchnl_vsi_queue_config_ext_info *vc_vqcei =
- (struct virtchnl_vsi_queue_config_ext_info *)msg;
- struct virtchnl_queue_pair_ext_info *vc_qpei;
- int i, ret = I40E_SUCCESS;
-
- if (!b_op) {
- i40e_pf_host_send_msg_to_vf(
- vf,
- VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT,
- I40E_NOT_SUPPORTED, NULL, 0);
- return ret;
- }
-
- if (!msg || vc_vqcei->num_queue_pairs > vsi->nb_qps ||
- vc_vqcei->num_queue_pairs > I40E_MAX_VSI_QP ||
- msglen < I40E_VIRTCHNL_CONFIG_VSI_QUEUES_SIZE(vc_vqcei,
- vc_vqcei->num_queue_pairs)) {
- PMD_DRV_LOG(ERR, "vsi_queue_config_ext_info argument wrong");
- ret = I40E_ERR_PARAM;
- goto send_msg;
- }
-
- vc_qpei = vc_vqcei->qpair;
- for (i = 0; i < vc_vqcei->num_queue_pairs; i++) {
- if (vc_qpei[i].rxq.queue_id > vsi->nb_qps - 1 ||
- vc_qpei[i].txq.queue_id > vsi->nb_qps - 1) {
- ret = I40E_ERR_PARAM;
- goto send_msg;
- }
- /*
- * Apply VF RX queue setting to HMC.
- * If the opcode is VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT,
- * then the extra information of
- * 'struct virtchnl_queue_pair_ext_info' is needed,
- * otherwise set the last parameter to NULL.
- */
- if (i40e_pf_host_hmc_config_rxq(hw, vf, &vc_qpei[i].rxq,
- vc_qpei[i].rxq_ext.crcstrip) != I40E_SUCCESS) {
- PMD_DRV_LOG(ERR, "Configure RX queue HMC failed");
- ret = I40E_ERR_PARAM;
- goto send_msg;
- }
-
- /* Apply VF TX queue setting to HMC */
- if (i40e_pf_host_hmc_config_txq(hw, vf, &vc_qpei[i].txq) !=
- I40E_SUCCESS) {
- PMD_DRV_LOG(ERR, "Configure TX queue HMC failed");
- ret = I40E_ERR_PARAM;
- goto send_msg;
- }
- }
-
-send_msg:
- i40e_pf_host_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT,
- ret, NULL, 0);
-
- return ret;
-}
-
static void
i40e_pf_config_irq_link_list(struct i40e_pf_vf *vf,
struct virtchnl_vector_map *vvm)
@@ -714,7 +646,7 @@ i40e_pf_host_process_cmd_config_irq_map(struct i40e_pf_vf *vf,
(struct virtchnl_irq_map_info *)msg;
struct virtchnl_vector_map *map;
int i;
- uint16_t vector_id;
+ uint16_t vector_id, itr_idx;
unsigned long qbit_max;
if (!b_op) {
@@ -741,12 +673,13 @@ i40e_pf_host_process_cmd_config_irq_map(struct i40e_pf_vf *vf,
vf->vsi->msix_intr = irqmap->vecmap[0].vector_id;
vf->vsi->nb_msix = irqmap->num_vectors;
vf->vsi->nb_used_qps = vf->vsi->nb_qps;
+ itr_idx = irqmap->vecmap[0].rxitr_idx;
/* Don't care how the TX/RX queue mapping with this vector.
* Link all VF RX queues together. Only did mapping work.
* VF can disable/enable the intr by itself.
*/
- i40e_vsi_queues_bind_intr(vf->vsi);
+ i40e_vsi_queues_bind_intr(vf->vsi, itr_idx);
goto send_msg;
}
@@ -909,7 +842,7 @@ i40e_pf_host_process_cmd_add_ether_address(struct i40e_pf_vf *vf,
for (i = 0; i < addr_list->num_elements; i++) {
mac = (struct ether_addr *)(addr_list->list[i].addr);
- (void)rte_memcpy(&filter.mac_addr, mac, ETHER_ADDR_LEN);
+ rte_memcpy(&filter.mac_addr, mac, ETHER_ADDR_LEN);
filter.filter_type = RTE_MACVLAN_PERFECT_MATCH;
if (is_zero_ether_addr(mac) ||
i40e_vsi_add_mac(vf->vsi, &filter)) {
@@ -1157,42 +1090,13 @@ i40e_pf_host_process_cmd_disable_vlan_strip(struct i40e_pf_vf *vf, bool b_op)
return ret;
}
-static int
-i40e_pf_host_process_cmd_cfg_pvid(struct i40e_pf_vf *vf,
- uint8_t *msg,
- uint16_t msglen,
- bool b_op)
-{
- int ret = I40E_SUCCESS;
- struct virtchnl_pvid_info *tpid_info =
- (struct virtchnl_pvid_info *)msg;
-
- if (!b_op) {
- i40e_pf_host_send_msg_to_vf(
- vf,
- I40E_VIRTCHNL_OP_CFG_VLAN_PVID,
- I40E_NOT_SUPPORTED, NULL, 0);
- return ret;
- }
-
- if (msg == NULL || msglen != sizeof(*tpid_info)) {
- ret = I40E_ERR_PARAM;
- goto send_msg;
- }
-
- ret = i40e_vsi_vlan_pvid_set(vf->vsi, &tpid_info->info);
-
-send_msg:
- i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_CFG_VLAN_PVID,
- ret, NULL, 0);
-
- return ret;
-}
-
void
i40e_notify_vf_link_status(struct rte_eth_dev *dev, struct i40e_pf_vf *vf)
{
+ struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf);
struct virtchnl_pf_event event;
+ uint16_t vf_id = vf->vf_idx;
+ uint32_t tval, rval;
event.event = VIRTCHNL_EVENT_LINK_CHANGE;
event.event_data.link_event.link_status =
@@ -1224,8 +1128,15 @@ i40e_notify_vf_link_status(struct rte_eth_dev *dev, struct i40e_pf_vf *vf)
break;
}
- i40e_pf_host_send_msg_to_vf(vf, VIRTCHNL_OP_EVENT,
- I40E_SUCCESS, (uint8_t *)&event, sizeof(event));
+ tval = I40E_READ_REG(hw, I40E_VF_ATQLEN(vf_id));
+ rval = I40E_READ_REG(hw, I40E_VF_ARQLEN(vf_id));
+
+ if (tval & I40E_VF_ATQLEN_ATQLEN_MASK ||
+ tval & I40E_VF_ATQLEN_ATQENABLE_MASK ||
+ rval & I40E_VF_ARQLEN_ARQLEN_MASK ||
+ rval & I40E_VF_ARQLEN_ARQENABLE_MASK)
+ i40e_pf_host_send_msg_to_vf(vf, VIRTCHNL_OP_EVENT,
+ I40E_SUCCESS, (uint8_t *)&event, sizeof(event));
}
void
@@ -1300,11 +1211,6 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
i40e_pf_host_process_cmd_config_vsi_queues(vf, msg,
msglen, b_op);
break;
- case VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT:
- PMD_DRV_LOG(INFO, "OP_CONFIG_VSI_QUEUES_EXT received");
- i40e_pf_host_process_cmd_config_vsi_queues_ext(vf, msg,
- msglen, b_op);
- break;
case VIRTCHNL_OP_CONFIG_IRQ_MAP:
PMD_DRV_LOG(INFO, "OP_CONFIG_IRQ_MAP received");
i40e_pf_host_process_cmd_config_irq_map(vf, msg, msglen, b_op);
@@ -1359,10 +1265,6 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
PMD_DRV_LOG(INFO, "OP_DISABLE_VLAN_STRIPPING received");
i40e_pf_host_process_cmd_disable_vlan_strip(vf, b_op);
break;
- case I40E_VIRTCHNL_OP_CFG_VLAN_PVID:
- PMD_DRV_LOG(INFO, "OP_CFG_VLAN_PVID received");
- i40e_pf_host_process_cmd_cfg_pvid(vf, msg, msglen, b_op);
- break;
/* Don't add command supported below, which will
* return an error code.
*/
diff --git a/drivers/net/i40e/i40e_pf.h b/drivers/net/i40e/i40e_pf.h
index 7afb7eae..04116637 100644
--- a/drivers/net/i40e/i40e_pf.h
+++ b/drivers/net/i40e/i40e_pf.h
@@ -34,58 +34,9 @@
#ifndef _I40E_PF_H_
#define _I40E_PF_H_
-/* VERSION info to exchange between VF and PF host. In case VF works with
- * ND kernel driver, it reads VIRTCHNL_VERSION_MAJOR/MINOR. In
- * case works with DPDK host, it reads version below. Then VF realize who it
- * is talking to and use proper language to communicate.
- * */
-#define I40E_DPDK_SIGNATURE ('D' << 24 | 'P' << 16 | 'D' << 8 | 'K')
-#define I40E_DPDK_VERSION_MAJOR I40E_DPDK_SIGNATURE
-#define I40E_DPDK_VERSION_MINOR 0
-
/* Default setting on number of VSIs that VF can contain */
#define I40E_DEFAULT_VF_VSI_NUM 1
-#define I40E_DPDK_OFFSET 0x100
-
-/* DPDK pf driver specific command to VF */
-enum virtchnl_ops_dpdk {
- /*
- * Keep some gap between Linux PF commands and
- * DPDK PF extended commands.
- */
- I40E_VIRTCHNL_OP_CFG_VLAN_PVID = VIRTCHNL_OP_VERSION +
- I40E_DPDK_OFFSET,
- VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT,
-};
-
-/* A structure to support extended info of a receive queue. */
-struct virtchnl_rxq_ext_info {
- uint8_t crcstrip;
-};
-
-/*
- * A structure to support extended info of queue pairs, an additional field
- * is added, comparing to original 'struct virtchnl_queue_pair_info'.
- */
-struct virtchnl_queue_pair_ext_info {
- /* vsi_id and queue_id should be identical for both rx and tx queues.*/
- struct virtchnl_txq_info txq;
- struct virtchnl_rxq_info rxq;
- struct virtchnl_rxq_ext_info rxq_ext;
-};
-
-/*
- * A structure to support extended info of VSI queue pairs,
- * 'struct virtchnl_queue_pair_ext_info' is used, see its original
- * of 'struct virtchnl_queue_pair_info'.
- */
-struct virtchnl_vsi_queue_config_ext_info {
- uint16_t vsi_id;
- uint16_t num_queue_pairs;
- struct virtchnl_queue_pair_ext_info qpair[0];
-};
-
struct virtchnl_vlan_offload_info {
uint16_t vsi_id;
uint8_t enable_vlan_strip;
@@ -99,17 +50,6 @@ struct virtchnl_vlan_offload_info {
#define I40E_VIRTCHNL_CONFIG_VSI_QUEUES_SIZE(x, n) \
(sizeof(*(x)) + sizeof((x)->qpair[0]) * (n))
-/*
- * I40E_VIRTCHNL_OP_CFG_VLAN_PVID
- * VF sends this message to enable/disable pvid. If it's
- * enable op, needs to specify the pvid. PF returns status
- * code in retval.
- */
-struct virtchnl_pvid_info {
- uint16_t vsi_id;
- struct i40e_vsi_vlan_pvid_info info;
-};
-
int i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool do_hw_reset);
void i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
uint16_t abs_vf_id, uint32_t opcode,
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index d42c23c0..8b4f612f 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -108,7 +108,7 @@ i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
{
if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
(1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
- mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
+ mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
mb->vlan_tci =
rte_le_to_cpu_16(rxdp->wb.qword0.lo_dword.l2tag1);
PMD_RX_LOG(DEBUG, "Descriptor l2tag1: %u",
@@ -589,7 +589,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
mb->nb_segs = 1;
mb->port = rxq->port_id;
dma_addr = rte_cpu_to_le_64(\
- rte_mbuf_data_dma_addr_default(mb));
+ rte_mbuf_data_iova_default(mb));
rxdp[i].read.hdr_addr = 0;
rxdp[i].read.pkt_addr = dma_addr;
}
@@ -752,7 +752,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
rxm = rxe->mbuf;
rxe->mbuf = nmb;
dma_addr =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+ rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
rxdp->read.hdr_addr = 0;
rxdp->read.pkt_addr = dma_addr;
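These RX/TX hunks replace the physical-address mbuf helpers with their IOVA-named equivalents. A minimal sketch of the intended equivalence, assuming the usual rte_mbuf layout (an IOVA base plus a data offset); the helpers below are illustrative stand-ins, the real ones live in rte_mbuf.h.

#include <stdint.h>

/* Illustrative mbuf subset; the real struct rte_mbuf has many more fields. */
struct mbuf_sketch {
	void	*buf_addr;	/* virtual address of the buffer */
	uint64_t buf_iova;	/* bus/IO virtual address (was buf_physaddr) */
	uint16_t data_off;	/* start of packet data inside the buffer */
};

/* Roughly what rte_mbuf_data_iova() resolves to. */
static inline uint64_t
data_iova(const struct mbuf_sketch *m)
{
	return m->buf_iova + m->data_off;
}

/* Roughly what rte_mbuf_data_iova_default() resolves to, with
 * PKTMBUF_HEADROOM standing in for the default data offset. */
#define PKTMBUF_HEADROOM 128
static inline uint64_t
data_iova_default(const struct mbuf_sketch *m)
{
	return m->buf_iova + PKTMBUF_HEADROOM;
}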
@@ -869,7 +869,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
rxm = rxe->mbuf;
rxe->mbuf = nmb;
dma_addr =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+ rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
/* Set data buffer address and data length of the mbuf */
rxdp->read.hdr_addr = 0;
@@ -1202,7 +1202,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Setup TX Descriptor */
slen = m_seg->data_len;
- buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
+ buf_dma_addr = rte_mbuf_data_iova(m_seg);
PMD_TX_LOG(DEBUG, "mbuf: %p, TDD[%u]:\n"
"buf_dma_addr: %#"PRIx64";\n"
@@ -1301,7 +1301,7 @@ tx4(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkts)
uint32_t i;
for (i = 0; i < 4; i++, txdp++, pkts++) {
- dma_addr = rte_mbuf_data_dma_addr(*pkts);
+ dma_addr = rte_mbuf_data_iova(*pkts);
txdp->buffer_addr = rte_cpu_to_le_64(dma_addr);
txdp->cmd_type_offset_bsz =
i40e_build_ctob((uint32_t)I40E_TD_CMD, 0,
@@ -1315,7 +1315,7 @@ tx1(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkts)
{
uint64_t dma_addr;
- dma_addr = rte_mbuf_data_dma_addr(*pkts);
+ dma_addr = rte_mbuf_data_iova(*pkts);
txdp->buffer_addr = rte_cpu_to_le_64(dma_addr);
txdp->cmd_type_offset_bsz =
i40e_build_ctob((uint32_t)I40E_TD_CMD, 0,
@@ -1734,36 +1734,42 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp)
{
- struct i40e_vsi *vsi;
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
- struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
struct i40e_adapter *ad =
I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+ struct i40e_vsi *vsi;
+ struct i40e_pf *pf = NULL;
+ struct i40e_vf *vf = NULL;
struct i40e_rx_queue *rxq;
const struct rte_memzone *rz;
uint32_t ring_size;
uint16_t len, i;
- uint16_t base, bsf, tc_mapping;
- int use_def_burst_func = 1;
+ uint16_t reg_idx, base, bsf, tc_mapping;
+ int q_offset, use_def_burst_func = 1;
if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) {
- struct i40e_vf *vf =
- I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+ vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
vsi = &vf->vsi;
- } else
+ if (!vsi)
+ return -EINVAL;
+ reg_idx = queue_idx;
+ } else {
+ pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx);
-
- if (vsi == NULL) {
- PMD_DRV_LOG(ERR, "VSI not available or queue "
- "index exceeds the maximum");
- return I40E_ERR_PARAM;
+ if (!vsi)
+ return -EINVAL;
+ q_offset = i40e_get_queue_offset_by_qindex(pf, queue_idx);
+ if (q_offset < 0)
+ return -EINVAL;
+ reg_idx = vsi->base_queue + q_offset;
}
+
if (nb_desc % I40E_ALIGN_RING_DESC != 0 ||
- (nb_desc > I40E_MAX_RING_DESC) ||
- (nb_desc < I40E_MIN_RING_DESC)) {
+ (nb_desc > I40E_MAX_RING_DESC) ||
+ (nb_desc < I40E_MIN_RING_DESC)) {
PMD_DRV_LOG(ERR, "Number (%u) of receive descriptors is "
"invalid", nb_desc);
- return I40E_ERR_PARAM;
+ return -EINVAL;
}
/* Free memory if needed */
@@ -1786,12 +1792,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
rxq->nb_rx_desc = nb_desc;
rxq->rx_free_thresh = rx_conf->rx_free_thresh;
rxq->queue_id = queue_idx;
- if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF)
- rxq->reg_idx = queue_idx;
- else /* PF device */
- rxq->reg_idx = vsi->base_queue +
- i40e_get_queue_offset_by_qindex(pf, queue_idx);
-
+ rxq->reg_idx = reg_idx;
rxq->port_id = dev->data->port_id;
rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
0 : ETHER_CRC_LEN);
@@ -1822,7 +1823,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
/* Zero all the descriptors in the ring. */
memset(rz->addr, 0, ring_size);
- rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+ rxq->rx_ring_phys_addr = rz->iova;
rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
len = (uint16_t)(nb_desc + RTE_PMD_I40E_RX_MAX_BURST);
@@ -2012,34 +2013,40 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
unsigned int socket_id,
const struct rte_eth_txconf *tx_conf)
{
- struct i40e_vsi *vsi;
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
- struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+ struct i40e_vsi *vsi;
+ struct i40e_pf *pf = NULL;
+ struct i40e_vf *vf = NULL;
struct i40e_tx_queue *txq;
const struct rte_memzone *tz;
uint32_t ring_size;
uint16_t tx_rs_thresh, tx_free_thresh;
- uint16_t i, base, bsf, tc_mapping;
+ uint16_t reg_idx, i, base, bsf, tc_mapping;
+ int q_offset;
if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) {
- struct i40e_vf *vf =
- I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+ vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
vsi = &vf->vsi;
- } else
+ if (!vsi)
+ return -EINVAL;
+ reg_idx = queue_idx;
+ } else {
+ pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx);
-
- if (vsi == NULL) {
- PMD_DRV_LOG(ERR, "VSI is NULL, or queue index (%u) "
- "exceeds the maximum", queue_idx);
- return I40E_ERR_PARAM;
+ if (!vsi)
+ return -EINVAL;
+ q_offset = i40e_get_queue_offset_by_qindex(pf, queue_idx);
+ if (q_offset < 0)
+ return -EINVAL;
+ reg_idx = vsi->base_queue + q_offset;
}
if (nb_desc % I40E_ALIGN_RING_DESC != 0 ||
- (nb_desc > I40E_MAX_RING_DESC) ||
- (nb_desc < I40E_MIN_RING_DESC)) {
+ (nb_desc > I40E_MAX_RING_DESC) ||
+ (nb_desc < I40E_MIN_RING_DESC)) {
PMD_DRV_LOG(ERR, "Number (%u) of transmit descriptors is "
"invalid", nb_desc);
- return I40E_ERR_PARAM;
+ return -EINVAL;
}
/**
@@ -2148,18 +2155,13 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->hthresh = tx_conf->tx_thresh.hthresh;
txq->wthresh = tx_conf->tx_thresh.wthresh;
txq->queue_id = queue_idx;
- if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF)
- txq->reg_idx = queue_idx;
- else /* PF device */
- txq->reg_idx = vsi->base_queue +
- i40e_get_queue_offset_by_qindex(pf, queue_idx);
-
+ txq->reg_idx = reg_idx;
txq->port_id = dev->data->port_id;
txq->txq_flags = tx_conf->txq_flags;
txq->vsi = vsi;
txq->tx_deferred_start = tx_conf->tx_deferred_start;
- txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+ txq->tx_ring_phys_addr = tz->iova;
txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
/* Allocate software ring */
@@ -2221,12 +2223,8 @@ i40e_memzone_reserve(const char *name, uint32_t len, int socket_id)
if (mz)
return mz;
- if (rte_xen_dom0_supported())
- mz = rte_memzone_reserve_bounded(name, len,
- socket_id, 0, I40E_RING_BASE_ALIGN, RTE_PGSIZE_2M);
- else
- mz = rte_memzone_reserve_aligned(name, len,
- socket_id, 0, I40E_RING_BASE_ALIGN);
+ mz = rte_memzone_reserve_aligned(name, len,
+ socket_id, 0, I40E_RING_BASE_ALIGN);
return mz;
}
@@ -2307,18 +2305,40 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
void
i40e_tx_queue_release_mbufs(struct i40e_tx_queue *txq)
{
+ struct rte_eth_dev *dev;
uint16_t i;
if (!txq || !txq->sw_ring) {
PMD_DRV_LOG(DEBUG, "Pointer to txq or sw_ring is NULL");
return;
}
+
+ dev = &rte_eth_devices[txq->port_id];
- for (i = 0; i < txq->nb_tx_desc; i++) {
- if (txq->sw_ring[i].mbuf) {
+ /**
+ * vPMD tx will not set sw_ring's mbuf to NULL after free,
+ * so need to free remains more carefully.
+ */
+ if (dev->tx_pkt_burst == i40e_xmit_pkts_vec) {
+ i = txq->tx_next_dd - txq->tx_rs_thresh + 1;
+ if (txq->tx_tail < i) {
+ for (; i < txq->nb_tx_desc; i++) {
+ rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
+ txq->sw_ring[i].mbuf = NULL;
+ }
+ i = 0;
+ }
+ for (; i < txq->tx_tail; i++) {
rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
txq->sw_ring[i].mbuf = NULL;
}
+ } else {
+ for (i = 0; i < txq->nb_tx_desc; i++) {
+ if (txq->sw_ring[i].mbuf) {
+ rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
+ txq->sw_ring[i].mbuf = NULL;
+ }
+ }
}
}
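The vector TX path leaves stale pointers in sw_ring after freeing, so the release path above only walks the window that may still hold live mbufs, wrapping at the end of the ring; in the driver that window starts at tx_next_dd - tx_rs_thresh + 1. A standalone sketch of the wrap-around walk, with a simplified ring entry type and an empty stand-in for the free call.

#include <stdint.h>

struct ring_entry { void *mbuf; };

static void free_one(void *mbuf) { (void)mbuf; /* rte_pktmbuf_free_seg() in the driver */ }

/* Free entries in [first_live, tail), wrapping at ring_size. */
static void
release_live_window(struct ring_entry *ring, uint16_t ring_size,
		    uint16_t first_live, uint16_t tail)
{
	uint16_t i = first_live;

	if (tail < i) {			/* window wraps past the ring end */
		for (; i < ring_size; i++) {
			free_one(ring[i].mbuf);
			ring[i].mbuf = NULL;
		}
		i = 0;
	}
	for (; i < tail; i++) {		/* remainder up to the tail */
		free_one(ring[i].mbuf);
		ring[i].mbuf = NULL;
	}
}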
@@ -2431,7 +2451,7 @@ i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
mbuf->port = rxq->port_id;
dma_addr =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
+ rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
rxd = &rxq->rx_ring[i];
rxd->read.pkt_addr = dma_addr;
@@ -2675,7 +2695,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
txq->reg_idx = pf->fdir.fdir_vsi->base_queue;
txq->vsi = pf->fdir.fdir_vsi;
- txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+ txq->tx_ring_phys_addr = tz->iova;
txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
/*
* don't need to allocate software ring and reset for the fdir
@@ -2731,7 +2751,7 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
rxq->vsi = pf->fdir.fdir_vsi;
- rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+ rxq->rx_ring_phys_addr = rz->iova;
rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
/*
@@ -2941,6 +2961,64 @@ i40e_set_default_ptype_table(struct rte_eth_dev *dev)
ad->ptype_tbl[i] = i40e_get_default_pkt_type(i);
}
+void __attribute__((cold))
+i40e_set_default_pctype_table(struct rte_eth_dev *dev)
+{
+ struct i40e_adapter *ad =
+ I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+ struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ int i;
+
+ for (i = 0; i < I40E_FLOW_TYPE_MAX; i++)
+ ad->pctypes_tbl[i] = 0ULL;
+ ad->flow_types_mask = 0ULL;
+ ad->pctypes_mask = 0ULL;
+
+ ad->pctypes_tbl[RTE_ETH_FLOW_FRAG_IPV4] =
+ (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_UDP] =
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_TCP] =
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_SCTP] =
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_OTHER] =
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER);
+ ad->pctypes_tbl[RTE_ETH_FLOW_FRAG_IPV6] =
+ (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_UDP] =
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_TCP] =
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_SCTP] =
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_OTHER] =
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER);
+ ad->pctypes_tbl[RTE_ETH_FLOW_L2_PAYLOAD] =
+ (1ULL << I40E_FILTER_PCTYPE_L2_PAYLOAD);
+
+ if (hw->mac.type == I40E_MAC_X722) {
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_UDP] |=
+ (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_UDP] |=
+ (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_TCP] |=
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_UDP] |=
+ (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_UDP] |=
+ (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
+ ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_TCP] |=
+ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
+ }
+
+ for (i = 0; i < I40E_FLOW_TYPE_MAX; i++) {
+ if (ad->pctypes_tbl[i])
+ ad->flow_types_mask |= (1ULL << i);
+ ad->pctypes_mask |= ad->pctypes_tbl[i];
+ }
+}
+
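The new table maps each flow type index to a bitmask of hardware packet classification types (pctypes), with flow_types_mask and pctypes_mask as summary bitmaps. A minimal sketch, with placeholder sizes, of how such a table is typically consulted by the rest of the driver.

#include <stdint.h>
#include <stdbool.h>

#define FLOW_TYPE_MAX 22		/* stand-in for I40E_FLOW_TYPE_MAX */

struct adapter_sketch {
	uint64_t pctypes_tbl[FLOW_TYPE_MAX];	/* flow type -> pctype bitmask */
	uint64_t flow_types_mask;		/* bit i set if flow type i is mapped */
	uint64_t pctypes_mask;			/* union of all mapped pctype bits */
};

/* Is this flow type backed by at least one hardware pctype? */
static bool
flow_type_supported(const struct adapter_sketch *ad, uint16_t flow_type)
{
	return flow_type < FLOW_TYPE_MAX &&
	       (ad->flow_types_mask & (1ULL << flow_type)) != 0;
}

/* Walk the pctype bits behind one flow type (e.g. to program RSS or FDIR). */
static void
for_each_pctype(const struct adapter_sketch *ad, uint16_t flow_type,
		void (*cb)(unsigned int pctype))
{
	uint64_t bits = ad->pctypes_tbl[flow_type];
	unsigned int pctype;

	for (pctype = 0; pctype < 64; pctype++)
		if (bits & (1ULL << pctype))
			cb(pctype);
}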
/* Stubs needed for linkage when CONFIG_RTE_I40E_INC_VECTOR is set to 'n' */
int __attribute__((weak))
i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 20084d64..06c6a659 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -121,7 +121,7 @@ struct i40e_rx_queue {
uint16_t rxrearm_start; /**< the idx we start the re-arming from */
uint64_t mbuf_initializer; /**< value to init mbufs */
- uint8_t port_id; /**< device port ID */
+ uint16_t port_id; /**< device port ID */
uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise */
uint16_t queue_id; /**< RX queue index */
uint16_t reg_idx; /**< RX queue register index */
@@ -167,7 +167,7 @@ struct i40e_tx_queue {
uint8_t pthresh; /**< Prefetch threshold register. */
uint8_t hthresh; /**< Host threshold register. */
uint8_t wthresh; /**< Write-back threshold reg. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
uint16_t queue_id; /**< TX queue index. */
uint16_t reg_idx;
uint32_t txq_flags;
@@ -255,6 +255,7 @@ void i40e_set_tx_function_flag(struct rte_eth_dev *dev,
struct i40e_tx_queue *txq);
void i40e_set_tx_function(struct rte_eth_dev *dev);
void i40e_set_default_ptype_table(struct rte_eth_dev *dev);
+void i40e_set_default_pctype_table(struct rte_eth_dev *dev);
/* For each value it means, datasheet of hardware can tell more details
*
diff --git a/drivers/net/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
index f4036ea2..5e4e472a 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_altivec.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_altivec.c
@@ -100,7 +100,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
p1 = (uintptr_t)&mb1->rearm_data;
*(uint64_t *)p1 = rxq->mbuf_initializer;
- /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
vaddr0 = vec_ld(0, (vector unsigned long *)&mb0->buf_addr);
vaddr1 = vec_ld(0, (vector unsigned long *)&mb1->buf_addr);
@@ -146,7 +146,7 @@ desc_to_olflags_v(vector unsigned long descs[4], struct rte_mbuf **rx_pkts)
/* map rss and vlan type to rss hash and vlan flag */
const vector unsigned char vlan_flags = (vector unsigned char){
0, 0, 0, 0,
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0};
@@ -538,7 +538,7 @@ vtx1(volatile struct i40e_tx_desc *txdp,
((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
vector unsigned long descriptor = (vector unsigned long){
- pkt->buf_physaddr + pkt->data_off, high_qw};
+ pkt->buf_iova + pkt->data_off, high_qw};
*(vector unsigned long *)txdp = descriptor;
}
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 694e91f3..b5685e2b 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -81,13 +81,13 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
mb0 = rxep[0].mbuf;
mb1 = rxep[1].mbuf;
- paddr = mb0->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+ paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
dma_addr0 = vdupq_n_u64(paddr);
/* flush desc with pa dma_addr */
vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
- paddr = mb1->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+ paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
dma_addr1 = vdupq_n_u64(paddr);
vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
}
@@ -137,7 +137,7 @@ desc_to_olflags_v(struct i40e_rx_queue *rxq, uint64x2_t descs[4],
/* map rss and vlan type to rss hash and vlan flag */
const uint8x16_t vlan_flags = {
0, 0, 0, 0,
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0};
@@ -197,8 +197,7 @@ desc_to_olflags_v(struct i40e_rx_queue *rxq, uint64x2_t descs[4],
}
#define PKTLEN_SHIFT 10
-
-#define I40E_VPMD_DESC_DD_MASK 0x0001000100010001ULL
+#define I40E_UINT16_BIT (CHAR_BIT * sizeof(uint16_t))
static inline void
desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts,
@@ -230,7 +229,6 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
struct i40e_rx_entry *sw_ring;
uint16_t nb_pkts_recd;
int pos;
- uint64_t var;
uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
/* mask to shuffle from desc. to mbuf */
@@ -364,7 +362,6 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
/* C.2 get 4 pkts staterr value */
staterr = vzipq_u16(sterr_tmp1.val[1],
sterr_tmp2.val[1]).val[0];
- stat = vgetq_lane_u64(vreinterpretq_u64_u16(staterr), 0);
desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
@@ -429,6 +426,12 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
rx_pkts[pos + 3]->next = NULL;
}
+ staterr = vshlq_n_u16(staterr, I40E_UINT16_BIT - 1);
+ staterr = vreinterpretq_u16_s16(
+ vshrq_n_s16(vreinterpretq_s16_u16(staterr),
+ I40E_UINT16_BIT - 1));
+ stat = ~vgetq_lane_u64(vreinterpretq_u64_u16(staterr), 0);
+
rte_prefetch_non_temporal(rxdp + RTE_I40E_DESCS_PER_LOOP);
/* D.3 copy final 1,2 data to rx_pkts */
@@ -438,10 +441,12 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
pkt_mb1);
desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
/* C.4 calc available number of desc */
- var = __builtin_popcountll(stat & I40E_VPMD_DESC_DD_MASK);
- nb_pkts_recd += var;
- if (likely(var != RTE_I40E_DESCS_PER_LOOP))
+ if (unlikely(stat == 0)) {
+ nb_pkts_recd += RTE_I40E_DESCS_PER_LOOP;
+ } else {
+ nb_pkts_recd += __builtin_ctzl(stat) / I40E_UINT16_BIT;
break;
+ }
}
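The NEON change above replaces a popcount of the DD mask with a sign-extension trick: each 16-bit lane of staterr becomes all-ones for a descriptor that is still pending and all-zeros for a completed one, so after inverting, the number of completed descriptors is the count of trailing zero 16-bit groups in the 64-bit value. A scalar sketch of that counting step; lane packing is simplified and the names are illustrative.

#include <stdint.h>
#include <stdbool.h>

#define DESCS_PER_LOOP 4
#define UINT16_BITS    16

/* Build the 64-bit "stat" word the way the vector code does:
 * lane i is 0x0000 when descriptor i is done, 0xFFFF when it is not. */
static uint64_t
pack_stat(const bool done[DESCS_PER_LOOP])
{
	uint64_t stat = 0;
	int i;

	for (i = 0; i < DESCS_PER_LOOP; i++)
		if (!done[i])
			stat |= 0xFFFFULL << (i * UINT16_BITS);
	return stat;
}

/* Number of descriptors completed in order, starting from lane 0. */
static unsigned int
count_done(uint64_t stat)
{
	if (stat == 0)
		return DESCS_PER_LOOP;			/* all four completed */
	return __builtin_ctzl(stat) / UINT16_BITS;	/* leading run of done lanes */
}

For example, with descriptors 0 and 1 done but 2 still pending, stat carries 0xFFFF in lane 2, count_done() returns 2, and the receive loop breaks early, matching the new code path.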
/* Update our internal tail pointer */
@@ -515,7 +520,7 @@ vtx1(volatile struct i40e_tx_desc *txdp,
((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
- uint64x2_t descriptor = {pkt->buf_physaddr + pkt->data_off, high_qw};
+ uint64x2_t descriptor = {pkt->buf_iova + pkt->data_off, high_qw};
vst1q_u64((uint64_t *)txdp, descriptor);
}
diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index 779f14e5..9d2d1f83 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@@ -86,8 +86,8 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
mb0 = rxep[0].mbuf;
mb1 = rxep[1].mbuf;
- /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_physaddr) !=
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
offsetof(struct rte_mbuf, buf_addr) + 8);
vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
@@ -151,7 +151,7 @@ desc_to_olflags_v(struct i40e_rx_queue *rxq, __m128i descs[4],
/* map rss and vlan type to rss hash and vlan flag */
const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0,
0, 0, 0, 0,
- 0, 0, 0, PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
+ 0, 0, 0, PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
0, 0, 0, 0);
const __m128i rss_flags = _mm_set_epi8(0, 0, 0, 0,
@@ -549,7 +549,7 @@ vtx1(volatile struct i40e_tx_desc *txdp,
((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
__m128i descriptor = _mm_set_epi64x(high_qw,
- pkt->buf_physaddr + pkt->data_off);
+ pkt->buf_iova + pkt->data_off);
_mm_store_si128((__m128i *)txdp, descriptor);
}
diff --git a/drivers/net/i40e/i40e_tm.c b/drivers/net/i40e/i40e_tm.c
index d90313af..44316f64 100644
--- a/drivers/net/i40e/i40e_tm.c
+++ b/drivers/net/i40e/i40e_tm.c
@@ -302,7 +302,7 @@ i40e_shaper_profile_add(struct rte_eth_dev *dev,
if (!shaper_profile)
return -ENOMEM;
shaper_profile->shaper_profile_id = shaper_profile_id;
- (void)rte_memcpy(&shaper_profile->profile, profile,
+ rte_memcpy(&shaper_profile->profile, profile,
sizeof(struct rte_tm_shaper_params));
TAILQ_INSERT_TAIL(&pf->tm_conf.shaper_profile_list,
shaper_profile, node);
@@ -374,11 +374,13 @@ i40e_tm_node_search(struct rte_eth_dev *dev,
}
static int
-i40e_node_param_check(uint32_t node_id, uint32_t parent_node_id,
+i40e_node_param_check(struct rte_eth_dev *dev, uint32_t node_id,
uint32_t priority, uint32_t weight,
struct rte_tm_node_params *params,
struct rte_tm_error *error)
{
+ struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
if (node_id == RTE_TM_NODE_ID_NULL) {
error->type = RTE_TM_ERROR_TYPE_NODE_ID;
error->message = "invalid node id";
@@ -409,8 +411,8 @@ i40e_node_param_check(uint32_t node_id, uint32_t parent_node_id,
return -EINVAL;
}
- /* for root node */
- if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+ /* for non-leaf node */
+ if (node_id >= hw->func_caps.num_tx_qp) {
if (params->nonleaf.wfq_weight_mode) {
error->type =
RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
@@ -433,7 +435,7 @@ i40e_node_param_check(uint32_t node_id, uint32_t parent_node_id,
return 0;
}
- /* for TC or queue node */
+ /* for leaf node */
if (params->leaf.cman) {
error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
error->message = "Congestion management not supported";
@@ -478,7 +480,7 @@ i40e_node_add(struct rte_eth_dev *dev, uint32_t node_id,
struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
enum i40e_tm_node_type node_type = I40E_TM_NODE_TYPE_MAX;
enum i40e_tm_node_type parent_node_type = I40E_TM_NODE_TYPE_MAX;
- struct i40e_tm_shaper_profile *shaper_profile;
+ struct i40e_tm_shaper_profile *shaper_profile = NULL;
struct i40e_tm_node *tm_node;
struct i40e_tm_node *parent_node;
uint16_t tc_nb = 0;
@@ -494,7 +496,7 @@ i40e_node_add(struct rte_eth_dev *dev, uint32_t node_id,
return -EINVAL;
}
- ret = i40e_node_param_check(node_id, parent_node_id, priority, weight,
+ ret = i40e_node_param_check(dev, node_id, priority, weight,
params, error);
if (ret)
return ret;
@@ -507,12 +509,15 @@ i40e_node_add(struct rte_eth_dev *dev, uint32_t node_id,
}
/* check the shaper profile id */
- shaper_profile = i40e_shaper_profile_search(dev,
- params->shaper_profile_id);
- if (!shaper_profile) {
- error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
- error->message = "shaper profile not exist";
- return -EINVAL;
+ if (params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE) {
+ shaper_profile = i40e_shaper_profile_search(
+ dev, params->shaper_profile_id);
+ if (!shaper_profile) {
+ error->type =
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+ error->message = "shaper profile not exist";
+ return -EINVAL;
+ }
}
/* root node if not have a parent */
@@ -544,12 +549,13 @@ i40e_node_add(struct rte_eth_dev *dev, uint32_t node_id,
tm_node->reference_count = 0;
tm_node->parent = NULL;
tm_node->shaper_profile = shaper_profile;
- (void)rte_memcpy(&tm_node->params, params,
+ rte_memcpy(&tm_node->params, params,
sizeof(struct rte_tm_node_params));
pf->tm_conf.root = tm_node;
/* increase the reference counter of the shaper profile */
- shaper_profile->reference_count++;
+ if (shaper_profile)
+ shaper_profile->reference_count++;
return 0;
}
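With this change a traffic-management node can be created without a private shaper by passing RTE_TM_SHAPER_PROFILE_ID_NONE. A minimal usage sketch from the application side, assuming the generic rte_tm node-add API; the port id, node id, level and weight values are placeholders and error handling is trimmed.

#include <string.h>
#include <rte_tm.h>

/* Add a root (port-level) node that has no private shaper attached. */
static int
add_unshaped_root(uint16_t port_id, uint32_t root_node_id)
{
	struct rte_tm_node_params params;
	struct rte_tm_error error;

	memset(&params, 0, sizeof(params));
	memset(&error, 0, sizeof(error));
	params.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE; /* no shaper */

	/* Parent RTE_TM_NODE_ID_NULL marks the root; level 0 stands for the
	 * port level in this sketch of the hierarchy. */
	return rte_tm_node_add(port_id, root_node_id, RTE_TM_NODE_ID_NULL,
			       0 /* priority */, 1 /* weight */, 0 /* level */,
			       &params, &error);
}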
@@ -615,9 +621,9 @@ i40e_node_add(struct rte_eth_dev *dev, uint32_t node_id,
tm_node->priority = priority;
tm_node->weight = weight;
tm_node->reference_count = 0;
- tm_node->parent = pf->tm_conf.root;
+ tm_node->parent = parent_node;
tm_node->shaper_profile = shaper_profile;
- (void)rte_memcpy(&tm_node->params, params,
+ rte_memcpy(&tm_node->params, params,
sizeof(struct rte_tm_node_params));
if (parent_node_type == I40E_TM_NODE_TYPE_PORT) {
TAILQ_INSERT_TAIL(&pf->tm_conf.tc_list,
@@ -631,7 +637,8 @@ i40e_node_add(struct rte_eth_dev *dev, uint32_t node_id,
tm_node->parent->reference_count++;
/* increase the reference counter of the shaper profile */
- shaper_profile->reference_count++;
+ if (shaper_profile)
+ shaper_profile->reference_count++;
return 0;
}
@@ -678,14 +685,16 @@ i40e_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
/* root node */
if (node_type == I40E_TM_NODE_TYPE_PORT) {
- tm_node->shaper_profile->reference_count--;
+ if (tm_node->shaper_profile)
+ tm_node->shaper_profile->reference_count--;
rte_free(tm_node);
pf->tm_conf.root = NULL;
return 0;
}
/* TC or queue node */
- tm_node->shaper_profile->reference_count--;
+ if (tm_node->shaper_profile)
+ tm_node->shaper_profile->reference_count--;
tm_node->parent->reference_count--;
if (node_type == I40E_TM_NODE_TYPE_TC) {
TAILQ_REMOVE(&pf->tm_conf.tc_list, tm_node, node);
@@ -753,15 +762,34 @@ i40e_level_capabilities_get(struct rte_eth_dev *dev,
cap->n_nodes_max = 1;
cap->n_nodes_nonleaf_max = 1;
cap->n_nodes_leaf_max = 0;
- cap->non_leaf_nodes_identical = true;
- cap->leaf_nodes_identical = true;
+ } else if (level_id == I40E_TM_NODE_TYPE_TC) {
+ /* TC */
+ cap->n_nodes_max = I40E_MAX_TRAFFIC_CLASS;
+ cap->n_nodes_nonleaf_max = I40E_MAX_TRAFFIC_CLASS;
+ cap->n_nodes_leaf_max = 0;
+ } else {
+ /* queue */
+ cap->n_nodes_max = hw->func_caps.num_tx_qp;
+ cap->n_nodes_nonleaf_max = 0;
+ cap->n_nodes_leaf_max = hw->func_caps.num_tx_qp;
+ }
+
+ cap->non_leaf_nodes_identical = true;
+ cap->leaf_nodes_identical = true;
+
+ if (level_id != I40E_TM_NODE_TYPE_QUEUE) {
cap->nonleaf.shaper_private_supported = true;
cap->nonleaf.shaper_private_dual_rate_supported = false;
cap->nonleaf.shaper_private_rate_min = 0;
/* 40Gbps -> 5GBps */
cap->nonleaf.shaper_private_rate_max = 5000000000ull;
cap->nonleaf.shaper_shared_n_max = 0;
- cap->nonleaf.sched_n_children_max = I40E_MAX_TRAFFIC_CLASS;
+ if (level_id == I40E_TM_NODE_TYPE_PORT)
+ cap->nonleaf.sched_n_children_max =
+ I40E_MAX_TRAFFIC_CLASS;
+ else
+ cap->nonleaf.sched_n_children_max =
+ hw->func_caps.num_tx_qp;
cap->nonleaf.sched_sp_n_priorities_max = 1;
cap->nonleaf.sched_wfq_n_children_per_group_max = 0;
cap->nonleaf.sched_wfq_n_groups_max = 0;
@@ -771,21 +799,7 @@ i40e_level_capabilities_get(struct rte_eth_dev *dev,
return 0;
}
- /* TC or queue node */
- if (level_id == I40E_TM_NODE_TYPE_TC) {
- /* TC */
- cap->n_nodes_max = I40E_MAX_TRAFFIC_CLASS;
- cap->n_nodes_nonleaf_max = I40E_MAX_TRAFFIC_CLASS;
- cap->n_nodes_leaf_max = 0;
- cap->non_leaf_nodes_identical = true;
- } else {
- /* queue */
- cap->n_nodes_max = hw->func_caps.num_tx_qp;
- cap->n_nodes_nonleaf_max = 0;
- cap->n_nodes_leaf_max = hw->func_caps.num_tx_qp;
- cap->non_leaf_nodes_identical = true;
- }
- cap->leaf_nodes_identical = true;
+ /* queue node */
cap->leaf.shaper_private_supported = true;
cap->leaf.shaper_private_dual_rate_supported = false;
cap->leaf.shaper_private_rate_min = 0;
@@ -888,11 +902,15 @@ i40e_hierarchy_commit(struct rte_eth_dev *dev,
* If the port has a max bandwidth, the TCs should have none.
*/
/* port */
- bw = pf->tm_conf.root->shaper_profile->profile.peak.rate;
+ if (pf->tm_conf.root->shaper_profile)
+ bw = pf->tm_conf.root->shaper_profile->profile.peak.rate;
+ else
+ bw = 0;
if (bw) {
/* check if any TC has a max bandwidth */
TAILQ_FOREACH(tm_node, tc_list, node) {
- if (tm_node->shaper_profile->profile.peak.rate) {
+ if (tm_node->shaper_profile &&
+ tm_node->shaper_profile->profile.peak.rate) {
error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE;
error->message = "no port and TC max bandwidth"
" in parallel";
@@ -936,7 +954,10 @@ i40e_hierarchy_commit(struct rte_eth_dev *dev,
}
tc_map &= ~BIT_ULL(i);
- bw = tm_node->shaper_profile->profile.peak.rate;
+ if (tm_node->shaper_profile)
+ bw = tm_node->shaper_profile->profile.peak.rate;
+ else
+ bw = 0;
if (!bw)
continue;
@@ -947,7 +968,10 @@ i40e_hierarchy_commit(struct rte_eth_dev *dev,
}
TAILQ_FOREACH(tm_node, queue_list, node) {
- bw = tm_node->shaper_profile->profile.peak.rate;
+ if (tm_node->shaper_profile)
+ bw = tm_node->shaper_profile->profile.peak.rate;
+ else
+ bw = 0;
if (bw) {
error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
error->message = "not support queue QoS";
diff --git a/drivers/net/i40e/rte_pmd_i40e.c b/drivers/net/i40e/rte_pmd_i40e.c
index f12b7f4a..aeb92af3 100644
--- a/drivers/net/i40e/rte_pmd_i40e.c
+++ b/drivers/net/i40e/rte_pmd_i40e.c
@@ -35,13 +35,14 @@
#include <rte_tailq.h>
#include "base/i40e_prototype.h"
+#include "base/i40e_dcb.h"
#include "i40e_ethdev.h"
#include "i40e_pf.h"
#include "i40e_rxtx.h"
#include "rte_pmd_i40e.h"
int
-rte_pmd_i40e_ping_vfs(uint8_t port, uint16_t vf)
+rte_pmd_i40e_ping_vfs(uint16_t port, uint16_t vf)
{
struct rte_eth_dev *dev;
struct i40e_pf *pf;
@@ -66,7 +67,7 @@ rte_pmd_i40e_ping_vfs(uint8_t port, uint16_t vf)
}
int
-rte_pmd_i40e_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf_id, uint8_t on)
+rte_pmd_i40e_set_vf_mac_anti_spoof(uint16_t port, uint16_t vf_id, uint8_t on)
{
struct rte_eth_dev *dev;
struct i40e_pf *pf;
@@ -117,7 +118,7 @@ rte_pmd_i40e_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf_id, uint8_t on)
vsi->info.sec_flags &= ~I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK;
memset(&ctxt, 0, sizeof(ctxt));
- (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+ rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
ctxt.seid = vsi->seid;
hw = I40E_VSI_TO_HW(vsi);
@@ -170,7 +171,7 @@ i40e_add_rm_all_vlan_filter(struct i40e_vsi *vsi, uint8_t add)
}
int
-rte_pmd_i40e_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf_id, uint8_t on)
+rte_pmd_i40e_set_vf_vlan_anti_spoof(uint16_t port, uint16_t vf_id, uint8_t on)
{
struct rte_eth_dev *dev;
struct i40e_pf *pf;
@@ -219,7 +220,7 @@ rte_pmd_i40e_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf_id, uint8_t on)
vsi->info.sec_flags &= ~I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK;
memset(&ctxt, 0, sizeof(ctxt));
- (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+ rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
ctxt.seid = vsi->seid;
hw = I40E_VSI_TO_HW(vsi);
@@ -264,7 +265,7 @@ i40e_vsi_rm_mac_filter(struct i40e_vsi *vsi)
for (i = 0; i < vlan_num; i++) {
mv_f[i].filter_type = filter_type;
- (void)rte_memcpy(&mv_f[i].macaddr,
+ rte_memcpy(&mv_f[i].macaddr,
&f->mac_info.mac_addr,
ETH_ADDR_LEN);
}
@@ -325,7 +326,7 @@ i40e_vsi_restore_mac_filter(struct i40e_vsi *vsi)
for (i = 0; i < vlan_num; i++) {
mv_f[i].filter_type = f->mac_info.filter_type;
- (void)rte_memcpy(&mv_f[i].macaddr,
+ rte_memcpy(&mv_f[i].macaddr,
&f->mac_info.mac_addr,
ETH_ADDR_LEN);
}
@@ -407,7 +408,7 @@ i40e_vsi_set_tx_loopback(struct i40e_vsi *vsi, uint8_t on)
vsi->info.switch_id &= ~I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB;
memset(&ctxt, 0, sizeof(ctxt));
- (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+ rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
ctxt.seid = vsi->seid;
ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
@@ -430,7 +431,7 @@ i40e_vsi_set_tx_loopback(struct i40e_vsi *vsi, uint8_t on)
}
int
-rte_pmd_i40e_set_tx_loopback(uint8_t port, uint8_t on)
+rte_pmd_i40e_set_tx_loopback(uint16_t port, uint8_t on)
{
struct rte_eth_dev *dev;
struct i40e_pf *pf;
@@ -473,7 +474,7 @@ rte_pmd_i40e_set_tx_loopback(uint8_t port, uint8_t on)
}
int
-rte_pmd_i40e_set_vf_unicast_promisc(uint8_t port, uint16_t vf_id, uint8_t on)
+rte_pmd_i40e_set_vf_unicast_promisc(uint16_t port, uint16_t vf_id, uint8_t on)
{
struct rte_eth_dev *dev;
struct i40e_pf *pf;
@@ -514,7 +515,7 @@ rte_pmd_i40e_set_vf_unicast_promisc(uint8_t port, uint16_t vf_id, uint8_t on)
}
int
-rte_pmd_i40e_set_vf_multicast_promisc(uint8_t port, uint16_t vf_id, uint8_t on)
+rte_pmd_i40e_set_vf_multicast_promisc(uint16_t port, uint16_t vf_id, uint8_t on)
{
struct rte_eth_dev *dev;
struct i40e_pf *pf;
@@ -555,7 +556,7 @@ rte_pmd_i40e_set_vf_multicast_promisc(uint8_t port, uint16_t vf_id, uint8_t on)
}
int
-rte_pmd_i40e_set_vf_mac_addr(uint8_t port, uint16_t vf_id,
+rte_pmd_i40e_set_vf_mac_addr(uint16_t port, uint16_t vf_id,
struct ether_addr *mac_addr)
{
struct i40e_mac_filter *f;
@@ -591,14 +592,16 @@ rte_pmd_i40e_set_vf_mac_addr(uint8_t port, uint16_t vf_id,
/* Remove all existing mac */
TAILQ_FOREACH_SAFE(f, &vsi->mac_list, next, temp)
- i40e_vsi_delete_mac(vsi, &f->mac_info.mac_addr);
+ if (i40e_vsi_delete_mac(vsi, &f->mac_info.mac_addr)
+ != I40E_SUCCESS)
+ PMD_DRV_LOG(WARNING, "Delete MAC failed");
return 0;
}
/* Set vlan strip on/off for specific VF from host */
int
-rte_pmd_i40e_set_vf_vlan_stripq(uint8_t port, uint16_t vf_id, uint8_t on)
+rte_pmd_i40e_set_vf_vlan_stripq(uint16_t port, uint16_t vf_id, uint8_t on)
{
struct rte_eth_dev *dev;
struct i40e_pf *pf;
@@ -633,7 +636,7 @@ rte_pmd_i40e_set_vf_vlan_stripq(uint8_t port, uint16_t vf_id, uint8_t on)
return ret;
}
-int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id,
+int rte_pmd_i40e_set_vf_vlan_insert(uint16_t port, uint16_t vf_id,
uint16_t vlan_id)
{
struct rte_eth_dev *dev;
@@ -685,7 +688,7 @@ int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id,
vsi->info.port_vlan_flags &= ~I40E_AQ_VSI_PVLAN_INSERT_PVID;
memset(&ctxt, 0, sizeof(ctxt));
- (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+ rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
ctxt.seid = vsi->seid;
hw = I40E_VSI_TO_HW(vsi);
@@ -698,7 +701,7 @@ int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id,
return ret;
}
-int rte_pmd_i40e_set_vf_broadcast(uint8_t port, uint16_t vf_id,
+int rte_pmd_i40e_set_vf_broadcast(uint16_t port, uint16_t vf_id,
uint8_t on)
{
struct rte_eth_dev *dev;
@@ -747,7 +750,7 @@ int rte_pmd_i40e_set_vf_broadcast(uint8_t port, uint16_t vf_id,
}
if (on) {
- (void)rte_memcpy(&filter.mac_addr, &broadcast, ETHER_ADDR_LEN);
+ rte_memcpy(&filter.mac_addr, &broadcast, ETHER_ADDR_LEN);
filter.filter_type = RTE_MACVLAN_PERFECT_MATCH;
ret = i40e_vsi_add_mac(vsi, &filter);
} else {
@@ -764,7 +767,7 @@ int rte_pmd_i40e_set_vf_broadcast(uint8_t port, uint16_t vf_id,
return ret;
}
-int rte_pmd_i40e_set_vf_vlan_tag(uint8_t port, uint16_t vf_id, uint8_t on)
+int rte_pmd_i40e_set_vf_vlan_tag(uint16_t port, uint16_t vf_id, uint8_t on)
{
struct rte_eth_dev *dev;
struct i40e_pf *pf;
@@ -819,7 +822,7 @@ int rte_pmd_i40e_set_vf_vlan_tag(uint8_t port, uint16_t vf_id, uint8_t on)
}
memset(&ctxt, 0, sizeof(ctxt));
- (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+ rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
ctxt.seid = vsi->seid;
hw = I40E_VSI_TO_HW(vsi);
@@ -858,7 +861,7 @@ i40e_vlan_filter_count(struct i40e_vsi *vsi)
return count;
}
-int rte_pmd_i40e_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id,
+int rte_pmd_i40e_set_vf_vlan_filter(uint16_t port, uint16_t vlan_id,
uint64_t vf_mask, uint8_t on)
{
struct rte_eth_dev *dev;
@@ -941,7 +944,7 @@ int rte_pmd_i40e_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id,
}
int
-rte_pmd_i40e_get_vf_stats(uint8_t port,
+rte_pmd_i40e_get_vf_stats(uint16_t port,
uint16_t vf_id,
struct rte_eth_stats *stats)
{
@@ -986,7 +989,7 @@ rte_pmd_i40e_get_vf_stats(uint8_t port,
}
int
-rte_pmd_i40e_reset_vf_stats(uint8_t port,
+rte_pmd_i40e_reset_vf_stats(uint16_t port,
uint16_t vf_id)
{
struct rte_eth_dev *dev;
@@ -1020,7 +1023,7 @@ rte_pmd_i40e_reset_vf_stats(uint8_t port,
}
int
-rte_pmd_i40e_set_vf_max_bw(uint8_t port, uint16_t vf_id, uint32_t bw)
+rte_pmd_i40e_set_vf_max_bw(uint16_t port, uint16_t vf_id, uint32_t bw)
{
struct rte_eth_dev *dev;
struct i40e_pf *pf;
@@ -1109,7 +1112,7 @@ rte_pmd_i40e_set_vf_max_bw(uint8_t port, uint16_t vf_id, uint32_t bw)
}
int
-rte_pmd_i40e_set_vf_tc_bw_alloc(uint8_t port, uint16_t vf_id,
+rte_pmd_i40e_set_vf_tc_bw_alloc(uint16_t port, uint16_t vf_id,
uint8_t tc_num, uint8_t *bw_weight)
{
struct rte_eth_dev *dev;
@@ -1223,7 +1226,7 @@ rte_pmd_i40e_set_vf_tc_bw_alloc(uint8_t port, uint16_t vf_id,
}
int
-rte_pmd_i40e_set_vf_tc_max_bw(uint8_t port, uint16_t vf_id,
+rte_pmd_i40e_set_vf_tc_max_bw(uint16_t port, uint16_t vf_id,
uint8_t tc_no, uint32_t bw)
{
struct rte_eth_dev *dev;
@@ -1341,7 +1344,7 @@ rte_pmd_i40e_set_vf_tc_max_bw(uint8_t port, uint16_t vf_id,
}
int
-rte_pmd_i40e_set_tc_strict_prio(uint8_t port, uint8_t tc_map)
+rte_pmd_i40e_set_tc_strict_prio(uint16_t port, uint8_t tc_map)
{
struct rte_eth_dev *dev;
struct i40e_pf *pf;
@@ -1513,7 +1516,7 @@ i40e_add_rm_profile_info(struct i40e_hw *hw, uint8_t *profile_info_sec)
/* Check if the profile info exists */
static int
-i40e_check_profile_info(uint8_t port, uint8_t *profile_info_sec)
+i40e_check_profile_info(uint16_t port, uint8_t *profile_info_sec)
{
struct rte_eth_dev *dev = &rte_eth_devices[port];
struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -1557,7 +1560,7 @@ i40e_check_profile_info(uint8_t port, uint8_t *profile_info_sec)
}
int
-rte_pmd_i40e_process_ddp_package(uint8_t port, uint8_t *buff,
+rte_pmd_i40e_process_ddp_package(uint16_t port, uint8_t *buff,
uint32_t size,
enum rte_pmd_i40e_package_op op)
{
@@ -1606,6 +1609,8 @@ rte_pmd_i40e_process_ddp_package(uint8_t port, uint8_t *buff,
return -EINVAL;
}
+ i40e_update_customized_info(dev, buff, size);
+
/* Find metadata segment */
metadata_seg_hdr = i40e_find_segment_in_package(SEGMENT_TYPE_METADATA,
pkg_hdr);
@@ -1704,6 +1709,27 @@ rte_pmd_i40e_process_ddp_package(uint8_t port, uint8_t *buff,
return status;
}
+/* Get number of TLV records in the section */
+static unsigned int
+i40e_get_tlv_section_size(struct i40e_profile_section_header *sec)
+{
+ unsigned int i, nb_rec, nb_tlv = 0;
+ struct i40e_profile_tlv_section_record *tlv;
+
+ if (!sec)
+ return nb_tlv;
+
+ /* get number of records in the section */
+ nb_rec = sec->section.size /
+ sizeof(struct i40e_profile_tlv_section_record);
+ for (i = 0; i < nb_rec; ) {
+ tlv = (struct i40e_profile_tlv_section_record *)&sec[1 + i];
+ i += tlv->len;
+ nb_tlv++;
+ }
+ return nb_tlv;
+}
+
int rte_pmd_i40e_get_ddp_info(uint8_t *pkg_buff, uint32_t pkg_size,
uint8_t *info_buff, uint32_t info_size,
enum rte_pmd_i40e_package_info type)
@@ -1858,12 +1884,162 @@ int rte_pmd_i40e_get_ddp_info(uint8_t *pkg_buff, uint32_t pkg_size,
return I40E_SUCCESS;
}
+ /* get number of protocols */
+ if (type == RTE_PMD_I40E_PKG_INFO_PROTOCOL_NUM) {
+ struct i40e_profile_section_header *proto;
+
+ if (info_size < sizeof(uint32_t)) {
+ PMD_DRV_LOG(ERR, "Invalid information buffer size");
+ return -EINVAL;
+ }
+ proto = i40e_find_section_in_profile(SECTION_TYPE_PROTO,
+ (struct i40e_profile_segment *)i40e_seg_hdr);
+ *(uint32_t *)info_buff = i40e_get_tlv_section_size(proto);
+ return I40E_SUCCESS;
+ }
+
+ /* get list of protocols */
+ if (type == RTE_PMD_I40E_PKG_INFO_PROTOCOL_LIST) {
+ uint32_t i, j, nb_tlv, nb_rec, nb_proto_info;
+ struct rte_pmd_i40e_proto_info *pinfo;
+ struct i40e_profile_section_header *proto;
+ struct i40e_profile_tlv_section_record *tlv;
+
+ pinfo = (struct rte_pmd_i40e_proto_info *)info_buff;
+ nb_proto_info = info_size /
+ sizeof(struct rte_pmd_i40e_proto_info);
+ for (i = 0; i < nb_proto_info; i++) {
+ pinfo[i].proto_id = RTE_PMD_I40E_PROTO_UNUSED;
+ memset(pinfo[i].name, 0, RTE_PMD_I40E_DDP_NAME_SIZE);
+ }
+ proto = i40e_find_section_in_profile(SECTION_TYPE_PROTO,
+ (struct i40e_profile_segment *)i40e_seg_hdr);
+ nb_tlv = i40e_get_tlv_section_size(proto);
+ if (nb_tlv == 0)
+ return I40E_SUCCESS;
+ if (nb_proto_info < nb_tlv) {
+ PMD_DRV_LOG(ERR, "Invalid information buffer size");
+ return -EINVAL;
+ }
+ /* get number of records in the section */
+ nb_rec = proto->section.size /
+ sizeof(struct i40e_profile_tlv_section_record);
+ tlv = (struct i40e_profile_tlv_section_record *)&proto[1];
+ for (i = j = 0; i < nb_rec; j++) {
+ pinfo[j].proto_id = tlv->data[0];
+ snprintf(pinfo[j].name, I40E_DDP_NAME_SIZE, "%s",
+ (const char *)&tlv->data[1]);
+ i += tlv->len;
+ tlv = &tlv[tlv->len];
+ }
+ return I40E_SUCCESS;
+ }
+
+ /* get number of packet classification types */
+ if (type == RTE_PMD_I40E_PKG_INFO_PCTYPE_NUM) {
+ struct i40e_profile_section_header *pctype;
+
+ if (info_size < sizeof(uint32_t)) {
+ PMD_DRV_LOG(ERR, "Invalid information buffer size");
+ return -EINVAL;
+ }
+ pctype = i40e_find_section_in_profile(SECTION_TYPE_PCTYPE,
+ (struct i40e_profile_segment *)i40e_seg_hdr);
+ *(uint32_t *)info_buff = i40e_get_tlv_section_size(pctype);
+ return I40E_SUCCESS;
+ }
+
+ /* get list of packet classification types */
+ if (type == RTE_PMD_I40E_PKG_INFO_PCTYPE_LIST) {
+ uint32_t i, j, nb_tlv, nb_rec, nb_proto_info;
+ struct rte_pmd_i40e_ptype_info *pinfo;
+ struct i40e_profile_section_header *pctype;
+ struct i40e_profile_tlv_section_record *tlv;
+
+ pinfo = (struct rte_pmd_i40e_ptype_info *)info_buff;
+ nb_proto_info = info_size /
+ sizeof(struct rte_pmd_i40e_ptype_info);
+ for (i = 0; i < nb_proto_info; i++)
+ memset(&pinfo[i], RTE_PMD_I40E_PROTO_UNUSED,
+ sizeof(struct rte_pmd_i40e_ptype_info));
+ pctype = i40e_find_section_in_profile(SECTION_TYPE_PCTYPE,
+ (struct i40e_profile_segment *)i40e_seg_hdr);
+ nb_tlv = i40e_get_tlv_section_size(pctype);
+ if (nb_tlv == 0)
+ return I40E_SUCCESS;
+ if (nb_proto_info < nb_tlv) {
+ PMD_DRV_LOG(ERR, "Invalid information buffer size");
+ return -EINVAL;
+ }
+
+ /* get number of records in the section */
+ nb_rec = pctype->section.size /
+ sizeof(struct i40e_profile_tlv_section_record);
+ tlv = (struct i40e_profile_tlv_section_record *)&pctype[1];
+ for (i = j = 0; i < nb_rec; j++) {
+ memcpy(&pinfo[j], tlv->data,
+ sizeof(struct rte_pmd_i40e_ptype_info));
+ i += tlv->len;
+ tlv = &tlv[tlv->len];
+ }
+ return I40E_SUCCESS;
+ }
+
+ /* get number of packet types */
+ if (type == RTE_PMD_I40E_PKG_INFO_PTYPE_NUM) {
+ struct i40e_profile_section_header *ptype;
+
+ if (info_size < sizeof(uint32_t)) {
+ PMD_DRV_LOG(ERR, "Invalid information buffer size");
+ return -EINVAL;
+ }
+ ptype = i40e_find_section_in_profile(SECTION_TYPE_PTYPE,
+ (struct i40e_profile_segment *)i40e_seg_hdr);
+ *(uint32_t *)info_buff = i40e_get_tlv_section_size(ptype);
+ return I40E_SUCCESS;
+ }
+
+ /* get list of packet types */
+ if (type == RTE_PMD_I40E_PKG_INFO_PTYPE_LIST) {
+ uint32_t i, j, nb_tlv, nb_rec, nb_proto_info;
+ struct rte_pmd_i40e_ptype_info *pinfo;
+ struct i40e_profile_section_header *ptype;
+ struct i40e_profile_tlv_section_record *tlv;
+
+ pinfo = (struct rte_pmd_i40e_ptype_info *)info_buff;
+ nb_proto_info = info_size /
+ sizeof(struct rte_pmd_i40e_ptype_info);
+ for (i = 0; i < nb_proto_info; i++)
+ memset(&pinfo[i], RTE_PMD_I40E_PROTO_UNUSED,
+ sizeof(struct rte_pmd_i40e_ptype_info));
+ ptype = i40e_find_section_in_profile(SECTION_TYPE_PTYPE,
+ (struct i40e_profile_segment *)i40e_seg_hdr);
+ nb_tlv = i40e_get_tlv_section_size(ptype);
+ if (nb_tlv == 0)
+ return I40E_SUCCESS;
+ if (nb_proto_info < nb_tlv) {
+ PMD_DRV_LOG(ERR, "Invalid information buffer size");
+ return -EINVAL;
+ }
+ /* get number of records in the section */
+ nb_rec = ptype->section.size /
+ sizeof(struct i40e_profile_tlv_section_record);
+ for (i = j = 0; i < nb_rec; j++) {
+ tlv = (struct i40e_profile_tlv_section_record *)
+ &ptype[1 + i];
+ memcpy(&pinfo[j], tlv->data,
+ sizeof(struct rte_pmd_i40e_ptype_info));
+ i += tlv->len;
+ }
+ return I40E_SUCCESS;
+ }
+
PMD_DRV_LOG(ERR, "Info type %u is invalid.", type);
return -EINVAL;
}
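The added info types let an application first query how many protocol records a DDP profile carries and then fetch the list. A usage sketch against the rte_pmd_i40e_get_ddp_info() call shown above, assuming the profile image has already been read into memory; error handling is abbreviated.

#include <stdio.h>
#include <stdlib.h>
#include <rte_pmd_i40e.h>

/* pkg/pkg_size: a DDP profile image already loaded by the caller. */
static int
dump_ddp_protocols(uint8_t *pkg, uint32_t pkg_size)
{
	uint32_t nb_proto, i;
	struct rte_pmd_i40e_proto_info *protos;
	int ret;

	ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
			(uint8_t *)&nb_proto, sizeof(nb_proto),
			RTE_PMD_I40E_PKG_INFO_PROTOCOL_NUM);
	if (ret || nb_proto == 0)
		return ret;

	protos = calloc(nb_proto, sizeof(*protos));
	if (!protos)
		return -1;
	ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
			(uint8_t *)protos, nb_proto * sizeof(*protos),
			RTE_PMD_I40E_PKG_INFO_PROTOCOL_LIST);
	if (ret == 0)
		for (i = 0; i < nb_proto; i++)
			printf("proto %u: %s\n", protos[i].proto_id,
			       protos[i].name);
	free(protos);
	return ret;
}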
int
-rte_pmd_i40e_get_ddp_list(uint8_t port, uint8_t *buff, uint32_t size)
+rte_pmd_i40e_get_ddp_list(uint16_t port, uint8_t *buff, uint32_t size)
{
struct rte_eth_dev *dev;
struct i40e_hw *hw;
@@ -1933,7 +2109,9 @@ static int check_invalid_pkt_type(uint32_t pkt_type)
tnl != RTE_PTYPE_TUNNEL_VXLAN &&
tnl != RTE_PTYPE_TUNNEL_NVGRE &&
tnl != RTE_PTYPE_TUNNEL_GENEVE &&
- tnl != RTE_PTYPE_TUNNEL_GRENAT)
+ tnl != RTE_PTYPE_TUNNEL_GRENAT &&
+ tnl != RTE_PTYPE_TUNNEL_GTPC &&
+ tnl != RTE_PTYPE_TUNNEL_GTPU)
return -1;
if (il2 &&
@@ -1991,7 +2169,7 @@ static int check_invalid_ptype_mapping(
int
rte_pmd_i40e_ptype_mapping_update(
- uint8_t port,
+ uint16_t port,
struct rte_pmd_i40e_ptype_mapping *mapping_items,
uint16_t count,
uint8_t exclusive)
@@ -2027,7 +2205,7 @@ rte_pmd_i40e_ptype_mapping_update(
return 0;
}
-int rte_pmd_i40e_ptype_mapping_reset(uint8_t port)
+int rte_pmd_i40e_ptype_mapping_reset(uint16_t port)
{
struct rte_eth_dev *dev;
@@ -2044,7 +2222,7 @@ int rte_pmd_i40e_ptype_mapping_reset(uint8_t port)
}
int rte_pmd_i40e_ptype_mapping_get(
- uint8_t port,
+ uint16_t port,
struct rte_pmd_i40e_ptype_mapping *mapping_items,
uint16_t size,
uint16_t *count,
@@ -2078,7 +2256,7 @@ int rte_pmd_i40e_ptype_mapping_get(
return 0;
}
-int rte_pmd_i40e_ptype_mapping_replace(uint8_t port,
+int rte_pmd_i40e_ptype_mapping_replace(uint16_t port,
uint32_t target,
uint8_t mask,
uint32_t pkt_type)
@@ -2115,3 +2293,695 @@ int rte_pmd_i40e_ptype_mapping_replace(uint8_t port,
return 0;
}
+
+int
+rte_pmd_i40e_add_vf_mac_addr(uint16_t port, uint16_t vf_id,
+ struct ether_addr *mac_addr)
+{
+ struct rte_eth_dev *dev;
+ struct i40e_pf_vf *vf;
+ struct i40e_vsi *vsi;
+ struct i40e_pf *pf;
+ struct i40e_mac_filter_info mac_filter;
+ int ret;
+
+ if (i40e_validate_mac_addr((u8 *)mac_addr) != I40E_SUCCESS)
+ return -EINVAL;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+ dev = &rte_eth_devices[port];
+
+ if (!is_i40e_supported(dev))
+ return -ENOTSUP;
+
+ pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+
+ if (vf_id >= pf->vf_num || !pf->vfs)
+ return -EINVAL;
+
+ vf = &pf->vfs[vf_id];
+ vsi = vf->vsi;
+ if (!vsi) {
+ PMD_DRV_LOG(ERR, "Invalid VSI.");
+ return -EINVAL;
+ }
+
+ mac_filter.filter_type = RTE_MACVLAN_PERFECT_MATCH;
+ ether_addr_copy(mac_addr, &mac_filter.mac_addr);
+ ret = i40e_vsi_add_mac(vsi, &mac_filter);
+ if (ret != I40E_SUCCESS) {
+ PMD_DRV_LOG(ERR, "Failed to add MAC filter.");
+ return -1;
+ }
+
+ return 0;
+}
+
+int rte_pmd_i40e_flow_type_mapping_reset(uint16_t port)
+{
+ struct rte_eth_dev *dev;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+ dev = &rte_eth_devices[port];
+
+ if (!is_i40e_supported(dev))
+ return -ENOTSUP;
+
+ i40e_set_default_pctype_table(dev);
+
+ return 0;
+}
+
+int rte_pmd_i40e_flow_type_mapping_get(
+ uint16_t port,
+ struct rte_pmd_i40e_flow_type_mapping *mapping_items)
+{
+ struct rte_eth_dev *dev;
+ struct i40e_adapter *ad;
+ uint16_t i;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+ dev = &rte_eth_devices[port];
+
+ if (!is_i40e_supported(dev))
+ return -ENOTSUP;
+
+ ad = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+
+ for (i = 0; i < I40E_FLOW_TYPE_MAX; i++) {
+ mapping_items[i].flow_type = i;
+ mapping_items[i].pctype = ad->pctypes_tbl[i];
+ }
+
+ return 0;
+}
+
+int
+rte_pmd_i40e_flow_type_mapping_update(
+ uint16_t port,
+ struct rte_pmd_i40e_flow_type_mapping *mapping_items,
+ uint16_t count,
+ uint8_t exclusive)
+{
+ struct rte_eth_dev *dev;
+ struct i40e_adapter *ad;
+ int i;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+ dev = &rte_eth_devices[port];
+
+ if (!is_i40e_supported(dev))
+ return -ENOTSUP;
+
+ if (count > I40E_FLOW_TYPE_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < count; i++)
+ if (mapping_items[i].flow_type >= I40E_FLOW_TYPE_MAX ||
+ mapping_items[i].flow_type == RTE_ETH_FLOW_UNKNOWN ||
+ (mapping_items[i].pctype &
+ (1ULL << I40E_FILTER_PCTYPE_INVALID)))
+ return -EINVAL;
+
+ ad = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+
+ if (exclusive) {
+ for (i = 0; i < I40E_FLOW_TYPE_MAX; i++)
+ ad->pctypes_tbl[i] = 0ULL;
+ ad->flow_types_mask = 0ULL;
+ }
+
+ for (i = 0; i < count; i++) {
+ ad->pctypes_tbl[mapping_items[i].flow_type] =
+ mapping_items[i].pctype;
+ if (mapping_items[i].pctype)
+ ad->flow_types_mask |=
+ (1ULL << mapping_items[i].flow_type);
+ else
+ ad->flow_types_mask &=
+ ~(1ULL << mapping_items[i].flow_type);
+ }
+
+ for (i = 0, ad->pctypes_mask = 0ULL; i < I40E_FLOW_TYPE_MAX; i++)
+ ad->pctypes_mask |= ad->pctypes_tbl[i];
+
+ return 0;
+}
+
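A short usage sketch of the new flow-type mapping API: it binds one software flow type to a set of hardware pctype bits. The pctype index below is only an illustrative placeholder; real values come from the I40E_FILTER_PCTYPE_* enumeration.

#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>

/* Point RTE_ETH_FLOW_NONFRAG_IPV4_UDP at a single pctype bit. */
static int
remap_ipv4_udp(uint16_t port_id)
{
	struct rte_pmd_i40e_flow_type_mapping map;

	map.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
	map.pctype = 1ULL << 31;	/* placeholder pctype index */

	/* exclusive = 0: merge with the existing table instead of resetting it */
	return rte_pmd_i40e_flow_type_mapping_update(port_id, &map, 1, 0);
}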
+int
+rte_pmd_i40e_query_vfid_by_mac(uint16_t port, const struct ether_addr *vf_mac)
+{
+ struct rte_eth_dev *dev;
+ struct ether_addr *mac;
+ struct i40e_pf *pf;
+ int vf_id;
+ struct i40e_pf_vf *vf;
+ uint16_t vf_num;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+ dev = &rte_eth_devices[port];
+
+ if (!is_i40e_supported(dev))
+ return -ENOTSUP;
+
+ pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+ vf_num = pf->vf_num;
+
+ for (vf_id = 0; vf_id < vf_num; vf_id++) {
+ vf = &pf->vfs[vf_id];
+ mac = &vf->mac_addr;
+
+ if (is_same_ether_addr(mac, vf_mac))
+ return vf_id;
+ }
+
+ return -EINVAL;
+}
+
+static int
+i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,
+ struct i40e_pf *pf)
+{
+ uint16_t i;
+ struct i40e_vsi *vsi = pf->main_vsi;
+ uint16_t queue_offset, bsf, tc_index;
+ struct i40e_vsi_context ctxt;
+ struct i40e_aqc_vsi_properties_data *vsi_info;
+ struct i40e_queue_regions *region_info =
+ &pf->queue_region;
+ int32_t ret = -EINVAL;
+
+ if (!region_info->queue_region_number) {
+ PMD_INIT_LOG(ERR, "that region id has not been set before");
+ return ret;
+ }
+
+ memset(&ctxt, 0, sizeof(struct i40e_vsi_context));
+
+ /* Update Queue Pairs Mapping for currently enabled UPs */
+ ctxt.seid = vsi->seid;
+ ctxt.pf_num = hw->pf_id;
+ ctxt.vf_num = 0;
+ ctxt.uplink_seid = vsi->uplink_seid;
+ ctxt.info = vsi->info;
+ vsi_info = &ctxt.info;
+
+ memset(vsi_info->tc_mapping, 0, sizeof(uint16_t) * 8);
+ memset(vsi_info->queue_mapping, 0, sizeof(uint16_t) * 16);
+
+ /* Configure queue region and queue mapping parameters,
+ * for enabled queue region, allocate queues to this region.
+ */
+
+ for (i = 0; i < region_info->queue_region_number; i++) {
+ tc_index = region_info->region[i].region_id;
+ bsf = rte_bsf32(region_info->region[i].queue_num);
+ queue_offset = region_info->region[i].queue_start_index;
+ vsi_info->tc_mapping[tc_index] = rte_cpu_to_le_16(
+ (queue_offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+ (bsf << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT));
+ }
+
+ /* Associate queue number with VSI, Keep vsi->nb_qps unchanged */
+ vsi_info->mapping_flags |=
+ rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+ vsi_info->queue_mapping[0] = rte_cpu_to_le_16(vsi->base_queue);
+ vsi_info->valid_sections |=
+ rte_cpu_to_le_16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID);
+
+ /* Update the VSI after updating the VSI queue-mapping information */
+ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+ if (ret) {
+ PMD_DRV_LOG(ERR, "Failed to configure queue region mapping = %d ",
+ hw->aq.asq_last_status);
+ return ret;
+ }
+ /* update the local VSI info with updated queue map */
+ rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,
+ sizeof(vsi->info.tc_mapping));
+ rte_memcpy(&vsi->info.queue_mapping,
+ &ctxt.info.queue_mapping,
+ sizeof(vsi->info.queue_mapping));
+ vsi->info.mapping_flags = ctxt.info.mapping_flags;
+ vsi->info.valid_sections = 0;
+
+ return 0;
+}
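Each queue region is folded into the VSI tc_mapping words as a start offset plus a power-of-two size exponent (bsf). A small worked sketch of that encoding; the shift positions are assumptions standing in for the I40E_AQ_VSI_TC_QUE_*_SHIFT macros.

#include <stdint.h>

/* Assumed field positions; the real values are the
 * I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT / I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT macros. */
#define QUE_OFFSET_SHIFT 0
#define QUE_NUMBER_SHIFT 9

/* log2 of a power-of-two queue count (rte_bsf32() in the driver). */
static uint16_t
bsf32(uint32_t v)
{
	uint16_t n = 0;

	while (v > 1) {
		v >>= 1;
		n++;
	}
	return n;
}

/* Region of 8 queues starting at queue 16:
 * encode_tc_mapping(16, 8) == (16 << 0) | (3 << 9). */
static uint16_t
encode_tc_mapping(uint16_t queue_start, uint32_t queue_num)
{
	return (uint16_t)((queue_start << QUE_OFFSET_SHIFT) |
			  (bsf32(queue_num) << QUE_NUMBER_SHIFT));
}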
+
+
+static int
+i40e_queue_region_set_region(struct i40e_pf *pf,
+ struct rte_pmd_i40e_queue_region_conf *conf_ptr)
+{
+ uint16_t i;
+ struct i40e_vsi *main_vsi = pf->main_vsi;
+ struct i40e_queue_regions *info = &pf->queue_region;
+ int32_t ret = -EINVAL;
+
+ if (!((rte_is_power_of_2(conf_ptr->queue_num)) &&
+ conf_ptr->queue_num <= 64)) {
+ PMD_DRV_LOG(ERR, "The region sizes should be any of the following values: 1, 2, 4, 8, 16, 32, 64 as long as the "
+ "total number of queues do not exceed the VSI allocation");
+ return ret;
+ }
+
+ if (conf_ptr->region_id > I40E_REGION_MAX_INDEX) {
+ PMD_DRV_LOG(ERR, "the queue region max index is 7");
+ return ret;
+ }
+
+ if ((conf_ptr->queue_start_index + conf_ptr->queue_num)
+ > main_vsi->nb_used_qps) {
+ PMD_DRV_LOG(ERR, "the queue index exceeds the VSI range");
+ return ret;
+ }
+
+ for (i = 0; i < info->queue_region_number; i++)
+ if (conf_ptr->region_id == info->region[i].region_id)
+ break;
+
+ if (i == info->queue_region_number &&
+ i <= I40E_REGION_MAX_INDEX) {
+ info->region[i].region_id = conf_ptr->region_id;
+ info->region[i].queue_num = conf_ptr->queue_num;
+ info->region[i].queue_start_index =
+ conf_ptr->queue_start_index;
+ info->queue_region_number++;
+ } else {
+ PMD_DRV_LOG(ERR, "queue region number exceeds maxnum 8 or the queue region id has been set before");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+i40e_queue_region_set_flowtype(struct i40e_pf *pf,
+ struct rte_pmd_i40e_queue_region_conf *rss_region_conf)
+{
+ int32_t ret = -EINVAL;
+ struct i40e_queue_regions *info = &pf->queue_region;
+ uint16_t i, j;
+ uint16_t region_index, flowtype_index;
+
+ /* For the pctype or hardware flowtype of packet,
+ * the specific index for each type has been defined
+ * in file i40e_type.h as enum i40e_filter_pctype.
+ */
+
+ if (rss_region_conf->region_id > I40E_PFQF_HREGION_MAX_INDEX) {
+ PMD_DRV_LOG(ERR, "the queue region max index is 7");
+ return ret;
+ }
+
+ if (rss_region_conf->hw_flowtype >= I40E_FILTER_PCTYPE_MAX) {
+ PMD_DRV_LOG(ERR, "the hw_flowtype or PCTYPE max index is 63");
+ return ret;
+ }
+
+ for (i = 0; i < info->queue_region_number; i++)
+ if (rss_region_conf->region_id == info->region[i].region_id)
+ break;
+
+ if (i == info->queue_region_number) {
+ PMD_DRV_LOG(ERR, "that region id has not been set before");
+ ret = -EINVAL;
+ return ret;
+ }
+ region_index = i;
+
+ for (i = 0; i < info->queue_region_number; i++) {
+ for (j = 0; j < info->region[i].flowtype_num; j++) {
+ if (rss_region_conf->hw_flowtype ==
+ info->region[i].hw_flowtype[j]) {
+ PMD_DRV_LOG(ERR, "that hw_flowtype has been set before");
+ return 0;
+ }
+ }
+ }
+
+ flowtype_index = info->region[region_index].flowtype_num;
+ info->region[region_index].hw_flowtype[flowtype_index] =
+ rss_region_conf->hw_flowtype;
+ info->region[region_index].flowtype_num++;
+
+ return 0;
+}
+
+static void
+i40e_queue_region_pf_flowtype_conf(struct i40e_hw *hw,
+ struct i40e_pf *pf)
+{
+ uint8_t hw_flowtype;
+ uint32_t pfqf_hregion;
+ uint16_t i, j, index;
+ struct i40e_queue_regions *info = &pf->queue_region;
+
+ /* For the pctype or hardware flowtype of packet,
+ * the specific index for each type has been defined
+ * in file i40e_type.h as enum i40e_filter_pctype.
+ */
+
+ for (i = 0; i < info->queue_region_number; i++) {
+ for (j = 0; j < info->region[i].flowtype_num; j++) {
+ hw_flowtype = info->region[i].hw_flowtype[j];
+ index = hw_flowtype >> 3;
+ pfqf_hregion =
+ i40e_read_rx_ctl(hw, I40E_PFQF_HREGION(index));
+
+ if ((hw_flowtype & 0x7) == 0) {
+ pfqf_hregion |= info->region[i].region_id <<
+ I40E_PFQF_HREGION_REGION_0_SHIFT;
+ pfqf_hregion |= 1 <<
+ I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT;
+ } else if ((hw_flowtype & 0x7) == 1) {
+ pfqf_hregion |= info->region[i].region_id <<
+ I40E_PFQF_HREGION_REGION_1_SHIFT;
+ pfqf_hregion |= 1 <<
+ I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT;
+ } else if ((hw_flowtype & 0x7) == 2) {
+ pfqf_hregion |= info->region[i].region_id <<
+ I40E_PFQF_HREGION_REGION_2_SHIFT;
+ pfqf_hregion |= 1 <<
+ I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT;
+ } else if ((hw_flowtype & 0x7) == 3) {
+ pfqf_hregion |= info->region[i].region_id <<
+ I40E_PFQF_HREGION_REGION_3_SHIFT;
+ pfqf_hregion |= 1 <<
+ I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT;
+ } else if ((hw_flowtype & 0x7) == 4) {
+ pfqf_hregion |= info->region[i].region_id <<
+ I40E_PFQF_HREGION_REGION_4_SHIFT;
+ pfqf_hregion |= 1 <<
+ I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT;
+ } else if ((hw_flowtype & 0x7) == 5) {
+ pfqf_hregion |= info->region[i].region_id <<
+ I40E_PFQF_HREGION_REGION_5_SHIFT;
+ pfqf_hregion |= 1 <<
+ I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT;
+ } else if ((hw_flowtype & 0x7) == 6) {
+ pfqf_hregion |= info->region[i].region_id <<
+ I40E_PFQF_HREGION_REGION_6_SHIFT;
+ pfqf_hregion |= 1 <<
+ I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT;
+ } else {
+ pfqf_hregion |= info->region[i].region_id <<
+ I40E_PFQF_HREGION_REGION_7_SHIFT;
+ pfqf_hregion |= 1 <<
+ I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT;
+ }
+
+ i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(index),
+ pfqf_hregion);
+ }
+ }
+}
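The chain above programs one slot per hardware flowtype inside an I40E_PFQF_HREGION register, eight flowtypes per register. Because the per-slot shifts advance at a fixed stride, the same effect can be sketched arithmetically; the 4-bit slot layout below (enable bit at the slot base, 3-bit region id above it) is an assumption inferred from the pattern of the code, not taken from the register manual.

#include <stdint.h>

#define SLOT_WIDTH         4	/* assumed bits per flowtype slot */
#define SLOT_ENA_OFFSET    0	/* assumed override-enable bit    */
#define SLOT_REGION_OFFSET 1	/* assumed 3-bit region id field  */

/* Merge one flowtype's region assignment into the register value.
 * The register index itself is hw_flowtype >> 3, exactly as in the driver. */
static uint32_t
hregion_set_slot(uint32_t reg, uint8_t hw_flowtype, uint8_t region_id)
{
	unsigned int slot = (hw_flowtype & 0x7) * SLOT_WIDTH;

	reg |= 1u << (slot + SLOT_ENA_OFFSET);			/* override enable */
	reg |= (uint32_t)(region_id & 0x7) << (slot + SLOT_REGION_OFFSET);
	return reg;
}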
+
+static int
+i40e_queue_region_set_user_priority(struct i40e_pf *pf,
+ struct rte_pmd_i40e_queue_region_conf *rss_region_conf)
+{
+ struct i40e_queue_regions *info = &pf->queue_region;
+ int32_t ret = -EINVAL;
+ uint16_t i, j, region_index;
+
+ if (rss_region_conf->user_priority >= I40E_MAX_USER_PRIORITY) {
+ PMD_DRV_LOG(ERR, "the queue region max index is 7");
+ return ret;
+ }
+
+ if (rss_region_conf->region_id > I40E_REGION_MAX_INDEX) {
+ PMD_DRV_LOG(ERR, "the region_id max index is 7");
+ return ret;
+ }
+
+ for (i = 0; i < info->queue_region_number; i++)
+ if (rss_region_conf->region_id == info->region[i].region_id)
+ break;
+
+ if (i == info->queue_region_number) {
+ PMD_DRV_LOG(ERR, "that region id has not been set before");
+ ret = -EINVAL;
+ return ret;
+ }
+
+ region_index = i;
+
+ for (i = 0; i < info->queue_region_number; i++) {
+ for (j = 0; j < info->region[i].user_priority_num; j++) {
+ if (info->region[i].user_priority[j] ==
+ rss_region_conf->user_priority) {
+ PMD_DRV_LOG(ERR, "that user priority has been set before");
+ return 0;
+ }
+ }
+ }
+
+ j = info->region[region_index].user_priority_num;
+ info->region[region_index].user_priority[j] =
+ rss_region_conf->user_priority;
+ info->region[region_index].user_priority_num++;
+
+ return 0;
+}
+
+static int
+i40e_queue_region_dcb_configure(struct i40e_hw *hw,
+ struct i40e_pf *pf)
+{
+ struct i40e_dcbx_config dcb_cfg_local;
+ struct i40e_dcbx_config *dcb_cfg;
+ struct i40e_queue_regions *info = &pf->queue_region;
+ struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config;
+ int32_t ret = -EINVAL;
+ uint16_t i, j, prio_index, region_index;
+ uint8_t tc_map, tc_bw, bw_lf;
+
+ if (!info->queue_region_number) {
+ PMD_DRV_LOG(ERR, "No queue region been set before");
+ return ret;
+ }
+
+ dcb_cfg = &dcb_cfg_local;
+ memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config));
+
+ /* assume each tc has the same bw */
+ tc_bw = I40E_MAX_PERCENT / info->queue_region_number;
+ for (i = 0; i < info->queue_region_number; i++)
+ dcb_cfg->etscfg.tcbwtable[i] = tc_bw;
+ /* to ensure the sum of tcbw is equal to 100 */
+ bw_lf = I40E_MAX_PERCENT % info->queue_region_number;
+ for (i = 0; i < bw_lf; i++)
+ dcb_cfg->etscfg.tcbwtable[i]++;
+
+ /* assume each tc has the same Transmission Selection Algorithm */
+ for (i = 0; i < info->queue_region_number; i++)
+ dcb_cfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;
+
+ for (i = 0; i < info->queue_region_number; i++) {
+ for (j = 0; j < info->region[i].user_priority_num; j++) {
+ prio_index = info->region[i].user_priority[j];
+ region_index = info->region[i].region_id;
+ dcb_cfg->etscfg.prioritytable[prio_index] =
+ region_index;
+ }
+ }
+
+ /* FW needs one App to configure HW */
+ dcb_cfg->numapps = I40E_DEFAULT_DCB_APP_NUM;
+ dcb_cfg->app[0].selector = I40E_APP_SEL_ETHTYPE;
+ dcb_cfg->app[0].priority = I40E_DEFAULT_DCB_APP_PRIO;
+ dcb_cfg->app[0].protocolid = I40E_APP_PROTOID_FCOE;
+
+ tc_map = RTE_LEN2MASK(info->queue_region_number, uint8_t);
+
+ dcb_cfg->pfc.willing = 0;
+ dcb_cfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
+ dcb_cfg->pfc.pfcenable = tc_map;
+
+ /* Copy the new config to the current config */
+ *old_cfg = *dcb_cfg;
+ old_cfg->etsrec = old_cfg->etscfg;
+ ret = i40e_set_dcb_config(hw);
+
+ if (ret) {
+ PMD_DRV_LOG(ERR, "Set queue region DCB Config failed, err %s aq_err %s",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return ret;
+ }
+
+ return 0;
+}
+
+int
+i40e_flush_queue_region_all_conf(struct rte_eth_dev *dev,
+ struct i40e_hw *hw, struct i40e_pf *pf, uint16_t on)
+{
+ int32_t ret = -EINVAL;
+ struct i40e_queue_regions *info = &pf->queue_region;
+ struct i40e_vsi *main_vsi = pf->main_vsi;
+
+ if (on) {
+ i40e_queue_region_pf_flowtype_conf(hw, pf);
+
+ ret = i40e_vsi_update_queue_region_mapping(hw, pf);
+ if (ret != I40E_SUCCESS) {
+ PMD_DRV_LOG(INFO, "Failed to flush queue region mapping.");
+ return ret;
+ }
+
+ ret = i40e_queue_region_dcb_configure(hw, pf);
+ if (ret != I40E_SUCCESS) {
+ PMD_DRV_LOG(INFO, "Failed to flush dcb.");
+ return ret;
+ }
+
+ return 0;
+ }
+
+ info->queue_region_number = 1;
+ info->region[0].queue_num = main_vsi->nb_used_qps;
+ info->region[0].queue_start_index = 0;
+
+ ret = i40e_vsi_update_queue_region_mapping(hw, pf);
+ if (ret != I40E_SUCCESS)
+ PMD_DRV_LOG(INFO, "Failed to flush queue region mapping.");
+
+ ret = i40e_dcb_init_configure(dev, TRUE);
+ if (ret != I40E_SUCCESS) {
+ PMD_DRV_LOG(INFO, "Failed to flush dcb.");
+ pf->flags &= ~I40E_FLAG_DCB;
+ }
+
+ i40e_init_queue_region_conf(dev);
+
+ return 0;
+}
+
+static int
+i40e_queue_region_pf_check_rss(struct i40e_pf *pf)
+{
+ struct i40e_hw *hw = I40E_PF_TO_HW(pf);
+ uint64_t hena;
+
+ hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0));
+ hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32;
+
+ if (!hena)
+ return -ENOTSUP;
+
+ return 0;
+}
+
+static int
+i40e_queue_region_get_all_info(struct i40e_pf *pf,
+ struct i40e_queue_regions *regions_ptr)
+{
+ struct i40e_queue_regions *info = &pf->queue_region;
+
+ rte_memcpy(regions_ptr, info,
+ sizeof(struct i40e_queue_regions));
+
+ return 0;
+}
+
+int rte_pmd_i40e_rss_queue_region_conf(uint16_t port_id,
+ enum rte_pmd_i40e_queue_region_op op_type, void *arg)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+ struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ int32_t ret;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+
+ if (!is_i40e_supported(dev))
+ return -ENOTSUP;
+
+	if (i40e_queue_region_pf_check_rss(pf))
+ return -ENOTSUP;
+
+	/* The queue region feature currently only supports the PF. It should
+	 * be called after dev_start, and the configuration is cleared on
+	 * dev_stop. All queue region configuration requested from the upper
+	 * layer is at first only recorded in the driver (software); because
+	 * the PMD has to program the hardware in one shot, it records all
+	 * upper layer commands until
+	 * "RTE_PMD_I40E_RSS_QUEUE_REGION_ALL_FLUSH_ON" is issued, which
+	 * commits the accumulated configuration to hardware.
+	 * "RTE_PMD_I40E_RSS_QUEUE_REGION_ALL_FLUSH_OFF" clears all queue
+	 * region configuration and restores the default DPDK i40e driver
+	 * configuration used at startup. (A usage sketch follows this
+	 * function.)
+	 */
+
+ switch (op_type) {
+ case RTE_PMD_I40E_RSS_QUEUE_REGION_SET:
+ ret = i40e_queue_region_set_region(pf,
+ (struct rte_pmd_i40e_queue_region_conf *)arg);
+ break;
+ case RTE_PMD_I40E_RSS_QUEUE_REGION_FLOWTYPE_SET:
+ ret = i40e_queue_region_set_flowtype(pf,
+ (struct rte_pmd_i40e_queue_region_conf *)arg);
+ break;
+ case RTE_PMD_I40E_RSS_QUEUE_REGION_USER_PRIORITY_SET:
+ ret = i40e_queue_region_set_user_priority(pf,
+ (struct rte_pmd_i40e_queue_region_conf *)arg);
+ break;
+ case RTE_PMD_I40E_RSS_QUEUE_REGION_ALL_FLUSH_ON:
+ ret = i40e_flush_queue_region_all_conf(dev, hw, pf, 1);
+ break;
+ case RTE_PMD_I40E_RSS_QUEUE_REGION_ALL_FLUSH_OFF:
+ ret = i40e_flush_queue_region_all_conf(dev, hw, pf, 0);
+ break;
+ case RTE_PMD_I40E_RSS_QUEUE_REGION_INFO_GET:
+ ret = i40e_queue_region_get_all_info(pf,
+ (struct i40e_queue_regions *)arg);
+ break;
+ default:
+ PMD_DRV_LOG(WARNING, "op type (%d) not supported",
+ op_type);
+ ret = -EINVAL;
+ }
+
+ I40E_WRITE_FLUSH(hw);
+
+ return ret;
+}
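
As a minimal usage sketch of the record-then-flush workflow described in the comment inside this function: the application calls rte_pmd_i40e_rss_queue_region_conf() with the various SET operations and then commits with FLUSH_ON. The helper name, the queue range and the pctype value below are illustrative, not part of this patch; real pctype indexes come from enum i40e_filter_pctype in i40e_type.h.

	#include <string.h>
	#include <rte_pmd_i40e.h>

	/* Sketch: carve queues 0..3 into region 0, steer one pctype and one
	 * user priority to it, then commit everything to hardware at once.
	 */
	static int
	setup_one_queue_region(uint16_t port_id)
	{
		struct rte_pmd_i40e_queue_region_conf conf;
		int ret;

		memset(&conf, 0, sizeof(conf));
		conf.region_id = 0;
		conf.queue_start_index = 0;
		conf.queue_num = 4;
		ret = rte_pmd_i40e_rss_queue_region_conf(port_id,
				RTE_PMD_I40E_RSS_QUEUE_REGION_SET, &conf);
		if (ret)
			return ret;

		memset(&conf, 0, sizeof(conf));
		conf.region_id = 0;
		conf.hw_flowtype = 31;	/* illustrative pctype index */
		ret = rte_pmd_i40e_rss_queue_region_conf(port_id,
				RTE_PMD_I40E_RSS_QUEUE_REGION_FLOWTYPE_SET, &conf);
		if (ret)
			return ret;

		memset(&conf, 0, sizeof(conf));
		conf.region_id = 0;
		conf.user_priority = 0;
		ret = rte_pmd_i40e_rss_queue_region_conf(port_id,
				RTE_PMD_I40E_RSS_QUEUE_REGION_USER_PRIORITY_SET, &conf);
		if (ret)
			return ret;

		/* Nothing has touched the hardware yet; FLUSH_ON commits it all. */
		return rte_pmd_i40e_rss_queue_region_conf(port_id,
				RTE_PMD_I40E_RSS_QUEUE_REGION_ALL_FLUSH_ON, NULL);
	}
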
+
+int rte_pmd_i40e_flow_add_del_packet_template(
+ uint16_t port,
+ const struct rte_pmd_i40e_pkt_template_conf *conf,
+ uint8_t add)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port];
+ struct i40e_fdir_filter_conf filter_conf;
+
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
+
+ if (!is_i40e_supported(dev))
+ return -ENOTSUP;
+
+ memset(&filter_conf, 0, sizeof(filter_conf));
+ filter_conf.soft_id = conf->soft_id;
+ filter_conf.input.flow.raw_flow.pctype = conf->input.pctype;
+ filter_conf.input.flow.raw_flow.packet = conf->input.packet;
+ filter_conf.input.flow.raw_flow.length = conf->input.length;
+ filter_conf.input.flow_ext.pkt_template = true;
+
+ filter_conf.action.rx_queue = conf->action.rx_queue;
+ filter_conf.action.behavior =
+ (enum i40e_fdir_behavior)conf->action.behavior;
+ filter_conf.action.report_status =
+ (enum i40e_fdir_status)conf->action.report_status;
+ filter_conf.action.flex_off = conf->action.flex_off;
+
+ return i40e_flow_add_del_fdir_filter(dev, &filter_conf, add);
+}
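
A minimal sketch of driving this wrapper from an application, assuming the rte_pmd_i40e_pkt_template_conf layout declared in rte_pmd_i40e.h later in this patch; the helper name, soft_id and template buffer are illustrative.

	#include <string.h>
	#include <rte_pmd_i40e.h>

	/* Sketch: install a raw packet template filter that steers matching
	 * packets to rx_queue. The template buffer and pctype are supplied
	 * by the caller; soft_id is an arbitrary application-chosen index.
	 */
	static int
	add_pkt_template_filter(uint16_t port_id, uint16_t pctype,
				void *template_buf, uint32_t template_len,
				uint16_t rx_queue)
	{
		struct rte_pmd_i40e_pkt_template_conf conf;

		memset(&conf, 0, sizeof(conf));
		conf.soft_id = 1;			/* illustrative */
		conf.input.pctype = pctype;
		conf.input.packet = template_buf;
		conf.input.length = template_len;
		conf.action.rx_queue = rx_queue;
		conf.action.behavior = RTE_PMD_I40E_PKT_TEMPLATE_ACCEPT;
		conf.action.report_status =
			RTE_PMD_I40E_PKT_TEMPLATE_NO_REPORT_STATUS;

		return rte_pmd_i40e_flow_add_del_packet_template(port_id,
								 &conf, 1);
	}
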
diff --git a/drivers/net/i40e/rte_pmd_i40e.h b/drivers/net/i40e/rte_pmd_i40e.h
index 356fa89d..580ca4ae 100644
--- a/drivers/net/i40e/rte_pmd_i40e.h
+++ b/drivers/net/i40e/rte_pmd_i40e.h
@@ -88,10 +88,48 @@ enum rte_pmd_i40e_package_info {
RTE_PMD_I40E_PKG_INFO_HEADER,
RTE_PMD_I40E_PKG_INFO_DEVID_NUM,
RTE_PMD_I40E_PKG_INFO_DEVID_LIST,
+ RTE_PMD_I40E_PKG_INFO_PROTOCOL_NUM,
+ RTE_PMD_I40E_PKG_INFO_PROTOCOL_LIST,
+ RTE_PMD_I40E_PKG_INFO_PCTYPE_NUM,
+ RTE_PMD_I40E_PKG_INFO_PCTYPE_LIST,
+ RTE_PMD_I40E_PKG_INFO_PTYPE_NUM,
+ RTE_PMD_I40E_PKG_INFO_PTYPE_LIST,
RTE_PMD_I40E_PKG_INFO_MAX = 0xFFFFFFFF
};
-#define RTE_PMD_I40E_DDP_NAME_SIZE 32
+/**
+ * Option types of queue region.
+ */
+enum rte_pmd_i40e_queue_region_op {
+ RTE_PMD_I40E_RSS_QUEUE_REGION_UNDEFINED,
+ /** add queue region set */
+ RTE_PMD_I40E_RSS_QUEUE_REGION_SET,
+ /** add PF region pctype set */
+ RTE_PMD_I40E_RSS_QUEUE_REGION_FLOWTYPE_SET,
+ /** add queue region user priority set */
+ RTE_PMD_I40E_RSS_QUEUE_REGION_USER_PRIORITY_SET,
+ /**
+	 * All queue region configuration from the upper layer is at first
+	 * only stored in the driver (software). Because the PMD has to
+	 * program the hardware in one shot, it records all upper layer
+	 * commands until "FLUSH_ON" is issued, which commits the whole
+	 * configuration to hardware.
+ */
+ RTE_PMD_I40E_RSS_QUEUE_REGION_ALL_FLUSH_ON,
+ /**
+ * "FLUSH_OFF " is just clean all configuration about queue
+ * region just now, and restore all to DPDK i40e driver default
+ * config when start up.
+ */
+ RTE_PMD_I40E_RSS_QUEUE_REGION_ALL_FLUSH_OFF,
+ RTE_PMD_I40E_RSS_QUEUE_REGION_INFO_GET,
+ RTE_PMD_I40E_RSS_QUEUE_REGION_OP_MAX
+};
+
+#define RTE_PMD_I40E_DDP_NAME_SIZE 32
+#define RTE_PMD_I40E_PCTYPE_MAX 64
+#define RTE_PMD_I40E_REGION_MAX_NUM 8
+#define RTE_PMD_I40E_MAX_USER_PRIORITY 8
/**
* Version for dynamic device personalization.
@@ -133,6 +171,25 @@ struct rte_pmd_i40e_profile_list {
struct rte_pmd_i40e_profile_info p_info[1];
};
+#define RTE_PMD_I40E_PROTO_NUM 6
+#define RTE_PMD_I40E_PROTO_UNUSED 0xFF
+
+/**
+ * Protocols information stored in profile
+ */
+struct rte_pmd_i40e_proto_info {
+ uint8_t proto_id;
+ char name[RTE_PMD_I40E_DDP_NAME_SIZE];
+};
+
+/**
+ * Packet classification/ packet type information stored in profile
+ */
+struct rte_pmd_i40e_ptype_info {
+ uint8_t ptype_id;
+ uint8_t protocols[RTE_PMD_I40E_PROTO_NUM];
+};
+
/**
* ptype mapping table only accept RTE_PTYPE_XXX or "user defined" ptype.
* A ptype with MSB set will be regarded as a user defined ptype.
@@ -146,6 +203,141 @@ struct rte_pmd_i40e_ptype_mapping {
};
/**
+ * Queue region related information.
+ */
+struct rte_pmd_i40e_queue_region_conf {
+ /** the region id for this configuration */
+ uint8_t region_id;
+	/** the pctype or hardware flowtype of the packet;
+	 * the specific index for each type is defined
+	 * in file i40e_type.h as enum i40e_filter_pctype.
+ */
+ uint8_t hw_flowtype;
+ /** the start queue index for this region */
+ uint8_t queue_start_index;
+ /** the total queue number of this queue region */
+ uint8_t queue_num;
+ /** the packet's user priority for this region */
+ uint8_t user_priority;
+};
+
+/* queue region info */
+struct rte_pmd_i40e_queue_region_info {
+ /** the region id for this configuration */
+ uint8_t region_id;
+ /** the start queue index for this region */
+ uint8_t queue_start_index;
+ /** the total queue number of this queue region */
+ uint8_t queue_num;
+ /** the total number of user priority for this region */
+ uint8_t user_priority_num;
+ /** the packet's user priority for this region */
+ uint8_t user_priority[RTE_PMD_I40E_MAX_USER_PRIORITY];
+ /** the total number of flowtype for this region */
+ uint8_t flowtype_num;
+ /**
+	 * the pctype or hardware flowtype of the packet;
+	 * the specific index for each type is defined
+ * in file i40e_type.h as enum i40e_filter_pctype.
+ */
+ uint8_t hw_flowtype[RTE_PMD_I40E_PCTYPE_MAX];
+};
+
+struct rte_pmd_i40e_queue_regions {
+ /** the total number of queue region for this port */
+ uint16_t queue_region_number;
+ struct rte_pmd_i40e_queue_region_info
+ region[RTE_PMD_I40E_REGION_MAX_NUM];
+};
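
A short sketch of reading this structure back through the INFO_GET operation, assuming (as the driver-side handler earlier in this patch suggests) that rte_pmd_i40e_queue_regions mirrors the driver's internal i40e_queue_regions layout; the helper name is illustrative.

	#include <stdio.h>
	#include <string.h>
	#include <rte_pmd_i40e.h>

	/* Sketch: dump the queue regions currently recorded by the driver. */
	static void
	dump_queue_regions(uint16_t port_id)
	{
		struct rte_pmd_i40e_queue_regions regions;
		uint16_t i;

		memset(&regions, 0, sizeof(regions));
		if (rte_pmd_i40e_rss_queue_region_conf(port_id,
				RTE_PMD_I40E_RSS_QUEUE_REGION_INFO_GET, &regions))
			return;

		for (i = 0; i < regions.queue_region_number; i++)
			printf("region %u: first queue %u, %u queue(s)\n",
			       (unsigned int)regions.region[i].region_id,
			       (unsigned int)regions.region[i].queue_start_index,
			       (unsigned int)regions.region[i].queue_num);
	}
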
+
+/**
+ * Behavior to be taken if the raw packet template is matched.
+ */
+enum rte_pmd_i40e_pkt_template_behavior {
+ RTE_PMD_I40E_PKT_TEMPLATE_ACCEPT,
+ RTE_PMD_I40E_PKT_TEMPLATE_REJECT,
+ RTE_PMD_I40E_PKT_TEMPLATE_PASSTHRU,
+};
+
+/**
+ * Flow director report status
+ * It defines what will be reported if the raw packet template is matched.
+ */
+enum rte_pmd_i40e_pkt_template_status {
+ /** report nothing */
+ RTE_PMD_I40E_PKT_TEMPLATE_NO_REPORT_STATUS,
+ /** only report FD ID */
+ RTE_PMD_I40E_PKT_TEMPLATE_REPORT_ID,
+ /** report FD ID and 4 flex bytes */
+ RTE_PMD_I40E_PKT_TEMPLATE_REPORT_ID_FLEX_4,
+ /** report 8 flex bytes */
+ RTE_PMD_I40E_PKT_TEMPLATE_REPORT_FLEX_8,
+};
+
+/**
+ * A structure used to define an action when raw packet template is matched.
+ */
+struct rte_pmd_i40e_pkt_template_action {
+	/** queue to assign packets to if the raw packet template matches */
+	uint16_t rx_queue;
+	/** behavior to be taken */
+ enum rte_pmd_i40e_pkt_template_behavior behavior;
+ /** status report option */
+ enum rte_pmd_i40e_pkt_template_status report_status;
+ /**
+ * If report_status is RTE_PMD_I40E_PKT_TEMPLATE_REPORT_ID_FLEX_4 or
+ * RTE_PMD_I40E_PKT_TEMPLATE_REPORT_FLEX_8, flex_off specifies
+ * where the reported flex bytes start from in flexible payload.
+ */
+ uint8_t flex_off;
+};
+
+/**
+ * A structure used to define the input for raw packet template.
+ */
+struct rte_pmd_i40e_pkt_template_input {
+ /** the pctype used for raw packet template */
+ uint16_t pctype;
+	/** the buffer containing the raw packet template */
+	void *packet;
+	/** the length of the raw packet template buffer */
+ uint32_t length;
+};
+
+/**
+ * A structure used to define the configuration parameters
+ * for raw packet template.
+ */
+struct rte_pmd_i40e_pkt_template_conf {
+ /** the input for raw packet template. */
+ struct rte_pmd_i40e_pkt_template_input input;
+ /** the action to be taken when raw packet template is matched */
+ struct rte_pmd_i40e_pkt_template_action action;
+	/** ID, a unique software index for the raw packet template filter */
+ uint32_t soft_id;
+};
+
+/**
+ * Add or remove raw packet template filter to Flow Director.
+ *
+ * @param port
+ * The port identifier of the Ethernet device.
+ * @param conf
+ * Specifies configuration parameters of raw packet template filter.
+ * @param add
+ * Specifies the action to be taken - add or remove the raw packet template filter.
+ * @return
+ * - (0) if successful.
+ * - (-ENODEV) if *port* invalid.
+ * - (-EINVAL) if *conf* invalid.
+ * - (-ENOTSUP) not supported by firmware.
+ */
+int rte_pmd_i40e_flow_add_del_packet_template(
+ uint16_t port,
+ const struct rte_pmd_i40e_pkt_template_conf *conf,
+ uint8_t add);
+
+/**
* Notify VF when PF link status changes.
*
* @param port
@@ -157,7 +349,7 @@ struct rte_pmd_i40e_ptype_mapping {
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if *vf* invalid.
*/
-int rte_pmd_i40e_ping_vfs(uint8_t port, uint16_t vf);
+int rte_pmd_i40e_ping_vfs(uint16_t port, uint16_t vf);
/**
* Enable/Disable VF MAC anti spoofing.
@@ -174,7 +366,7 @@ int rte_pmd_i40e_ping_vfs(uint8_t port, uint16_t vf);
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_i40e_set_vf_mac_anti_spoof(uint8_t port,
+int rte_pmd_i40e_set_vf_mac_anti_spoof(uint16_t port,
uint16_t vf_id,
uint8_t on);
@@ -193,7 +385,7 @@ int rte_pmd_i40e_set_vf_mac_anti_spoof(uint8_t port,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_i40e_set_vf_vlan_anti_spoof(uint8_t port,
+int rte_pmd_i40e_set_vf_vlan_anti_spoof(uint16_t port,
uint16_t vf_id,
uint8_t on);
@@ -210,7 +402,7 @@ int rte_pmd_i40e_set_vf_vlan_anti_spoof(uint8_t port,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_i40e_set_tx_loopback(uint8_t port,
+int rte_pmd_i40e_set_tx_loopback(uint16_t port,
uint8_t on);
/**
@@ -228,7 +420,7 @@ int rte_pmd_i40e_set_tx_loopback(uint8_t port,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_i40e_set_vf_unicast_promisc(uint8_t port,
+int rte_pmd_i40e_set_vf_unicast_promisc(uint16_t port,
uint16_t vf_id,
uint8_t on);
@@ -247,7 +439,7 @@ int rte_pmd_i40e_set_vf_unicast_promisc(uint8_t port,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_i40e_set_vf_multicast_promisc(uint8_t port,
+int rte_pmd_i40e_set_vf_multicast_promisc(uint16_t port,
uint16_t vf_id,
uint8_t on);
@@ -271,7 +463,7 @@ int rte_pmd_i40e_set_vf_multicast_promisc(uint8_t port,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if *vf* or *mac_addr* is invalid.
*/
-int rte_pmd_i40e_set_vf_mac_addr(uint8_t port, uint16_t vf_id,
+int rte_pmd_i40e_set_vf_mac_addr(uint16_t port, uint16_t vf_id,
struct ether_addr *mac_addr);
/**
@@ -291,7 +483,7 @@ int rte_pmd_i40e_set_vf_mac_addr(uint8_t port, uint16_t vf_id,
* - (-EINVAL) if bad parameter.
*/
int
-rte_pmd_i40e_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on);
+rte_pmd_i40e_set_vf_vlan_stripq(uint16_t port, uint16_t vf, uint8_t on);
/**
* Enable/Disable vf vlan insert
@@ -309,7 +501,7 @@ rte_pmd_i40e_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on);
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id,
+int rte_pmd_i40e_set_vf_vlan_insert(uint16_t port, uint16_t vf_id,
uint16_t vlan_id);
/**
@@ -328,7 +520,7 @@ int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_i40e_set_vf_broadcast(uint8_t port, uint16_t vf_id,
+int rte_pmd_i40e_set_vf_broadcast(uint16_t port, uint16_t vf_id,
uint8_t on);
/**
@@ -347,7 +539,7 @@ int rte_pmd_i40e_set_vf_broadcast(uint8_t port, uint16_t vf_id,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_i40e_set_vf_vlan_tag(uint8_t port, uint16_t vf_id, uint8_t on);
+int rte_pmd_i40e_set_vf_vlan_tag(uint16_t port, uint16_t vf_id, uint8_t on);
/**
* Enable/Disable VF VLAN filter
@@ -368,7 +560,7 @@ int rte_pmd_i40e_set_vf_vlan_tag(uint8_t port, uint16_t vf_id, uint8_t on);
* - (-EINVAL) if bad parameter.
* - (-ENOTSUP) not supported by firmware.
*/
-int rte_pmd_i40e_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id,
+int rte_pmd_i40e_set_vf_vlan_filter(uint16_t port, uint16_t vlan_id,
uint64_t vf_mask, uint8_t on);
/**
@@ -393,7 +585,7 @@ int rte_pmd_i40e_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id,
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_i40e_get_vf_stats(uint8_t port,
+int rte_pmd_i40e_get_vf_stats(uint16_t port,
uint16_t vf_id,
struct rte_eth_stats *stats);
@@ -409,7 +601,7 @@ int rte_pmd_i40e_get_vf_stats(uint8_t port,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_i40e_reset_vf_stats(uint8_t port,
+int rte_pmd_i40e_reset_vf_stats(uint16_t port,
uint16_t vf_id);
/**
@@ -434,7 +626,7 @@ int rte_pmd_i40e_reset_vf_stats(uint8_t port,
* - (-EINVAL) if bad parameter.
* - (-ENOTSUP) not supported by firmware.
*/
-int rte_pmd_i40e_set_vf_max_bw(uint8_t port,
+int rte_pmd_i40e_set_vf_max_bw(uint16_t port,
uint16_t vf_id,
uint32_t bw);
@@ -459,7 +651,7 @@ int rte_pmd_i40e_set_vf_max_bw(uint8_t port,
* - (-EINVAL) if bad parameter.
* - (-ENOTSUP) not supported by firmware.
*/
-int rte_pmd_i40e_set_vf_tc_bw_alloc(uint8_t port,
+int rte_pmd_i40e_set_vf_tc_bw_alloc(uint16_t port,
uint16_t vf_id,
uint8_t tc_num,
uint8_t *bw_weight);
@@ -484,7 +676,7 @@ int rte_pmd_i40e_set_vf_tc_bw_alloc(uint8_t port,
* - (-EINVAL) if bad parameter.
* - (-ENOTSUP) not supported by firmware.
*/
-int rte_pmd_i40e_set_vf_tc_max_bw(uint8_t port,
+int rte_pmd_i40e_set_vf_tc_max_bw(uint16_t port,
uint16_t vf_id,
uint8_t tc_no,
uint32_t bw);
@@ -502,7 +694,7 @@ int rte_pmd_i40e_set_vf_tc_max_bw(uint8_t port,
* - (-EINVAL) if bad parameter.
* - (-ENOTSUP) not supported by firmware.
*/
-int rte_pmd_i40e_set_tc_strict_prio(uint8_t port, uint8_t tc_map);
+int rte_pmd_i40e_set_tc_strict_prio(uint16_t port, uint8_t tc_map);
/**
* Load/Unload a ddp package
@@ -523,7 +715,7 @@ int rte_pmd_i40e_set_tc_strict_prio(uint8_t port, uint8_t tc_map);
* - (-EACCES) if profile does not exist.
* - (-ENOTSUP) if operation not supported.
*/
-int rte_pmd_i40e_process_ddp_package(uint8_t port, uint8_t *buff,
+int rte_pmd_i40e_process_ddp_package(uint16_t port, uint8_t *buff,
uint32_t size,
enum rte_pmd_i40e_package_op op);
@@ -561,7 +753,7 @@ int rte_pmd_i40e_get_ddp_info(uint8_t *pkg, uint32_t pkg_size,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_i40e_get_ddp_list(uint8_t port, uint8_t *buff, uint32_t size);
+int rte_pmd_i40e_get_ddp_list(uint16_t port, uint8_t *buff, uint32_t size);
/**
* Update hardware defined ptype to software defined packet type
@@ -581,7 +773,7 @@ int rte_pmd_i40e_get_ddp_list(uint8_t port, uint8_t *buff, uint32_t size);
* set other PTYPEs maps to PTYPE_UNKNOWN.
*/
int rte_pmd_i40e_ptype_mapping_update(
- uint8_t port,
+ uint16_t port,
struct rte_pmd_i40e_ptype_mapping *mapping_items,
uint16_t count,
uint8_t exclusive);
@@ -593,7 +785,7 @@ int rte_pmd_i40e_ptype_mapping_update(
* @param port
* pointer to port identifier of the device
*/
-int rte_pmd_i40e_ptype_mapping_reset(uint8_t port);
+int rte_pmd_i40e_ptype_mapping_reset(uint16_t port);
/**
* Get hardware defined ptype to software defined ptype
@@ -612,7 +804,7 @@ int rte_pmd_i40e_ptype_mapping_reset(uint8_t port);
* -(!0) only return mapping items which packet_type != RTE_PTYPE_UNKNOWN.
*/
int rte_pmd_i40e_ptype_mapping_get(
- uint8_t port,
+ uint16_t port,
struct rte_pmd_i40e_ptype_mapping *mapping_items,
uint16_t size,
uint16_t *count,
@@ -632,9 +824,113 @@ int rte_pmd_i40e_ptype_mapping_get(
* @param pkt_type
* the new packet type to overwrite
*/
-int rte_pmd_i40e_ptype_mapping_replace(uint8_t port,
+int rte_pmd_i40e_ptype_mapping_replace(uint16_t port,
uint32_t target,
uint8_t mask,
uint32_t pkt_type);
+/**
+ * Add a VF MAC address.
+ *
+ * Add another MAC address for a VF. The existing MAC addresses
+ * remain effective.
+ *
+ * @param port
+ * The port identifier of the Ethernet device.
+ * @param vf_id
+ * VF id.
+ * @param mac_addr
+ * VF MAC address.
+ * @return
+ * - (0) if successful.
+ * - (-ENODEV) if *port* invalid.
+ * - (-EINVAL) if *vf* or *mac_addr* is invalid.
+ */
+int rte_pmd_i40e_add_vf_mac_addr(uint16_t port, uint16_t vf_id,
+ struct ether_addr *mac_addr);
+
+#define RTE_PMD_I40E_PCTYPE_MAX 64
+#define RTE_PMD_I40E_FLOW_TYPE_MAX 64
+
+struct rte_pmd_i40e_flow_type_mapping {
+ uint16_t flow_type; /**< software defined flow type*/
+ uint64_t pctype; /**< hardware defined pctype */
+};
+
+/**
+ * Update hardware defined pctype to software defined flow type
+ * mapping table.
+ *
+ * @param port
+ * pointer to port identifier of the device.
+ * @param mapping_items
+ * the base address of the mapping items array.
+ * @param count
+ * number of mapping items.
+ * @param exclusive
+ * the flag indicating the pctype mapping update method.
+ * -(0) only overwrite the referred PCTYPE mappings,
+ * keep the other PCTYPE mappings unchanged.
+ * -(!0) overwrite the referred PCTYPE mappings and
+ * set all other PCTYPEs to map to PCTYPE_INVALID.
+ */
+int rte_pmd_i40e_flow_type_mapping_update(
+ uint16_t port,
+ struct rte_pmd_i40e_flow_type_mapping *mapping_items,
+ uint16_t count,
+ uint8_t exclusive);
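
A brief sketch of a non-exclusive update (exclusive == 0), which only overwrites the referred flow type and leaves the rest of the table unchanged. Whether the pctype field carries an index or a bit-mask of pctypes is not visible in this hunk, so treat the value passed in as illustrative; the helper name is not part of this patch.

	#include <rte_pmd_i40e.h>

	/* Sketch: remap one software flow type to a hardware pctype value. */
	static int
	remap_one_flow_type(uint16_t port_id, uint16_t flow_type, uint64_t pctype)
	{
		struct rte_pmd_i40e_flow_type_mapping item = {
			.flow_type = flow_type,
			.pctype = pctype,
		};

		return rte_pmd_i40e_flow_type_mapping_update(port_id, &item, 1, 0);
	}
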
+
+/**
+ * Get software defined flow type to hardware defined pctype
+ * mapping items.
+ *
+ * @param port
+ * pointer to port identifier of the device.
+ * @param mapping_items
+ * the base address of the array to store returned items.
+ * array should be allocated by caller with minimum size of
+ * RTE_PMD_I40E_FLOW_TYPE_MAX items
+ */
+int rte_pmd_i40e_flow_type_mapping_get(
+ uint16_t port,
+ struct rte_pmd_i40e_flow_type_mapping *mapping_items);
+
+/**
+ * Reset hardware defined pctype to software defined flow type
+ * mapping table to default.
+ *
+ * @param port
+ * pointer to port identifier of the device
+ */
+int rte_pmd_i40e_flow_type_mapping_reset(uint16_t port);
+
+/**
+ * On the PF, find VF index based on VF MAC address
+ *
+ * @param port
+ * pointer to port identifier of the device
+ * @param vf_mac
+ * the MAC address of the VF whose index is to be determined
+ * @return
+ * The VF index if successful.
+ * -EINVAL: the VF MAC address does not exist for this port
+ * -ENOTSUP: i40e is not supported for this port.
+ */
+int rte_pmd_i40e_query_vfid_by_mac(uint16_t port,
+ const struct ether_addr *vf_mac);
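
A small sketch of the lookup, with an illustrative helper name; negative return values are the errno-style failures documented above.

	#include <stdio.h>
	#include <rte_ether.h>
	#include <rte_pmd_i40e.h>

	/* Sketch: report which VF owns a given MAC address. */
	static void
	show_vf_for_mac(uint16_t port_id, const struct ether_addr *mac)
	{
		int vf_id = rte_pmd_i40e_query_vfid_by_mac(port_id, mac);

		if (vf_id < 0)
			printf("lookup failed: %d\n", vf_id);	/* -EINVAL or -ENOTSUP */
		else
			printf("MAC belongs to VF %d\n", vf_id);
	}
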
+
+/**
+ * Perform RSS queue region configuration on the given port
+ * according to the operation type.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param op_type
+ * Queue region operation type
+ * @param arg
+ * Queue region operation type specific data
+ */
+int rte_pmd_i40e_rss_queue_region_conf(uint16_t port_id,
+ enum rte_pmd_i40e_queue_region_op op_type, void *arg);
+
#endif /* _PMD_I40E_H_ */
diff --git a/drivers/net/i40e/rte_pmd_i40e_version.map b/drivers/net/i40e/rte_pmd_i40e_version.map
index 20cc9801..ebbd24e0 100644
--- a/drivers/net/i40e/rte_pmd_i40e_version.map
+++ b/drivers/net/i40e/rte_pmd_i40e_version.map
@@ -45,3 +45,16 @@ DPDK_17.08 {
rte_pmd_i40e_get_ddp_info;
} DPDK_17.05;
+
+DPDK_17.11 {
+ global:
+
+ rte_pmd_i40e_add_vf_mac_addr;
+ rte_pmd_i40e_flow_add_del_packet_template;
+ rte_pmd_i40e_flow_type_mapping_update;
+ rte_pmd_i40e_flow_type_mapping_get;
+ rte_pmd_i40e_flow_type_mapping_reset;
+ rte_pmd_i40e_query_vfid_by_mac;
+ rte_pmd_i40e_rss_queue_region_conf;
+
+} DPDK_17.08;
diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile
index 5e57cb35..511a64eb 100644
--- a/drivers/net/ixgbe/Makefile
+++ b/drivers/net/ixgbe/Makefile
@@ -41,7 +41,7 @@ CFLAGS += $(WERROR_FLAGS)
EXPORT_MAP := rte_pmd_ixgbe_version.map
-LIBABIVER := 1
+LIBABIVER := 2
ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
#
@@ -82,12 +82,15 @@ endif
endif
endif
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
+LDLIBS += -lrte_bus_pci
#
# Add extra flags for base driver files (also known as shared code)
# to disable warnings in them
#
-BASE_DRIVER_OBJS=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))
+BASE_DRIVER_OBJS=$(sort $(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c))))
$(foreach obj, $(BASE_DRIVER_OBJS), $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
VPATH += $(SRCDIR)/base
@@ -118,11 +121,13 @@ SRCS-$(CONFIG_RTE_IXGBE_INC_VECTOR) += ixgbe_rxtx_vec_neon.c
else
SRCS-$(CONFIG_RTE_IXGBE_INC_VECTOR) += ixgbe_rxtx_vec_sse.c
endif
-
ifeq ($(CONFIG_RTE_LIBRTE_IXGBE_BYPASS),y)
SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_bypass.c
SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_82599_bypass.c
endif
+ifeq ($(CONFIG_RTE_LIBRTE_SECURITY),y)
+SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_ipsec.c
+endif
SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += rte_pmd_ixgbe.c
SRCS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe_tm.c
diff --git a/drivers/net/ixgbe/base/ixgbe_osdep.h b/drivers/net/ixgbe/base/ixgbe_osdep.h
index 4aab278d..bb5dfd2a 100644
--- a/drivers/net/ixgbe/base/ixgbe_osdep.h
+++ b/drivers/net/ixgbe/base/ixgbe_osdep.h
@@ -161,4 +161,12 @@ static inline uint32_t ixgbe_read_addr(volatile void* addr)
#define IXGBE_WRITE_REG_ARRAY(hw, reg, index, value) \
IXGBE_PCI_REG_WRITE(IXGBE_PCI_REG_ARRAY_ADDR((hw), (reg), (index)), (value))
+#define IXGBE_WRITE_REG_THEN_POLL_MASK(hw, reg, val, mask, poll_ms) \
+do { \
+ uint32_t cnt = poll_ms; \
+ IXGBE_WRITE_REG(hw, (reg), (val)); \
+ while (((IXGBE_READ_REG(hw, (reg))) & (mask)) && (cnt--)) \
+ rte_delay_ms(1); \
+} while (0)
+
#endif /* _IXGBE_OS_H_ */
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index 22171d86..ff19a564 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -48,10 +48,10 @@
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_alarm.h>
#include <rte_ether.h>
@@ -61,6 +61,9 @@
#include <rte_random.h>
#include <rte_dev.h>
#include <rte_hash_crc.h>
+#ifdef RTE_LIBRTE_SECURITY
+#include <rte_security_driver.h>
+#endif
#include "ixgbe_logs.h"
#include "base/ixgbe_api.h"
@@ -169,13 +172,14 @@ static void ixgbe_dev_stop(struct rte_eth_dev *dev);
static int ixgbe_dev_set_link_up(struct rte_eth_dev *dev);
static int ixgbe_dev_set_link_down(struct rte_eth_dev *dev);
static void ixgbe_dev_close(struct rte_eth_dev *dev);
+static int ixgbe_dev_reset(struct rte_eth_dev *dev);
static void ixgbe_dev_promiscuous_enable(struct rte_eth_dev *dev);
static void ixgbe_dev_promiscuous_disable(struct rte_eth_dev *dev);
static void ixgbe_dev_allmulticast_enable(struct rte_eth_dev *dev);
static void ixgbe_dev_allmulticast_disable(struct rte_eth_dev *dev);
static int ixgbe_dev_link_update(struct rte_eth_dev *dev,
int wait_to_complete);
-static void ixgbe_dev_stats_get(struct rte_eth_dev *dev,
+static int ixgbe_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static int ixgbe_dev_xstats_get(struct rte_eth_dev *dev,
struct rte_eth_xstat *xstats, unsigned n);
@@ -218,7 +222,7 @@ static void ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev,
uint16_t queue, bool on);
static void ixgbe_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue,
int on);
-static void ixgbe_vlan_offload_set(struct rte_eth_dev *dev, int mask);
+static int ixgbe_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static void ixgbe_vlan_hw_strip_enable(struct rte_eth_dev *dev, uint16_t queue);
static void ixgbe_vlan_hw_strip_disable(struct rte_eth_dev *dev, uint16_t queue);
static void ixgbe_vlan_hw_extend_enable(struct rte_eth_dev *dev);
@@ -265,16 +269,17 @@ static int ixgbevf_dev_link_update(struct rte_eth_dev *dev,
int wait_to_complete);
static void ixgbevf_dev_stop(struct rte_eth_dev *dev);
static void ixgbevf_dev_close(struct rte_eth_dev *dev);
+static int ixgbevf_dev_reset(struct rte_eth_dev *dev);
static void ixgbevf_intr_disable(struct ixgbe_hw *hw);
static void ixgbevf_intr_enable(struct ixgbe_hw *hw);
-static void ixgbevf_dev_stats_get(struct rte_eth_dev *dev,
+static int ixgbevf_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static void ixgbevf_dev_stats_reset(struct rte_eth_dev *dev);
static int ixgbevf_vlan_filter_set(struct rte_eth_dev *dev,
uint16_t vlan_id, int on);
static void ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev,
uint16_t queue, int on);
-static void ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask);
+static int ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static void ixgbevf_set_vfta_all(struct rte_eth_dev *dev, bool on);
static int ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev,
uint16_t queue_id);
@@ -518,6 +523,7 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = {
.dev_set_link_up = ixgbe_dev_set_link_up,
.dev_set_link_down = ixgbe_dev_set_link_down,
.dev_close = ixgbe_dev_close,
+ .dev_reset = ixgbe_dev_reset,
.promiscuous_enable = ixgbe_dev_promiscuous_enable,
.promiscuous_disable = ixgbe_dev_promiscuous_disable,
.allmulticast_enable = ixgbe_dev_allmulticast_enable,
@@ -608,6 +614,7 @@ static const struct eth_dev_ops ixgbevf_eth_dev_ops = {
.xstats_reset = ixgbevf_dev_stats_reset,
.xstats_get_names = ixgbevf_dev_xstats_get_names,
.dev_close = ixgbevf_dev_close,
+ .dev_reset = ixgbevf_dev_reset,
.allmulticast_enable = ixgbevf_dev_allmulticast_enable,
.allmulticast_disable = ixgbevf_dev_allmulticast_disable,
.dev_infos_get = ixgbevf_dev_info_get,
@@ -1163,8 +1170,14 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
return 0;
}
+#ifdef RTE_LIBRTE_SECURITY
+ /* Initialize security_ctx only for primary process*/
+ eth_dev->security_ctx = ixgbe_ipsec_ctx_create(eth_dev);
+ if (eth_dev->security_ctx == NULL)
+ return -ENOMEM;
+#endif
+
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
/* Vendor and Device ID need to be set before init of shared code */
hw->device_id = pci_dev->id.device_id;
@@ -1332,12 +1345,8 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev)
/* initialize l2 tunnel filter list & hash */
ixgbe_l2_tn_filter_init(eth_dev);
- TAILQ_INIT(&filter_ntuple_list);
- TAILQ_INIT(&filter_ethertype_list);
- TAILQ_INIT(&filter_syn_list);
- TAILQ_INIT(&filter_fdir_list);
- TAILQ_INIT(&filter_l2_tunnel_list);
- TAILQ_INIT(&ixgbe_flow_list);
+ /* initialize flow filter lists */
+ ixgbe_filterlist_init();
/* initialize bandwidth configuration info */
memset(bw_conf, 0, sizeof(struct ixgbe_bw_conf));
@@ -1401,6 +1410,10 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev)
/* Remove all Traffic Manager configuration */
ixgbe_tm_conf_uninit(eth_dev);
+#ifdef RTE_LIBRTE_SECURITY
+ rte_free(eth_dev->security_ctx);
+#endif
+
return 0;
}
@@ -1627,7 +1640,6 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
}
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
hw->device_id = pci_dev->id.device_id;
hw->vendor_id = pci_dev->id.vendor_id;
@@ -1781,7 +1793,8 @@ static int eth_ixgbe_pci_remove(struct rte_pci_device *pci_dev)
static struct rte_pci_driver rte_ixgbe_pmd = {
.id_table = pci_id_ixgbe_map,
- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+ RTE_PCI_DRV_IOVA_AS_VA,
.probe = eth_ixgbe_pci_probe,
.remove = eth_ixgbe_pci_remove,
};
@@ -1803,7 +1816,7 @@ static int eth_ixgbevf_pci_remove(struct rte_pci_device *pci_dev)
*/
static struct rte_pci_driver rte_ixgbevf_pmd = {
.id_table = pci_id_ixgbevf_map,
- .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_IOVA_AS_VA,
.probe = eth_ixgbevf_pci_probe,
.remove = eth_ixgbevf_pci_remove,
};
@@ -1959,9 +1972,9 @@ ixgbe_vlan_hw_strip_bitmap_set(struct rte_eth_dev *dev, uint16_t queue, bool on)
rxq = dev->data->rx_queues[queue];
if (on)
- rxq->vlan_flags = PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
+ rxq->vlan_flags = PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
else
- rxq->vlan_flags = PKT_RX_VLAN_PKT;
+ rxq->vlan_flags = PKT_RX_VLAN;
}
static void
@@ -2125,7 +2138,7 @@ ixgbe_vlan_hw_extend_enable(struct rte_eth_dev *dev)
*/
}
-static void
+static int
ixgbe_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
if (mask & ETH_VLAN_STRIP_MASK) {
@@ -2148,6 +2161,8 @@ ixgbe_vlan_offload_set(struct rte_eth_dev *dev, int mask)
else
ixgbe_vlan_hw_extend_disable(dev);
}
+
+ return 0;
}
static void
@@ -2504,8 +2519,9 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
* - fixed speed: TODO implement
*/
if (dev->data->dev_conf.link_speeds & ETH_LINK_SPEED_FIXED) {
- PMD_INIT_LOG(ERR, "Invalid link_speeds for port %hhu; fix speed not supported",
- dev->data->port_id);
+ PMD_INIT_LOG(ERR,
+ "Invalid link_speeds for port %u, fix speed not supported",
+ dev->data->port_id);
return -EINVAL;
}
@@ -2568,9 +2584,13 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
goto error;
}
- mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK |
+ mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK |
ETH_VLAN_EXTEND_MASK;
- ixgbe_vlan_offload_set(dev, mask);
+ err = ixgbe_vlan_offload_set(dev, mask);
+ if (err) {
+ PMD_INIT_LOG(ERR, "Unable to set VLAN offload");
+ goto error;
+ }
if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_ONLY) {
/* Enable vlan filtering for VMDq */
@@ -2842,7 +2862,7 @@ ixgbe_dev_set_link_down(struct rte_eth_dev *dev)
}
/*
- * Reest and stop device.
+ * Reset and stop device.
*/
static void
ixgbe_dev_close(struct rte_eth_dev *dev)
@@ -2865,6 +2885,32 @@ ixgbe_dev_close(struct rte_eth_dev *dev)
ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
}
+/*
+ * Reset PF device.
+ */
+static int
+ixgbe_dev_reset(struct rte_eth_dev *dev)
+{
+ int ret;
+
+	/* When a DPDK PMD PF begins to reset the PF port, it should notify
+	 * all of its VFs so that they stay aligned with it. The detailed
+	 * notification mechanism is PMD specific; for the ixgbe PF it is
+	 * rather complex. To avoid unexpected behavior in the VFs, resetting
+	 * a PF with SR-IOV active is currently not supported. It might be
+	 * supported later.
+	 */
+ if (dev->data->sriov.active)
+ return -ENOTSUP;
+
+ ret = eth_ixgbe_dev_uninit(dev);
+ if (ret)
+ return ret;
+
+ ret = eth_ixgbe_dev_init(dev);
+
+ return ret;
+}
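
A sketch of the application-side view, assuming the generic rte_eth_dev_reset() ethdev call available in this release dispatches to the dev_reset callback registered above (ixgbe_dev_reset for a PF port); the helper name is illustrative.

	#include <rte_ethdev.h>

	/* Sketch: stop, reset and leave the port ready for reconfiguration. */
	static int
	reset_and_restart_port(uint16_t port_id)
	{
		int ret;

		rte_eth_dev_stop(port_id);

		ret = rte_eth_dev_reset(port_id);
		if (ret)
			return ret;	/* e.g. -ENOTSUP while SR-IOV is active */

		/* The port must be reconfigured and restarted afterwards,
		 * exactly as after the initial probe.
		 */
		return 0;
	}
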
+
static void
ixgbe_read_stats_registers(struct ixgbe_hw *hw,
struct ixgbe_hw_stats *hw_stats,
@@ -3077,7 +3123,7 @@ ixgbe_read_stats_registers(struct ixgbe_hw *hw,
/*
* This function is based on ixgbe_update_stats_counters() in ixgbe/ixgbe.c
*/
-static void
+static int
ixgbe_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct ixgbe_hw *hw =
@@ -3099,7 +3145,7 @@ ixgbe_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
&total_qbrc, &total_qprc, &total_qprdc);
if (stats == NULL)
- return;
+ return -EINVAL;
/* Fill out the rte_eth_stats statistics structure */
stats->ipackets = total_qprc;
@@ -3130,6 +3176,7 @@ ixgbe_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
/* Tx Errors */
stats->oerrors = 0;
+ return 0;
}
static void
@@ -3541,7 +3588,7 @@ ixgbevf_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
return IXGBEVF_NB_XSTATS;
}
-static void
+static int
ixgbevf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct ixgbevf_hw_stats *hw_stats = (struct ixgbevf_hw_stats *)
@@ -3550,12 +3597,13 @@ ixgbevf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
ixgbevf_update_stats(dev);
if (stats == NULL)
- return;
+ return -EINVAL;
stats->ipackets = hw_stats->vfgprc;
stats->ibytes = hw_stats->vfgorc;
stats->opackets = hw_stats->vfgptc;
stats->obytes = hw_stats->vfgotc;
+ return 0;
}
static void
@@ -3665,6 +3713,11 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
hw->mac.type == ixgbe_mac_X550EM_a)
dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
+#ifdef RTE_LIBRTE_SECURITY
+ dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_SECURITY;
+ dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
+#endif
+
dev_info->default_rxconf = (struct rte_eth_rxconf) {
.rx_thresh = {
.pthresh = IXGBE_DEFAULT_RX_PTHRESH,
@@ -3927,6 +3980,7 @@ ixgbe_dev_link_update_share(struct rte_eth_dev *dev,
link.link_status = ETH_LINK_DOWN;
link.link_speed = 0;
link.link_duplex = ETH_LINK_HALF_DUPLEX;
+ link.link_autoneg = ETH_LINK_AUTONEG;
memset(&old, 0, sizeof(old));
rte_ixgbe_dev_atomic_read_link_status(dev, &old);
@@ -4993,13 +5047,21 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
/* Set HW strip */
mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK |
ETH_VLAN_EXTEND_MASK;
- ixgbevf_vlan_offload_set(dev, mask);
+ err = ixgbevf_vlan_offload_set(dev, mask);
+ if (err) {
+ PMD_INIT_LOG(ERR, "Unable to set VLAN offload (%d)", err);
+ ixgbe_dev_clear_queues(dev);
+ return err;
+ }
ixgbevf_dev_rxtx_start(dev);
/* check and configure queue intr-vector mapping */
if (dev->data->dev_conf.intr_conf.rxq != 0) {
- intr_vector = dev->data->nb_rx_queues;
+		/* According to the datasheet, only vectors 0/1/2 can be used;
+		 * currently only one vector is used for the Rx queues.
+		 */
+ intr_vector = 1;
if (rte_intr_efd_enable(intr_handle, intr_vector))
return -1;
}
@@ -5016,6 +5078,15 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
}
ixgbevf_configure_msix(dev);
+ /* When a VF port is bound to VFIO-PCI, only miscellaneous interrupt
+ * is mapped to VFIO vector 0 in eth_ixgbevf_dev_init( ).
+ * If previous VFIO interrupt mapping setting in eth_ixgbevf_dev_init( )
+ * is not cleared, it will fail when following rte_intr_enable( ) tries
+ * to map Rx queue interrupt to other VFIO vectors.
+	 * So clear the uio/vfio intr/eventfd first to avoid failure.
+ */
+ rte_intr_disable(intr_handle);
+
rte_intr_enable(intr_handle);
/* Re-enable interrupt for VF */
@@ -5078,6 +5149,23 @@ ixgbevf_dev_close(struct rte_eth_dev *dev)
ixgbevf_remove_mac_addr(dev, 0);
}
+/*
+ * Reset VF device
+ */
+static int
+ixgbevf_dev_reset(struct rte_eth_dev *dev)
+{
+ int ret;
+
+ ret = eth_ixgbevf_dev_uninit(dev);
+ if (ret)
+ return ret;
+
+ ret = eth_ixgbevf_dev_init(dev);
+
+ return ret;
+}
+
static void ixgbevf_set_vfta_all(struct rte_eth_dev *dev, bool on)
{
struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -5153,7 +5241,7 @@ ixgbevf_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
ixgbe_vlan_hw_strip_bitmap_set(dev, queue, on);
}
-static void
+static int
ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
struct ixgbe_hw *hw =
@@ -5168,6 +5256,8 @@ ixgbevf_vlan_offload_set(struct rte_eth_dev *dev, int mask)
for (i = 0; i < hw->mac.max_rx_queues; i++)
ixgbevf_vlan_strip_queue_set(dev, i, on);
}
+
+ return 0;
}
int
@@ -5450,13 +5540,13 @@ ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
IXGBE_WRITE_REG(hw, IXGBE_MRCTL(rule_id), mr_ctl);
/* write pool mirrror control register */
- if (mirror_conf->rule_type == ETH_MIRROR_VIRTUAL_POOL_UP) {
+ if (mirror_conf->rule_type & ETH_MIRROR_VIRTUAL_POOL_UP) {
IXGBE_WRITE_REG(hw, IXGBE_VMRVM(rule_id), mp_lsb);
IXGBE_WRITE_REG(hw, IXGBE_VMRVM(rule_id + rule_mr_offset),
mp_msb);
}
/* write VLAN mirrror control register */
- if (mirror_conf->rule_type == ETH_MIRROR_VLAN) {
+ if (mirror_conf->rule_type & ETH_MIRROR_VLAN) {
IXGBE_WRITE_REG(hw, IXGBE_VMRVLAN(rule_id), mv_lsb);
IXGBE_WRITE_REG(hw, IXGBE_VMRVLAN(rule_id + rule_mr_offset),
mv_msb);
@@ -5509,9 +5599,12 @@ ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
uint32_t mask;
struct ixgbe_hw *hw =
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ uint32_t vec = IXGBE_MISC_VEC_ID;
mask = IXGBE_READ_REG(hw, IXGBE_VTEIMS);
- mask |= (1 << IXGBE_MISC_VEC_ID);
+ if (rte_intr_allow_others(intr_handle))
+ vec = IXGBE_RX_VEC_START;
+ mask |= (1 << vec);
RTE_SET_USED(queue_id);
IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask);
@@ -5526,9 +5619,14 @@ ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
uint32_t mask;
struct ixgbe_hw *hw =
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+ struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+ uint32_t vec = IXGBE_MISC_VEC_ID;
mask = IXGBE_READ_REG(hw, IXGBE_VTEIMS);
- mask &= ~(1 << IXGBE_MISC_VEC_ID);
+ if (rte_intr_allow_others(intr_handle))
+ vec = IXGBE_RX_VEC_START;
+ mask &= ~(1 << vec);
RTE_SET_USED(queue_id);
IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask);
@@ -5670,6 +5768,7 @@ ixgbevf_configure_msix(struct rte_eth_dev *dev)
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
uint32_t q_idx;
uint32_t vector_idx = IXGBE_MISC_VEC_ID;
+ uint32_t base = IXGBE_MISC_VEC_ID;
/* Configure VF other cause ivar */
ixgbevf_set_ivar_map(hw, -1, 1, vector_idx);
@@ -5680,6 +5779,11 @@ ixgbevf_configure_msix(struct rte_eth_dev *dev)
if (!rte_intr_dp_is_en(intr_handle))
return;
+ if (rte_intr_allow_others(intr_handle)) {
+ base = IXGBE_RX_VEC_START;
+ vector_idx = IXGBE_RX_VEC_START;
+ }
+
/* Configure all RX queues of VF */
for (q_idx = 0; q_idx < dev->data->nb_rx_queues; q_idx++) {
/* Force all queue use vector 0,
@@ -5687,6 +5791,8 @@ ixgbevf_configure_msix(struct rte_eth_dev *dev)
*/
ixgbevf_set_ivar_map(hw, 0, q_idx, vector_idx);
intr_handle->intr_vec[q_idx] = vector_idx;
+ if (vector_idx < base + intr_handle->nb_efd - 1)
+ vector_idx++;
}
}
@@ -6313,7 +6419,7 @@ ixgbe_add_del_ntuple_filter(struct rte_eth_dev *dev,
sizeof(struct ixgbe_5tuple_filter), 0);
if (filter == NULL)
return -ENOMEM;
- (void)rte_memcpy(&filter->filter_info,
+ rte_memcpy(&filter->filter_info,
&filter_5tuple,
sizeof(struct ixgbe_5tuple_filter_info));
filter->queue = ntuple_filter->queue;
@@ -7153,6 +7259,8 @@ ixgbe_dev_get_dcb_info(struct rte_eth_dev *dev,
struct ixgbe_dcb_config *dcb_config =
IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
struct ixgbe_dcb_tc_config *tc;
+ struct rte_eth_dcb_tc_queue_mapping *tc_queue;
+ uint8_t nb_tcs;
uint8_t i, j;
if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_DCB_FLAG)
@@ -7160,19 +7268,31 @@ ixgbe_dev_get_dcb_info(struct rte_eth_dev *dev,
else
dcb_info->nb_tcs = 1;
+ tc_queue = &dcb_info->tc_queue;
+ nb_tcs = dcb_info->nb_tcs;
+
if (dcb_config->vt_mode) { /* vt is enabled*/
struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
&dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
dcb_info->prio_tc[i] = vmdq_rx_conf->dcb_tc[i];
- for (i = 0; i < vmdq_rx_conf->nb_queue_pools; i++) {
- for (j = 0; j < dcb_info->nb_tcs; j++) {
- dcb_info->tc_queue.tc_rxq[i][j].base =
- i * dcb_info->nb_tcs + j;
- dcb_info->tc_queue.tc_rxq[i][j].nb_queue = 1;
- dcb_info->tc_queue.tc_txq[i][j].base =
- i * dcb_info->nb_tcs + j;
- dcb_info->tc_queue.tc_txq[i][j].nb_queue = 1;
+ if (RTE_ETH_DEV_SRIOV(dev).active > 0) {
+ for (j = 0; j < nb_tcs; j++) {
+ tc_queue->tc_rxq[0][j].base = j;
+ tc_queue->tc_rxq[0][j].nb_queue = 1;
+ tc_queue->tc_txq[0][j].base = j;
+ tc_queue->tc_txq[0][j].nb_queue = 1;
+ }
+ } else {
+ for (i = 0; i < vmdq_rx_conf->nb_queue_pools; i++) {
+ for (j = 0; j < nb_tcs; j++) {
+ tc_queue->tc_rxq[i][j].base =
+ i * nb_tcs + j;
+ tc_queue->tc_rxq[i][j].nb_queue = 1;
+ tc_queue->tc_txq[i][j].base =
+ i * nb_tcs + j;
+ tc_queue->tc_txq[i][j].nb_queue = 1;
+ }
}
}
} else { /* vt is disabled*/
@@ -7529,7 +7649,7 @@ ixgbe_dev_l2_tunnel_filter_add(struct rte_eth_dev *dev,
if (!node)
return -ENOMEM;
- (void)rte_memcpy(&node->key,
+ rte_memcpy(&node->key,
&key,
sizeof(struct ixgbe_l2_tn_key));
node->pool = l2_tunnel->pool;
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h
index caa50c8b..51ddcfd4 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.h
+++ b/drivers/net/ixgbe/ixgbe_ethdev.h
@@ -38,9 +38,13 @@
#include "base/ixgbe_dcb_82599.h"
#include "base/ixgbe_dcb_82598.h"
#include "ixgbe_bypass.h"
+#ifdef RTE_LIBRTE_SECURITY
+#include "ixgbe_ipsec.h"
+#endif
#include <rte_time.h>
#include <rte_hash.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_tm_driver.h>
/* need update link, bit flag */
@@ -364,49 +368,6 @@ struct rte_flow {
enum rte_filter_type filter_type;
void *rule;
};
-/* ntuple filter list structure */
-struct ixgbe_ntuple_filter_ele {
- TAILQ_ENTRY(ixgbe_ntuple_filter_ele) entries;
- struct rte_eth_ntuple_filter filter_info;
-};
-/* ethertype filter list structure */
-struct ixgbe_ethertype_filter_ele {
- TAILQ_ENTRY(ixgbe_ethertype_filter_ele) entries;
- struct rte_eth_ethertype_filter filter_info;
-};
-/* syn filter list structure */
-struct ixgbe_eth_syn_filter_ele {
- TAILQ_ENTRY(ixgbe_eth_syn_filter_ele) entries;
- struct rte_eth_syn_filter filter_info;
-};
-/* fdir filter list structure */
-struct ixgbe_fdir_rule_ele {
- TAILQ_ENTRY(ixgbe_fdir_rule_ele) entries;
- struct ixgbe_fdir_rule filter_info;
-};
-/* l2_tunnel filter list structure */
-struct ixgbe_eth_l2_tunnel_conf_ele {
- TAILQ_ENTRY(ixgbe_eth_l2_tunnel_conf_ele) entries;
- struct rte_eth_l2_tunnel_conf filter_info;
-};
-/* ixgbe_flow memory list structure */
-struct ixgbe_flow_mem {
- TAILQ_ENTRY(ixgbe_flow_mem) entries;
- struct rte_flow *flow;
-};
-
-TAILQ_HEAD(ixgbe_ntuple_filter_list, ixgbe_ntuple_filter_ele);
-struct ixgbe_ntuple_filter_list filter_ntuple_list;
-TAILQ_HEAD(ixgbe_ethertype_filter_list, ixgbe_ethertype_filter_ele);
-struct ixgbe_ethertype_filter_list filter_ethertype_list;
-TAILQ_HEAD(ixgbe_syn_filter_list, ixgbe_eth_syn_filter_ele);
-struct ixgbe_syn_filter_list filter_syn_list;
-TAILQ_HEAD(ixgbe_fdir_rule_filter_list, ixgbe_fdir_rule_ele);
-struct ixgbe_fdir_rule_filter_list filter_fdir_list;
-TAILQ_HEAD(ixgbe_l2_tunnel_filter_list, ixgbe_eth_l2_tunnel_conf_ele);
-struct ixgbe_l2_tunnel_filter_list filter_l2_tunnel_list;
-TAILQ_HEAD(ixgbe_flow_mem_list, ixgbe_flow_mem);
-struct ixgbe_flow_mem_list ixgbe_flow_list;
/*
* Statistics counters collected by the MACsec
@@ -529,7 +490,9 @@ struct ixgbe_adapter {
struct ixgbe_filter_info filter;
struct ixgbe_l2_tn_info l2_tn;
struct ixgbe_bw_conf bw_conf;
-
+#ifdef RTE_LIBRTE_SECURITY
+ struct ixgbe_ipsec ipsec;
+#endif
bool rx_bulk_alloc_allowed;
bool rx_vec_allowed;
struct rte_timecounter systime_tc;
@@ -586,6 +549,9 @@ struct ixgbe_adapter {
#define IXGBE_DEV_PRIVATE_TO_TM_CONF(adapter) \
(&((struct ixgbe_adapter *)adapter)->tm_conf)
+#define IXGBE_DEV_PRIVATE_TO_IPSEC(adapter)\
+ (&((struct ixgbe_adapter *)adapter)->ipsec)
+
/*
* RX/TX function prototypes
*/
@@ -692,6 +658,7 @@ ixgbe_dev_l2_tunnel_filter_add(struct rte_eth_dev *dev,
int
ixgbe_dev_l2_tunnel_filter_del(struct rte_eth_dev *dev,
struct rte_eth_l2_tunnel_conf *l2_tunnel);
+void ixgbe_filterlist_init(void);
void ixgbe_filterlist_flush(void);
/*
* Flow director function prototypes
diff --git a/drivers/net/ixgbe/ixgbe_fdir.c b/drivers/net/ixgbe/ixgbe_fdir.c
index eb2d5581..9281dc1a 100644
--- a/drivers/net/ixgbe/ixgbe_fdir.c
+++ b/drivers/net/ixgbe/ixgbe_fdir.c
@@ -1276,7 +1276,8 @@ ixgbe_fdir_filter_program(struct rte_eth_dev *dev,
rule->ixgbe_fdir.formatted.flow_type ==
IXGBE_ATR_FLOW_TYPE_IPV6) &&
(info->mask.src_port_mask != 0 ||
- info->mask.dst_port_mask != 0)) {
+ info->mask.dst_port_mask != 0) &&
+ rule->mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
PMD_DRV_LOG(ERR, "By this device,"
" IPv4 is not supported without"
" L4 protocol and ports masked!");
@@ -1347,7 +1348,7 @@ ixgbe_fdir_filter_program(struct rte_eth_dev *dev,
0);
if (!node)
return -ENOMEM;
- (void)rte_memcpy(&node->ixgbe_fdir,
+ rte_memcpy(&node->ixgbe_fdir,
&rule->ixgbe_fdir,
sizeof(union ixgbe_atr_input));
node->fdirflags = fdircmd_flags;
diff --git a/drivers/net/ixgbe/ixgbe_flow.c b/drivers/net/ixgbe/ixgbe_flow.c
index d6796088..19c2d479 100644
--- a/drivers/net/ixgbe/ixgbe_flow.c
+++ b/drivers/net/ixgbe/ixgbe_flow.c
@@ -51,7 +51,6 @@
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_alarm.h>
#include <rte_ether.h>
@@ -79,6 +78,51 @@
#define IXGBE_MAX_N_TUPLE_PRIO 7
#define IXGBE_MAX_FLX_SOURCE_OFF 62
+/* ntuple filter list structure */
+struct ixgbe_ntuple_filter_ele {
+ TAILQ_ENTRY(ixgbe_ntuple_filter_ele) entries;
+ struct rte_eth_ntuple_filter filter_info;
+};
+/* ethertype filter list structure */
+struct ixgbe_ethertype_filter_ele {
+ TAILQ_ENTRY(ixgbe_ethertype_filter_ele) entries;
+ struct rte_eth_ethertype_filter filter_info;
+};
+/* syn filter list structure */
+struct ixgbe_eth_syn_filter_ele {
+ TAILQ_ENTRY(ixgbe_eth_syn_filter_ele) entries;
+ struct rte_eth_syn_filter filter_info;
+};
+/* fdir filter list structure */
+struct ixgbe_fdir_rule_ele {
+ TAILQ_ENTRY(ixgbe_fdir_rule_ele) entries;
+ struct ixgbe_fdir_rule filter_info;
+};
+/* l2_tunnel filter list structure */
+struct ixgbe_eth_l2_tunnel_conf_ele {
+ TAILQ_ENTRY(ixgbe_eth_l2_tunnel_conf_ele) entries;
+ struct rte_eth_l2_tunnel_conf filter_info;
+};
+/* ixgbe_flow memory list structure */
+struct ixgbe_flow_mem {
+ TAILQ_ENTRY(ixgbe_flow_mem) entries;
+ struct rte_flow *flow;
+};
+
+TAILQ_HEAD(ixgbe_ntuple_filter_list, ixgbe_ntuple_filter_ele);
+TAILQ_HEAD(ixgbe_ethertype_filter_list, ixgbe_ethertype_filter_ele);
+TAILQ_HEAD(ixgbe_syn_filter_list, ixgbe_eth_syn_filter_ele);
+TAILQ_HEAD(ixgbe_fdir_rule_filter_list, ixgbe_fdir_rule_ele);
+TAILQ_HEAD(ixgbe_l2_tunnel_filter_list, ixgbe_eth_l2_tunnel_conf_ele);
+TAILQ_HEAD(ixgbe_flow_mem_list, ixgbe_flow_mem);
+
+static struct ixgbe_ntuple_filter_list filter_ntuple_list;
+static struct ixgbe_ethertype_filter_list filter_ethertype_list;
+static struct ixgbe_syn_filter_list filter_syn_list;
+static struct ixgbe_fdir_rule_filter_list filter_fdir_list;
+static struct ixgbe_l2_tunnel_filter_list filter_l2_tunnel_list;
+static struct ixgbe_flow_mem_list ixgbe_flow_list;
+
/**
* Endless loop will never happen with below assumption
* 1. there is at least one no-void item(END)
@@ -142,6 +186,9 @@ const struct rte_flow_action *next_no_void_action(
* END
* other members in mask and spec should set to 0x00.
* item->last should be NULL.
+ *
+ * Special case for flow action type RTE_FLOW_ACTION_TYPE_SECURITY.
+ *
*/
static int
cons_parse_ntuple_filter(const struct rte_flow_attr *attr,
@@ -181,6 +228,43 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr,
return -rte_errno;
}
+#ifdef RTE_LIBRTE_SECURITY
+ /**
+ * Special case for flow action type RTE_FLOW_ACTION_TYPE_SECURITY
+ */
+ act = next_no_void_action(actions, NULL);
+ if (act->type == RTE_FLOW_ACTION_TYPE_SECURITY) {
+ const void *conf = act->conf;
+ /* check if the next not void item is END */
+ act = next_no_void_action(actions, act);
+ if (act->type != RTE_FLOW_ACTION_TYPE_END) {
+ memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ act, "Not supported action.");
+ return -rte_errno;
+ }
+
+ /* get the IP pattern*/
+ item = next_no_void_pattern(pattern, NULL);
+ while (item->type != RTE_FLOW_ITEM_TYPE_IPV4 &&
+ item->type != RTE_FLOW_ITEM_TYPE_IPV6) {
+ if (item->last ||
+ item->type == RTE_FLOW_ITEM_TYPE_END) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "IP pattern missing.");
+ return -rte_errno;
+ }
+ item = next_no_void_pattern(pattern, item);
+ }
+
+ filter->proto = IPPROTO_ESP;
+ return ixgbe_crypto_add_ingress_sa_from_flow(conf, item->spec,
+ item->type == RTE_FLOW_ITEM_TYPE_IPV6);
+ }
+#endif
+
/* the first not void item can be MAC or IPv4 */
item = next_no_void_pattern(pattern, NULL);
@@ -474,6 +558,12 @@ ixgbe_parse_ntuple_filter(struct rte_eth_dev *dev,
if (ret)
return ret;
+#ifdef RTE_LIBRTE_SECURITY
+ /* ESP flow not really a flow*/
+ if (filter->proto == IPPROTO_ESP)
+ return 0;
+#endif
+
/* Ixgbe doesn't support tcp flags. */
if (filter->flags & RTE_NTUPLE_FLAGS_TCP_FLAG) {
memset(filter, 0, sizeof(struct rte_eth_ntuple_filter));
@@ -1004,7 +1094,7 @@ ixgbe_parse_syn_filter(struct rte_eth_dev *dev,
* The first not void item can be E_TAG.
* The next not void item must be END.
* action:
- * The first not void action should be QUEUE.
+ * The first not void action should be VF or PF.
* The next not void action should be END.
* pattern example:
* ITEM Spec Mask
@@ -1015,7 +1105,8 @@ ixgbe_parse_syn_filter(struct rte_eth_dev *dev,
* item->last should be NULL.
*/
static int
-cons_parse_l2_tn_filter(const struct rte_flow_attr *attr,
+cons_parse_l2_tn_filter(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
struct rte_eth_l2_tunnel_conf *filter,
@@ -1025,7 +1116,8 @@ cons_parse_l2_tn_filter(const struct rte_flow_attr *attr,
const struct rte_flow_item_e_tag *e_tag_spec;
const struct rte_flow_item_e_tag *e_tag_mask;
const struct rte_flow_action *act;
- const struct rte_flow_action_queue *act_q;
+ const struct rte_flow_action_vf *act_vf;
+ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
if (!pattern) {
rte_flow_error_set(error, EINVAL,
@@ -1133,9 +1225,10 @@ cons_parse_l2_tn_filter(const struct rte_flow_attr *attr,
return -rte_errno;
}
- /* check if the first not void action is QUEUE. */
+ /* check if the first not void action is VF or PF. */
act = next_no_void_action(actions, NULL);
- if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) {
+ if (act->type != RTE_FLOW_ACTION_TYPE_VF &&
+ act->type != RTE_FLOW_ACTION_TYPE_PF) {
memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf));
rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
@@ -1143,8 +1236,12 @@ cons_parse_l2_tn_filter(const struct rte_flow_attr *attr,
return -rte_errno;
}
- act_q = (const struct rte_flow_action_queue *)act->conf;
- filter->pool = act_q->index;
+ if (act->type == RTE_FLOW_ACTION_TYPE_VF) {
+ act_vf = (const struct rte_flow_action_vf *)act->conf;
+ filter->pool = act_vf->id;
+ } else {
+ filter->pool = pci_dev->max_vfs;
+ }
/* check if the next not void item is END */
act = next_no_void_action(actions, act);
@@ -1169,8 +1266,10 @@ ixgbe_parse_l2_tn_filter(struct rte_eth_dev *dev,
{
int ret = 0;
struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+ uint16_t vf_num;
- ret = cons_parse_l2_tn_filter(attr, pattern,
+ ret = cons_parse_l2_tn_filter(dev, attr, pattern,
actions, l2_tn_filter, error);
if (hw->mac.type != ixgbe_mac_X550 &&
@@ -1183,7 +1282,9 @@ ixgbe_parse_l2_tn_filter(struct rte_eth_dev *dev,
return -rte_errno;
}
- if (l2_tn_filter->pool >= dev->data->nb_rx_queues)
+ vf_num = pci_dev->max_vfs;
+
+ if (l2_tn_filter->pool > vf_num)
return -rte_errno;
return ret;
@@ -2600,6 +2701,17 @@ step_next:
}
void
+ixgbe_filterlist_init(void)
+{
+ TAILQ_INIT(&filter_ntuple_list);
+ TAILQ_INIT(&filter_ethertype_list);
+ TAILQ_INIT(&filter_syn_list);
+ TAILQ_INIT(&filter_fdir_list);
+ TAILQ_INIT(&filter_l2_tunnel_list);
+ TAILQ_INIT(&ixgbe_flow_list);
+}
+
+void
ixgbe_filterlist_flush(void)
{
struct ixgbe_ntuple_filter_ele *ntuple_filter_ptr;
@@ -2702,12 +2814,23 @@ ixgbe_flow_create(struct rte_eth_dev *dev,
memset(&ntuple_filter, 0, sizeof(struct rte_eth_ntuple_filter));
ret = ixgbe_parse_ntuple_filter(dev, attr, pattern,
actions, &ntuple_filter, error);
+
+#ifdef RTE_LIBRTE_SECURITY
+ /* ESP flow not really a flow*/
+ if (ntuple_filter.proto == IPPROTO_ESP)
+ return flow;
+#endif
+
if (!ret) {
ret = ixgbe_add_del_ntuple_filter(dev, &ntuple_filter, TRUE);
if (!ret) {
ntuple_filter_ptr = rte_zmalloc("ixgbe_ntuple_filter",
sizeof(struct ixgbe_ntuple_filter_ele), 0);
- (void)rte_memcpy(&ntuple_filter_ptr->filter_info,
+ if (!ntuple_filter_ptr) {
+ PMD_DRV_LOG(ERR, "failed to allocate memory");
+ goto out;
+ }
+ rte_memcpy(&ntuple_filter_ptr->filter_info,
&ntuple_filter,
sizeof(struct rte_eth_ntuple_filter));
TAILQ_INSERT_TAIL(&filter_ntuple_list,
@@ -2729,7 +2852,11 @@ ixgbe_flow_create(struct rte_eth_dev *dev,
ethertype_filter_ptr = rte_zmalloc(
"ixgbe_ethertype_filter",
sizeof(struct ixgbe_ethertype_filter_ele), 0);
- (void)rte_memcpy(&ethertype_filter_ptr->filter_info,
+ if (!ethertype_filter_ptr) {
+ PMD_DRV_LOG(ERR, "failed to allocate memory");
+ goto out;
+ }
+ rte_memcpy(&ethertype_filter_ptr->filter_info,
&ethertype_filter,
sizeof(struct rte_eth_ethertype_filter));
TAILQ_INSERT_TAIL(&filter_ethertype_list,
@@ -2749,7 +2876,11 @@ ixgbe_flow_create(struct rte_eth_dev *dev,
if (!ret) {
syn_filter_ptr = rte_zmalloc("ixgbe_syn_filter",
sizeof(struct ixgbe_eth_syn_filter_ele), 0);
- (void)rte_memcpy(&syn_filter_ptr->filter_info,
+ if (!syn_filter_ptr) {
+ PMD_DRV_LOG(ERR, "failed to allocate memory");
+ goto out;
+ }
+ rte_memcpy(&syn_filter_ptr->filter_info,
&syn_filter,
sizeof(struct rte_eth_syn_filter));
TAILQ_INSERT_TAIL(&filter_syn_list,
@@ -2809,7 +2940,11 @@ ixgbe_flow_create(struct rte_eth_dev *dev,
if (!ret) {
fdir_rule_ptr = rte_zmalloc("ixgbe_fdir_filter",
sizeof(struct ixgbe_fdir_rule_ele), 0);
- (void)rte_memcpy(&fdir_rule_ptr->filter_info,
+ if (!fdir_rule_ptr) {
+ PMD_DRV_LOG(ERR, "failed to allocate memory");
+ goto out;
+ }
+ rte_memcpy(&fdir_rule_ptr->filter_info,
&fdir_rule,
sizeof(struct ixgbe_fdir_rule));
TAILQ_INSERT_TAIL(&filter_fdir_list,
@@ -2842,7 +2977,11 @@ ixgbe_flow_create(struct rte_eth_dev *dev,
if (!ret) {
l2_tn_filter_ptr = rte_zmalloc("ixgbe_l2_tn_filter",
sizeof(struct ixgbe_eth_l2_tunnel_conf_ele), 0);
- (void)rte_memcpy(&l2_tn_filter_ptr->filter_info,
+ if (!l2_tn_filter_ptr) {
+ PMD_DRV_LOG(ERR, "failed to allocate memory");
+ goto out;
+ }
+ rte_memcpy(&l2_tn_filter_ptr->filter_info,
&l2_tn_filter,
sizeof(struct rte_eth_l2_tunnel_conf));
TAILQ_INSERT_TAIL(&filter_l2_tunnel_list,
@@ -2941,7 +3080,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev,
case RTE_ETH_FILTER_NTUPLE:
ntuple_filter_ptr = (struct ixgbe_ntuple_filter_ele *)
pmd_flow->rule;
- (void)rte_memcpy(&ntuple_filter,
+ rte_memcpy(&ntuple_filter,
&ntuple_filter_ptr->filter_info,
sizeof(struct rte_eth_ntuple_filter));
ret = ixgbe_add_del_ntuple_filter(dev, &ntuple_filter, FALSE);
@@ -2954,7 +3093,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev,
case RTE_ETH_FILTER_ETHERTYPE:
ethertype_filter_ptr = (struct ixgbe_ethertype_filter_ele *)
pmd_flow->rule;
- (void)rte_memcpy(&ethertype_filter,
+ rte_memcpy(&ethertype_filter,
&ethertype_filter_ptr->filter_info,
sizeof(struct rte_eth_ethertype_filter));
ret = ixgbe_add_del_ethertype_filter(dev,
@@ -2968,7 +3107,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev,
case RTE_ETH_FILTER_SYN:
syn_filter_ptr = (struct ixgbe_eth_syn_filter_ele *)
pmd_flow->rule;
- (void)rte_memcpy(&syn_filter,
+ rte_memcpy(&syn_filter,
&syn_filter_ptr->filter_info,
sizeof(struct rte_eth_syn_filter));
ret = ixgbe_syn_filter_set(dev, &syn_filter, FALSE);
@@ -2980,7 +3119,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev,
break;
case RTE_ETH_FILTER_FDIR:
fdir_rule_ptr = (struct ixgbe_fdir_rule_ele *)pmd_flow->rule;
- (void)rte_memcpy(&fdir_rule,
+ rte_memcpy(&fdir_rule,
&fdir_rule_ptr->filter_info,
sizeof(struct ixgbe_fdir_rule));
ret = ixgbe_fdir_filter_program(dev, &fdir_rule, TRUE, FALSE);
@@ -2995,7 +3134,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev,
case RTE_ETH_FILTER_L2_TUNNEL:
l2_tn_filter_ptr = (struct ixgbe_eth_l2_tunnel_conf_ele *)
pmd_flow->rule;
- (void)rte_memcpy(&l2_tn_filter, &l2_tn_filter_ptr->filter_info,
+ rte_memcpy(&l2_tn_filter, &l2_tn_filter_ptr->filter_info,
sizeof(struct rte_eth_l2_tunnel_conf));
ret = ixgbe_dev_l2_tunnel_filter_del(dev, &l2_tn_filter);
if (!ret) {
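
For illustration, here is a minimal sketch, not part of the patch, of how an application could exercise the reworked L2 tunnel parser above by steering an E-Tag flow to a VF through the generic rte_flow API. The port id, E-CID value and VF id are placeholders, and the E-Tag item field name (rsvd_grp_ecid_b) should be checked against the rte_flow.h of the target release.

#include <rte_flow.h>
#include <rte_byteorder.h>
#include <rte_errno.h>

/* Hypothetical helper: direct E-Tag traffic with E-CID base 0x123 to VF 1. */
static int
l2_tunnel_to_vf(uint16_t port_id)
{
	struct rte_flow_error err;
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_e_tag spec = {
		.rsvd_grp_ecid_b = rte_cpu_to_be_16(0x0123),
	};
	struct rte_flow_item_e_tag mask = {
		.rsvd_grp_ecid_b = rte_cpu_to_be_16(0x3FFF),
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_E_TAG, .spec = &spec, .mask = &mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	/* The parser now accepts a VF (or PF) action in place of QUEUE. */
	struct rte_flow_action_vf vf = { .id = 1 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_VF, .conf = &vf },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, &err) != NULL ?
			0 : -rte_errno;
}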
diff --git a/drivers/net/ixgbe/ixgbe_ipsec.c b/drivers/net/ixgbe/ixgbe_ipsec.c
new file mode 100644
index 00000000..105da11a
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_ipsec.c
@@ -0,0 +1,737 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_ethdev.h>
+#include <rte_ethdev_pci.h>
+#include <rte_ip.h>
+#include <rte_jhash.h>
+#include <rte_security_driver.h>
+#include <rte_cryptodev.h>
+#include <rte_flow.h>
+
+#include "base/ixgbe_type.h"
+#include "base/ixgbe_api.h"
+#include "ixgbe_ethdev.h"
+#include "ixgbe_ipsec.h"
+
+#define RTE_IXGBE_REGISTER_POLL_WAIT_5_MS 5
+
+#define IXGBE_WAIT_RREAD \
+ IXGBE_WRITE_REG_THEN_POLL_MASK(hw, IXGBE_IPSRXIDX, reg_val, \
+ IPSRXIDX_READ, RTE_IXGBE_REGISTER_POLL_WAIT_5_MS)
+#define IXGBE_WAIT_RWRITE \
+ IXGBE_WRITE_REG_THEN_POLL_MASK(hw, IXGBE_IPSRXIDX, reg_val, \
+ IPSRXIDX_WRITE, RTE_IXGBE_REGISTER_POLL_WAIT_5_MS)
+#define IXGBE_WAIT_TREAD \
+ IXGBE_WRITE_REG_THEN_POLL_MASK(hw, IXGBE_IPSTXIDX, reg_val, \
+ IPSRXIDX_READ, RTE_IXGBE_REGISTER_POLL_WAIT_5_MS)
+#define IXGBE_WAIT_TWRITE \
+ IXGBE_WRITE_REG_THEN_POLL_MASK(hw, IXGBE_IPSTXIDX, reg_val, \
+ IPSRXIDX_WRITE, RTE_IXGBE_REGISTER_POLL_WAIT_5_MS)
+
+#define CMP_IP(a, b) (\
+ (a).ipv6[0] == (b).ipv6[0] && \
+ (a).ipv6[1] == (b).ipv6[1] && \
+ (a).ipv6[2] == (b).ipv6[2] && \
+ (a).ipv6[3] == (b).ipv6[3])
+
+
+static void
+ixgbe_crypto_clear_ipsec_tables(struct rte_eth_dev *dev)
+{
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ int i = 0;
+
+ /* clear Rx IP table*/
+ for (i = 0; i < IPSEC_MAX_RX_IP_COUNT; i++) {
+ uint16_t index = i << 3;
+ uint32_t reg_val = IPSRXIDX_WRITE | IPSRXIDX_TABLE_IP | index;
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(0), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(1), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(2), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(3), 0);
+ IXGBE_WAIT_RWRITE;
+ }
+
+ /* clear Rx SPI and Rx/Tx SA tables*/
+ for (i = 0; i < IPSEC_MAX_SA_COUNT; i++) {
+ uint32_t index = i << 3;
+ uint32_t reg_val = IPSRXIDX_WRITE | IPSRXIDX_TABLE_SPI | index;
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXSPI, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPIDX, 0);
+ IXGBE_WAIT_RWRITE;
+ reg_val = IPSRXIDX_WRITE | IPSRXIDX_TABLE_KEY | index;
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(0), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(1), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(2), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(3), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXSALT, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXMOD, 0);
+ IXGBE_WAIT_RWRITE;
+ reg_val = IPSRXIDX_WRITE | index;
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(0), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(1), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(2), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(3), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXSALT, 0);
+ IXGBE_WAIT_TWRITE;
+ }
+}
+
+static int
+ixgbe_crypto_add_sa(struct ixgbe_crypto_session *ic_session)
+{
+ struct rte_eth_dev *dev = ic_session->dev;
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_ipsec *priv = IXGBE_DEV_PRIVATE_TO_IPSEC(
+ dev->data->dev_private);
+ uint32_t reg_val;
+ int sa_index = -1;
+
+ if (ic_session->op == IXGBE_OP_AUTHENTICATED_DECRYPTION) {
+ int i, ip_index = -1;
+
+ /* Find a match in the IP table*/
+ for (i = 0; i < IPSEC_MAX_RX_IP_COUNT; i++) {
+ if (CMP_IP(priv->rx_ip_tbl[i].ip,
+ ic_session->dst_ip)) {
+ ip_index = i;
+ break;
+ }
+ }
+ /* If no match, find a free entry in the IP table*/
+ if (ip_index < 0) {
+ for (i = 0; i < IPSEC_MAX_RX_IP_COUNT; i++) {
+ if (priv->rx_ip_tbl[i].ref_count == 0) {
+ ip_index = i;
+ break;
+ }
+ }
+ }
+
+ /* Fail if no match and no free entries*/
+ if (ip_index < 0) {
+ PMD_DRV_LOG(ERR,
+ "No free entry left in the Rx IP table\n");
+ return -1;
+ }
+
+ /* Find a free entry in the SA table*/
+ for (i = 0; i < IPSEC_MAX_SA_COUNT; i++) {
+ if (priv->rx_sa_tbl[i].used == 0) {
+ sa_index = i;
+ break;
+ }
+ }
+ /* Fail if no free entries*/
+ if (sa_index < 0) {
+ PMD_DRV_LOG(ERR,
+ "No free entry left in the Rx SA table\n");
+ return -1;
+ }
+
+ priv->rx_ip_tbl[ip_index].ip.ipv6[0] =
+ ic_session->dst_ip.ipv6[0];
+ priv->rx_ip_tbl[ip_index].ip.ipv6[1] =
+ ic_session->dst_ip.ipv6[1];
+ priv->rx_ip_tbl[ip_index].ip.ipv6[2] =
+ ic_session->dst_ip.ipv6[2];
+ priv->rx_ip_tbl[ip_index].ip.ipv6[3] =
+ ic_session->dst_ip.ipv6[3];
+ priv->rx_ip_tbl[ip_index].ref_count++;
+
+ priv->rx_sa_tbl[sa_index].spi =
+ rte_cpu_to_be_32(ic_session->spi);
+ priv->rx_sa_tbl[sa_index].ip_index = ip_index;
+ priv->rx_sa_tbl[sa_index].key[3] =
+ rte_cpu_to_be_32(*(uint32_t *)&ic_session->key[0]);
+ priv->rx_sa_tbl[sa_index].key[2] =
+ rte_cpu_to_be_32(*(uint32_t *)&ic_session->key[4]);
+ priv->rx_sa_tbl[sa_index].key[1] =
+ rte_cpu_to_be_32(*(uint32_t *)&ic_session->key[8]);
+ priv->rx_sa_tbl[sa_index].key[0] =
+ rte_cpu_to_be_32(*(uint32_t *)&ic_session->key[12]);
+ priv->rx_sa_tbl[sa_index].salt =
+ rte_cpu_to_be_32(ic_session->salt);
+ priv->rx_sa_tbl[sa_index].mode = IPSRXMOD_VALID;
+ if (ic_session->op == IXGBE_OP_AUTHENTICATED_DECRYPTION)
+ priv->rx_sa_tbl[sa_index].mode |=
+ (IPSRXMOD_PROTO | IPSRXMOD_DECRYPT);
+ if (ic_session->dst_ip.type == IPv6)
+ priv->rx_sa_tbl[sa_index].mode |= IPSRXMOD_IPV6;
+ priv->rx_sa_tbl[sa_index].used = 1;
+
+ /* write IP table entry*/
+ reg_val = IPSRXIDX_RX_EN | IPSRXIDX_WRITE |
+ IPSRXIDX_TABLE_IP | (ip_index << 3);
+ if (priv->rx_ip_tbl[ip_index].ip.type == IPv4) {
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(0), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(1), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(2), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(3),
+ priv->rx_ip_tbl[ip_index].ip.ipv4);
+ } else {
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(0),
+ priv->rx_ip_tbl[ip_index].ip.ipv6[0]);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(1),
+ priv->rx_ip_tbl[ip_index].ip.ipv6[1]);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(2),
+ priv->rx_ip_tbl[ip_index].ip.ipv6[2]);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(3),
+ priv->rx_ip_tbl[ip_index].ip.ipv6[3]);
+ }
+ IXGBE_WAIT_RWRITE;
+
+ /* write SPI table entry*/
+ reg_val = IPSRXIDX_RX_EN | IPSRXIDX_WRITE |
+ IPSRXIDX_TABLE_SPI | (sa_index << 3);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXSPI,
+ priv->rx_sa_tbl[sa_index].spi);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPIDX,
+ priv->rx_sa_tbl[sa_index].ip_index);
+ IXGBE_WAIT_RWRITE;
+
+ /* write Key table entry*/
+ reg_val = IPSRXIDX_RX_EN | IPSRXIDX_WRITE |
+ IPSRXIDX_TABLE_KEY | (sa_index << 3);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(0),
+ priv->rx_sa_tbl[sa_index].key[0]);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(1),
+ priv->rx_sa_tbl[sa_index].key[1]);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(2),
+ priv->rx_sa_tbl[sa_index].key[2]);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(3),
+ priv->rx_sa_tbl[sa_index].key[3]);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXSALT,
+ priv->rx_sa_tbl[sa_index].salt);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXMOD,
+ priv->rx_sa_tbl[sa_index].mode);
+ IXGBE_WAIT_RWRITE;
+
+ } else { /* sess->dir == RTE_CRYPTO_OUTBOUND */
+ int i;
+
+ /* Find a free entry in the SA table*/
+ for (i = 0; i < IPSEC_MAX_SA_COUNT; i++) {
+ if (priv->tx_sa_tbl[i].used == 0) {
+ sa_index = i;
+ break;
+ }
+ }
+ /* Fail if no free entries*/
+ if (sa_index < 0) {
+ PMD_DRV_LOG(ERR,
+ "No free entry left in the Tx SA table\n");
+ return -1;
+ }
+
+ priv->tx_sa_tbl[sa_index].spi =
+ rte_cpu_to_be_32(ic_session->spi);
+ priv->tx_sa_tbl[sa_index].key[3] =
+ rte_cpu_to_be_32(*(uint32_t *)&ic_session->key[0]);
+ priv->tx_sa_tbl[sa_index].key[2] =
+ rte_cpu_to_be_32(*(uint32_t *)&ic_session->key[4]);
+ priv->tx_sa_tbl[sa_index].key[1] =
+ rte_cpu_to_be_32(*(uint32_t *)&ic_session->key[8]);
+ priv->tx_sa_tbl[sa_index].key[0] =
+ rte_cpu_to_be_32(*(uint32_t *)&ic_session->key[12]);
+ priv->tx_sa_tbl[sa_index].salt =
+ rte_cpu_to_be_32(ic_session->salt);
+
+ reg_val = IPSRXIDX_RX_EN | IPSRXIDX_WRITE | (sa_index << 3);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(0),
+ priv->tx_sa_tbl[sa_index].key[0]);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(1),
+ priv->tx_sa_tbl[sa_index].key[1]);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(2),
+ priv->tx_sa_tbl[sa_index].key[2]);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(3),
+ priv->tx_sa_tbl[sa_index].key[3]);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXSALT,
+ priv->tx_sa_tbl[sa_index].salt);
+ IXGBE_WAIT_TWRITE;
+
+ priv->tx_sa_tbl[sa_index].used = 1;
+ ic_session->sa_index = sa_index;
+ }
+
+ return 0;
+}
+
+static int
+ixgbe_crypto_remove_sa(struct rte_eth_dev *dev,
+ struct ixgbe_crypto_session *ic_session)
+{
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct ixgbe_ipsec *priv =
+ IXGBE_DEV_PRIVATE_TO_IPSEC(dev->data->dev_private);
+ uint32_t reg_val;
+ int sa_index = -1;
+
+ if (ic_session->op == IXGBE_OP_AUTHENTICATED_DECRYPTION) {
+ int i, ip_index = -1;
+
+ /* Find a match in the IP table*/
+ for (i = 0; i < IPSEC_MAX_RX_IP_COUNT; i++) {
+ if (CMP_IP(priv->rx_ip_tbl[i].ip, ic_session->dst_ip)) {
+ ip_index = i;
+ break;
+ }
+ }
+
+ /* Fail if no match*/
+ if (ip_index < 0) {
+ PMD_DRV_LOG(ERR,
+ "Entry not found in the Rx IP table\n");
+ return -1;
+ }
+
+ /* Find a free entry in the SA table*/
+ for (i = 0; i < IPSEC_MAX_SA_COUNT; i++) {
+ if (priv->rx_sa_tbl[i].spi ==
+ rte_cpu_to_be_32(ic_session->spi)) {
+ sa_index = i;
+ break;
+ }
+ }
+ /* Fail if no match*/
+ if (sa_index < 0) {
+ PMD_DRV_LOG(ERR,
+ "Entry not found in the Rx SA table\n");
+ return -1;
+ }
+
+ /* Disable and clear Rx SPI and key table entries */
+ reg_val = IPSRXIDX_WRITE | IPSRXIDX_TABLE_SPI | (sa_index << 3);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXSPI, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPIDX, 0);
+ IXGBE_WAIT_RWRITE;
+ reg_val = IPSRXIDX_WRITE | IPSRXIDX_TABLE_KEY | (sa_index << 3);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(0), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(1), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(2), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(3), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXSALT, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXMOD, 0);
+ IXGBE_WAIT_RWRITE;
+ priv->rx_sa_tbl[sa_index].used = 0;
+
+ /* If last used then clear the IP table entry*/
+ priv->rx_ip_tbl[ip_index].ref_count--;
+ if (priv->rx_ip_tbl[ip_index].ref_count == 0) {
+ reg_val = IPSRXIDX_WRITE | IPSRXIDX_TABLE_IP |
+ (ip_index << 3);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(0), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(1), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(2), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(3), 0);
+ }
+ } else { /* session->dir == RTE_CRYPTO_OUTBOUND */
+ int i;
+
+ /* Find a match in the SA table*/
+ for (i = 0; i < IPSEC_MAX_SA_COUNT; i++) {
+ if (priv->tx_sa_tbl[i].spi ==
+ rte_cpu_to_be_32(ic_session->spi)) {
+ sa_index = i;
+ break;
+ }
+ }
+ /* Fail if no matching entry */
+ if (sa_index < 0) {
+ PMD_DRV_LOG(ERR,
+ "Entry not found in the Tx SA table\n");
+ return -1;
+ }
+ reg_val = IPSRXIDX_WRITE | (sa_index << 3);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(0), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(1), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(2), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(3), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXSALT, 0);
+ IXGBE_WAIT_TWRITE;
+
+ priv->tx_sa_tbl[sa_index].used = 0;
+ }
+
+ return 0;
+}
+
+static int
+ixgbe_crypto_create_session(void *device,
+ struct rte_security_session_conf *conf,
+ struct rte_security_session *session,
+ struct rte_mempool *mempool)
+{
+ struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)device;
+ struct ixgbe_crypto_session *ic_session = NULL;
+ struct rte_crypto_aead_xform *aead_xform;
+ struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
+
+ if (rte_mempool_get(mempool, (void **)&ic_session)) {
+ PMD_DRV_LOG(ERR, "Cannot get object from ic_session mempool");
+ return -ENOMEM;
+ }
+
+ if (conf->crypto_xform->type != RTE_CRYPTO_SYM_XFORM_AEAD ||
+ conf->crypto_xform->aead.algo !=
+ RTE_CRYPTO_AEAD_AES_GCM) {
+ PMD_DRV_LOG(ERR, "Unsupported crypto transformation mode\n");
+ return -ENOTSUP;
+ }
+ aead_xform = &conf->crypto_xform->aead;
+
+ if (conf->ipsec.direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS) {
+ if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_SECURITY) {
+ ic_session->op = IXGBE_OP_AUTHENTICATED_DECRYPTION;
+ } else {
+ PMD_DRV_LOG(ERR, "IPsec decryption not enabled\n");
+ return -ENOTSUP;
+ }
+ } else {
+ if (dev_conf->txmode.offloads & DEV_TX_OFFLOAD_SECURITY) {
+ ic_session->op = IXGBE_OP_AUTHENTICATED_ENCRYPTION;
+ } else {
+ PMD_DRV_LOG(ERR, "IPsec encryption not enabled\n");
+ return -ENOTSUP;
+ }
+ }
+
+ ic_session->key = aead_xform->key.data;
+ memcpy(&ic_session->salt,
+ &aead_xform->key.data[aead_xform->key.length], 4);
+ ic_session->spi = conf->ipsec.spi;
+ ic_session->dev = eth_dev;
+
+ set_sec_session_private_data(session, ic_session);
+
+ if (ic_session->op == IXGBE_OP_AUTHENTICATED_ENCRYPTION) {
+ if (ixgbe_crypto_add_sa(ic_session)) {
+ PMD_DRV_LOG(ERR, "Failed to add SA\n");
+ return -EPERM;
+ }
+ }
+
+ return 0;
+}
+
+static int
+ixgbe_crypto_remove_session(void *device,
+ struct rte_security_session *session)
+{
+ struct rte_eth_dev *eth_dev = device;
+ struct ixgbe_crypto_session *ic_session =
+ (struct ixgbe_crypto_session *)
+ get_sec_session_private_data(session);
+ struct rte_mempool *mempool = rte_mempool_from_obj(ic_session);
+
+ if (eth_dev != ic_session->dev) {
+ PMD_DRV_LOG(ERR, "Session not bound to this device\n");
+ return -ENODEV;
+ }
+
+ if (ixgbe_crypto_remove_sa(eth_dev, ic_session)) {
+ PMD_DRV_LOG(ERR, "Failed to remove session\n");
+ return -EFAULT;
+ }
+
+ rte_mempool_put(mempool, (void *)ic_session);
+
+ return 0;
+}
+
+static inline uint8_t
+ixgbe_crypto_compute_pad_len(struct rte_mbuf *m)
+{
+ if (m->nb_segs == 1) {
+ /* Total pad = payload padding + 2-byte ESP trailer + 16-byte ICV;
+ * the payload padding size byte is stored at <pkt_len - 18>.
+ */
+ uint8_t *esp_pad_len = rte_pktmbuf_mtod_offset(m, uint8_t *,
+ rte_pktmbuf_pkt_len(m) -
+ (ESP_TRAILER_SIZE + ESP_ICV_SIZE));
+ return *esp_pad_len + ESP_TRAILER_SIZE + ESP_ICV_SIZE;
+ }
+ return 0;
+}
+
+static int
+ixgbe_crypto_update_mb(void *device __rte_unused,
+ struct rte_security_session *session,
+ struct rte_mbuf *m, void *params __rte_unused)
+{
+ struct ixgbe_crypto_session *ic_session =
+ get_sec_session_private_data(session);
+ if (ic_session->op == IXGBE_OP_AUTHENTICATED_ENCRYPTION) {
+ union ixgbe_crypto_tx_desc_md *mdata =
+ (union ixgbe_crypto_tx_desc_md *)&m->udata64;
+ mdata->enc = 1;
+ mdata->sa_idx = ic_session->sa_index;
+ mdata->pad_len = ixgbe_crypto_compute_pad_len(m);
+ }
+ return 0;
+}
+
+
+static const struct rte_security_capability *
+ixgbe_crypto_capabilities_get(void *device __rte_unused)
+{
+ static const struct rte_cryptodev_capabilities
+ aes_gcm_gmac_crypto_capabilities[] = {
+ { /* AES GMAC (128-bit) */
+ .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+ {.sym = {
+ .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+ {.auth = {
+ .algo = RTE_CRYPTO_AUTH_AES_GMAC,
+ .block_size = 16,
+ .key_size = {
+ .min = 16,
+ .max = 16,
+ .increment = 0
+ },
+ .digest_size = {
+ .min = 16,
+ .max = 16,
+ .increment = 0
+ },
+ .iv_size = {
+ .min = 12,
+ .max = 12,
+ .increment = 0
+ }
+ }, }
+ }, }
+ },
+ { /* AES GCM (128-bit) */
+ .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+ {.sym = {
+ .xform_type = RTE_CRYPTO_SYM_XFORM_AEAD,
+ {.aead = {
+ .algo = RTE_CRYPTO_AEAD_AES_GCM,
+ .block_size = 16,
+ .key_size = {
+ .min = 16,
+ .max = 16,
+ .increment = 0
+ },
+ .digest_size = {
+ .min = 16,
+ .max = 16,
+ .increment = 0
+ },
+ .aad_size = {
+ .min = 0,
+ .max = 65535,
+ .increment = 1
+ },
+ .iv_size = {
+ .min = 12,
+ .max = 12,
+ .increment = 0
+ }
+ }, }
+ }, }
+ },
+ {
+ .op = RTE_CRYPTO_OP_TYPE_UNDEFINED,
+ {.sym = {
+ .xform_type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED
+ }, }
+ },
+ };
+
+ static const struct rte_security_capability
+ ixgbe_security_capabilities[] = {
+ { /* IPsec Inline Crypto ESP Transport Egress */
+ .action = RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO,
+ .protocol = RTE_SECURITY_PROTOCOL_IPSEC,
+ {.ipsec = {
+ .proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
+ .mode = RTE_SECURITY_IPSEC_SA_MODE_TRANSPORT,
+ .direction = RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
+ .options = { 0 }
+ } },
+ .crypto_capabilities = aes_gcm_gmac_crypto_capabilities,
+ .ol_flags = RTE_SECURITY_TX_OLOAD_NEED_MDATA
+ },
+ { /* IPsec Inline Crypto ESP Transport Ingress */
+ .action = RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO,
+ .protocol = RTE_SECURITY_PROTOCOL_IPSEC,
+ {.ipsec = {
+ .proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
+ .mode = RTE_SECURITY_IPSEC_SA_MODE_TRANSPORT,
+ .direction = RTE_SECURITY_IPSEC_SA_DIR_INGRESS,
+ .options = { 0 }
+ } },
+ .crypto_capabilities = aes_gcm_gmac_crypto_capabilities,
+ .ol_flags = 0
+ },
+ { /* IPsec Inline Crypto ESP Tunnel Egress */
+ .action = RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO,
+ .protocol = RTE_SECURITY_PROTOCOL_IPSEC,
+ {.ipsec = {
+ .proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
+ .mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
+ .direction = RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
+ .options = { 0 }
+ } },
+ .crypto_capabilities = aes_gcm_gmac_crypto_capabilities,
+ .ol_flags = RTE_SECURITY_TX_OLOAD_NEED_MDATA
+ },
+ { /* IPsec Inline Crypto ESP Tunnel Ingress */
+ .action = RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO,
+ .protocol = RTE_SECURITY_PROTOCOL_IPSEC,
+ {.ipsec = {
+ .proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
+ .mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
+ .direction = RTE_SECURITY_IPSEC_SA_DIR_INGRESS,
+ .options = { 0 }
+ } },
+ .crypto_capabilities = aes_gcm_gmac_crypto_capabilities,
+ .ol_flags = 0
+ },
+ {
+ .action = RTE_SECURITY_ACTION_TYPE_NONE
+ }
+ };
+
+ return ixgbe_security_capabilities;
+}
+
+int
+ixgbe_crypto_enable_ipsec(struct rte_eth_dev *dev)
+{
+ struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ uint32_t reg;
+
+ /* sanity checks */
+ if (dev->data->dev_conf.rxmode.enable_lro) {
+ PMD_DRV_LOG(ERR, "RSC and IPsec not supported");
+ return -1;
+ }
+ if (!dev->data->dev_conf.rxmode.hw_strip_crc) {
+ PMD_DRV_LOG(ERR, "HW CRC strip needs to be enabled for IPsec");
+ return -1;
+ }
+
+
+ /* Set IXGBE_SECTXBUFFAF to 0x15 as required in the datasheet*/
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, 0x15);
+
+ /* IFG needs to be set to 3 when we are using security. Otherwise a Tx
+ * hang will occur with heavy traffic.
+ */
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+ reg = (reg & 0xFFFFFFF0) | 0x3;
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+ reg |= IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_RXCRCSTRP;
+ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
+
+ if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SECURITY) {
+ IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, 0);
+ reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+ if (reg != 0) {
+ PMD_DRV_LOG(ERR, "Error enabling Rx Crypto");
+ return -1;
+ }
+ }
+ if (dev->data->dev_conf.txmode.offloads & DEV_TX_OFFLOAD_SECURITY) {
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL,
+ IXGBE_SECTXCTRL_STORE_FORWARD);
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+ if (reg != IXGBE_SECTXCTRL_STORE_FORWARD) {
+ PMD_DRV_LOG(ERR, "Error enabling Rx Crypto");
+ return -1;
+ }
+ }
+
+ ixgbe_crypto_clear_ipsec_tables(dev);
+
+ return 0;
+}
+
+int
+ixgbe_crypto_add_ingress_sa_from_flow(const void *sess,
+ const void *ip_spec,
+ uint8_t is_ipv6)
+{
+ struct ixgbe_crypto_session *ic_session
+ = get_sec_session_private_data(sess);
+
+ if (ic_session->op == IXGBE_OP_AUTHENTICATED_DECRYPTION) {
+ if (is_ipv6) {
+ const struct rte_flow_item_ipv6 *ipv6 = ip_spec;
+ ic_session->src_ip.type = IPv6;
+ ic_session->dst_ip.type = IPv6;
+ rte_memcpy(ic_session->src_ip.ipv6,
+ ipv6->hdr.src_addr, 16);
+ rte_memcpy(ic_session->dst_ip.ipv6,
+ ipv6->hdr.dst_addr, 16);
+ } else {
+ const struct rte_flow_item_ipv4 *ipv4 = ip_spec;
+ ic_session->src_ip.type = IPv4;
+ ic_session->dst_ip.type = IPv4;
+ ic_session->src_ip.ipv4 = ipv4->hdr.src_addr;
+ ic_session->dst_ip.ipv4 = ipv4->hdr.dst_addr;
+ }
+ return ixgbe_crypto_add_sa(ic_session);
+ }
+
+ return 0;
+}
+
+static struct rte_security_ops ixgbe_security_ops = {
+ .session_create = ixgbe_crypto_create_session,
+ .session_update = NULL,
+ .session_stats_get = NULL,
+ .session_destroy = ixgbe_crypto_remove_session,
+ .set_pkt_metadata = ixgbe_crypto_update_mb,
+ .capabilities_get = ixgbe_crypto_capabilities_get
+};
+
+struct rte_security_ctx *
+ixgbe_ipsec_ctx_create(struct rte_eth_dev *dev)
+{
+ struct rte_security_ctx *ctx = rte_malloc("rte_security_instances_ops",
+ sizeof(struct rte_security_ctx), 0);
+ if (ctx) {
+ ctx->device = (void *)dev;
+ ctx->ops = &ixgbe_security_ops;
+ ctx->sess_cnt = 0;
+ }
+ return ctx;
+}
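
As a usage note, a minimal sketch of how an application might obtain the security context created above and open an egress inline-crypto ESP session; the key material, SPI, mempool and helper name are placeholders, and it assumes the port was configured with DEV_TX_OFFLOAD_SECURITY so that ixgbe_crypto_create_session() accepts the egress direction.

#include <rte_ethdev.h>
#include <rte_security.h>
#include <rte_cryptodev.h>

/* 16-byte AES-GCM key followed by the 4-byte salt, as expected by
 * ixgbe_crypto_create_session() above (placeholder values).
 */
static uint8_t key_and_salt[20];

static struct rte_security_session *
create_inline_esp_session(uint16_t port_id, struct rte_mempool *sess_pool)
{
	struct rte_security_ctx *ctx = rte_eth_dev_get_sec_ctx(port_id);
	struct rte_crypto_sym_xform aead = {
		.type = RTE_CRYPTO_SYM_XFORM_AEAD,
		.aead = {
			.op = RTE_CRYPTO_AEAD_OP_ENCRYPT,
			.algo = RTE_CRYPTO_AEAD_AES_GCM,
			.key = { .data = key_and_salt, .length = 16 },
			.iv = { .offset = 0, .length = 12 },
			.digest_length = 16,
		},
	};
	struct rte_security_session_conf conf = {
		.action_type = RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO,
		.protocol = RTE_SECURITY_PROTOCOL_IPSEC,
		.ipsec = {
			.spi = 0x1000, /* placeholder SPI */
			.proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
			.mode = RTE_SECURITY_IPSEC_SA_MODE_TRANSPORT,
			.direction = RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
		},
		.crypto_xform = &aead,
	};

	if (ctx == NULL)
		return NULL;
	return rte_security_session_create(ctx, &conf, sess_pool);
}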
diff --git a/drivers/net/ixgbe/ixgbe_ipsec.h b/drivers/net/ixgbe/ixgbe_ipsec.h
new file mode 100644
index 00000000..fb8fefc8
--- /dev/null
+++ b/drivers/net/ixgbe/ixgbe_ipsec.h
@@ -0,0 +1,151 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef IXGBE_IPSEC_H_
+#define IXGBE_IPSEC_H_
+
+#include <rte_security.h>
+
+#define IPSRXIDX_RX_EN 0x00000001
+#define IPSRXIDX_TABLE_IP 0x00000002
+#define IPSRXIDX_TABLE_SPI 0x00000004
+#define IPSRXIDX_TABLE_KEY 0x00000006
+#define IPSRXIDX_WRITE 0x80000000
+#define IPSRXIDX_READ 0x40000000
+#define IPSRXMOD_VALID 0x00000001
+#define IPSRXMOD_PROTO 0x00000004
+#define IPSRXMOD_DECRYPT 0x00000008
+#define IPSRXMOD_IPV6 0x00000010
+#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400
+#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000
+#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000
+#define IXGBE_RXDADV_IPSEC_STATUS_SECP 0x00020000
+#define IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000
+#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000
+#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000
+#define IXGBE_RXDADV_IPSEC_ERROR_AUTHENTICATION_FAILED 0x18000000
+
+#define IPSEC_MAX_RX_IP_COUNT 128
+#define IPSEC_MAX_SA_COUNT 1024
+
+#define ESP_ICV_SIZE 16
+#define ESP_TRAILER_SIZE 2
+
+enum ixgbe_operation {
+ IXGBE_OP_AUTHENTICATED_ENCRYPTION,
+ IXGBE_OP_AUTHENTICATED_DECRYPTION
+};
+
+enum ixgbe_gcm_key {
+ IXGBE_GCM_KEY_128,
+ IXGBE_GCM_KEY_256
+};
+
+/**
+ * Generic IP address structure
+ * TODO: Find a better location for this, possibly rte_net.h.
+ **/
+struct ipaddr {
+ enum ipaddr_type {
+ IPv4,
+ IPv6
+ } type;
+ /**< IP Address Type - IPv4/IPv6 */
+
+ union {
+ uint32_t ipv4;
+ uint32_t ipv6[4];
+ };
+};
+
+/** inline crypto private session structure */
+struct ixgbe_crypto_session {
+ enum ixgbe_operation op;
+ uint8_t *key;
+ uint32_t salt;
+ uint32_t sa_index;
+ uint32_t spi;
+ struct ipaddr src_ip;
+ struct ipaddr dst_ip;
+ struct rte_eth_dev *dev;
+} __rte_cache_aligned;
+
+struct ixgbe_crypto_rx_ip_table {
+ struct ipaddr ip;
+ uint16_t ref_count;
+};
+struct ixgbe_crypto_rx_sa_table {
+ uint32_t spi;
+ uint32_t ip_index;
+ uint32_t key[4];
+ uint32_t salt;
+ uint8_t mode;
+ uint8_t used;
+};
+
+struct ixgbe_crypto_tx_sa_table {
+ uint32_t spi;
+ uint32_t key[4];
+ uint32_t salt;
+ uint8_t used;
+};
+
+union ixgbe_crypto_tx_desc_md {
+ uint64_t data;
+ struct {
+ /**< SA table index */
+ uint32_t sa_idx;
+ /**< ICV and ESP trailer length */
+ uint8_t pad_len;
+ /**< enable encryption */
+ uint8_t enc;
+ };
+};
+
+struct ixgbe_ipsec {
+ struct ixgbe_crypto_rx_ip_table rx_ip_tbl[IPSEC_MAX_RX_IP_COUNT];
+ struct ixgbe_crypto_rx_sa_table rx_sa_tbl[IPSEC_MAX_SA_COUNT];
+ struct ixgbe_crypto_tx_sa_table tx_sa_tbl[IPSEC_MAX_SA_COUNT];
+};
+
+
+struct rte_security_ctx *
+ixgbe_ipsec_ctx_create(struct rte_eth_dev *dev);
+int ixgbe_crypto_enable_ipsec(struct rte_eth_dev *dev);
+int ixgbe_crypto_add_ingress_sa_from_flow(const void *sess,
+ const void *ip_spec,
+ uint8_t is_ipv6);
+
+#endif /*IXGBE_IPSEC_H_*/
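
On transmit, the driver reads the metadata laid out by union ixgbe_crypto_tx_desc_md from m->udata64; applications fill it through the set_pkt_metadata operation and flag the packet for the security offload. A rough sketch, with a hypothetical helper name:

#include <rte_mbuf.h>
#include <rte_security.h>

/* Illustrative only: mark an outgoing ESP packet for inline crypto.
 * 'ctx' and 'sess' come from rte_eth_dev_get_sec_ctx() and
 * rte_security_session_create() respectively.
 */
static inline void
prepare_inline_tx(struct rte_security_ctx *ctx,
		  struct rte_security_session *sess,
		  struct rte_mbuf *m)
{
	/* Tells ixgbe_xmit_pkts() to build a context descriptor with the
	 * SA index and pad length taken from m->udata64.
	 */
	m->ol_flags |= PKT_TX_SEC_OFFLOAD;
	rte_security_set_pkt_metadata(ctx, sess, m, NULL);
}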
diff --git a/drivers/net/ixgbe/ixgbe_pf.c b/drivers/net/ixgbe/ixgbe_pf.c
index c0d86c76..676e92c7 100644
--- a/drivers/net/ixgbe/ixgbe_pf.c
+++ b/drivers/net/ixgbe/ixgbe_pf.c
@@ -627,6 +627,18 @@ ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
struct ixgbe_vf_info *vfinfo =
*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool;
+ struct rte_eth_conf *eth_conf;
+ struct rte_eth_vmdq_dcb_tx_conf *vmdq_dcb_tx_conf;
+ u8 num_tcs;
+ struct ixgbe_hw *hw;
+ u32 vmvir;
+#define IXGBE_VMVIR_VLANA_MASK 0xC0000000
+#define IXGBE_VMVIR_VLAN_VID_MASK 0x00000FFF
+#define IXGBE_VMVIR_VLAN_UP_MASK 0x0000E000
+#define VLAN_PRIO_SHIFT 13
+ u32 vlana;
+ u32 vid;
+ u32 user_priority;
/* Verify if the PF supports the mbox APIs version or not */
switch (vfinfo[vf].api_version) {
@@ -645,10 +657,51 @@ ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
/* Notify VF of default queue */
msgbuf[IXGBE_VF_DEF_QUEUE] = default_q;
- /*
- * FIX ME if it needs fill msgbuf[IXGBE_VF_TRANS_VLAN]
- * for VLAN strip or VMDQ_DCB or VMDQ_DCB_RSS
- */
+ /* Notify VF of number of DCB traffic classes */
+ eth_conf = &dev->data->dev_conf;
+ switch (eth_conf->txmode.mq_mode) {
+ case ETH_MQ_TX_NONE:
+ case ETH_MQ_TX_DCB:
+ RTE_LOG(ERR, PMD, "PF must work with virtualization for VF %u"
+ ", but its tx mode = %d\n", vf,
+ eth_conf->txmode.mq_mode);
+ return -1;
+
+ case ETH_MQ_TX_VMDQ_DCB:
+ vmdq_dcb_tx_conf = &eth_conf->tx_adv_conf.vmdq_dcb_tx_conf;
+ switch (vmdq_dcb_tx_conf->nb_queue_pools) {
+ case ETH_16_POOLS:
+ num_tcs = ETH_8_TCS;
+ break;
+ case ETH_32_POOLS:
+ num_tcs = ETH_4_TCS;
+ break;
+ default:
+ return -1;
+ }
+ break;
+
+ /* ETH_MQ_TX_VMDQ_ONLY, DCB not enabled */
+ case ETH_MQ_TX_VMDQ_ONLY:
+ hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ vmvir = IXGBE_READ_REG(hw, IXGBE_VMVIR(vf));
+ vlana = vmvir & IXGBE_VMVIR_VLANA_MASK;
+ vid = vmvir & IXGBE_VMVIR_VLAN_VID_MASK;
+ user_priority =
+ (vmvir & IXGBE_VMVIR_VLAN_UP_MASK) >> VLAN_PRIO_SHIFT;
+ if ((vlana == IXGBE_VMVIR_VLANA_DEFAULT) &&
+ ((vid != 0) || (user_priority != 0)))
+ num_tcs = 1;
+ else
+ num_tcs = 0;
+ break;
+
+ default:
+ RTE_LOG(ERR, PMD, "PF work with invalid mode = %d\n",
+ eth_conf->txmode.mq_mode);
+ return -1;
+ }
+ msgbuf[IXGBE_VF_TRANS_VLAN] = num_tcs;
return 0;
}
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 64bff258..012d9ee8 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -93,6 +93,7 @@
PKT_TX_TCP_SEG | \
PKT_TX_MACSEC | \
PKT_TX_OUTER_IP_CKSUM | \
+ PKT_TX_SEC_OFFLOAD | \
IXGBE_TX_IEEE1588_TMST)
#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
@@ -184,7 +185,7 @@ tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
int i;
for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
- buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
+ buf_dma_addr = rte_mbuf_data_iova(*pkts);
pkt_len = (*pkts)->data_len;
/* write data to descriptor */
@@ -207,7 +208,7 @@ tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
uint64_t buf_dma_addr;
uint32_t pkt_len;
- buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
+ buf_dma_addr = rte_mbuf_data_iova(*pkts);
pkt_len = (*pkts)->data_len;
/* write data to descriptor */
@@ -395,7 +396,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
static inline void
ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
- uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
+ uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
+ __rte_unused uint64_t *mdata)
{
uint32_t type_tucmd_mlhl;
uint32_t mss_l4len_idx = 0;
@@ -479,6 +481,21 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
seqnum_seed |= tx_offload.l2_len
<< IXGBE_ADVTXD_TUNNEL_LEN;
}
+#ifdef RTE_LIBRTE_SECURITY
+ if (ol_flags & PKT_TX_SEC_OFFLOAD) {
+ union ixgbe_crypto_tx_desc_md *md =
+ (union ixgbe_crypto_tx_desc_md *)mdata;
+ seqnum_seed |=
+ (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
+ type_tucmd_mlhl |= md->enc ?
+ (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
+ IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
+ type_tucmd_mlhl |=
+ (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
+ tx_offload_mask.sa_idx |= ~0;
+ tx_offload_mask.sec_pad_len |= ~0;
+ }
+#endif
txq->ctx_cache[ctx_idx].flags = ol_flags;
txq->ctx_cache[ctx_idx].tx_offload.data[0] =
@@ -657,6 +674,9 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint32_t ctx = 0;
uint32_t new_ctx;
union ixgbe_tx_offload tx_offload;
+#ifdef RTE_LIBRTE_SECURITY
+ uint8_t use_ipsec;
+#endif
tx_offload.data[0] = 0;
tx_offload.data[1] = 0;
@@ -684,6 +704,9 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
* are needed for offload functionality.
*/
ol_flags = tx_pkt->ol_flags;
+#ifdef RTE_LIBRTE_SECURITY
+ use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
+#endif
/* If hardware offload required */
tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
@@ -695,6 +718,15 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
tx_offload.tso_segsz = tx_pkt->tso_segsz;
tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
+#ifdef RTE_LIBRTE_SECURITY
+ if (use_ipsec) {
+ union ixgbe_crypto_tx_desc_md *ipsec_mdata =
+ (union ixgbe_crypto_tx_desc_md *)
+ &tx_pkt->udata64;
+ tx_offload.sa_idx = ipsec_mdata->sa_idx;
+ tx_offload.sec_pad_len = ipsec_mdata->pad_len;
+ }
+#endif
/* If new context need be built or reuse the exist ctx. */
ctx = what_advctx_update(txq, tx_ol_req,
@@ -855,7 +887,7 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
}
ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
- tx_offload);
+ tx_offload, &tx_pkt->udata64);
txe->last_id = tx_last;
tx_id = txe->next_id;
@@ -873,6 +905,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
}
olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+#ifdef RTE_LIBRTE_SECURITY
+ if (use_ipsec)
+ olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
+#endif
m_seg = tx_pkt;
do {
@@ -888,7 +924,7 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
* Set up Transmit Data Descriptor.
*/
slen = m_seg->data_len;
- buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
+ buf_dma_addr = rte_mbuf_data_iova(m_seg);
txd->read.buffer_addr =
rte_cpu_to_le_64(buf_dma_addr);
txd->read.cmd_type_len =
@@ -1447,6 +1483,14 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status)
pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
}
+#ifdef RTE_LIBRTE_SECURITY
+ if (rx_status & IXGBE_RXD_STAT_SECP) {
+ pkt_flags |= PKT_RX_SEC_OFFLOAD;
+ if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
+ pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
+ }
+#endif
+
return pkt_flags;
}
@@ -1589,7 +1633,7 @@ ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
mb->data_off = RTE_PKTMBUF_HEADROOM;
/* populate the descriptors */
- dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
+ dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
rxdp[i].read.hdr_addr = 0;
rxdp[i].read.pkt_addr = dma_addr;
}
@@ -1821,7 +1865,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rxm = rxe->mbuf;
rxe->mbuf = nmb;
dma_addr =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+ rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
rxdp->read.hdr_addr = 0;
rxdp->read.pkt_addr = dma_addr;
@@ -1849,7 +1893,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rxm->port = rxq->port_id;
pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
- /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
+ /* Only valid if PKT_RX_VLAN set in pkt_flags */
rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
@@ -1940,7 +1984,7 @@ ixgbe_fill_cluster_head_buf(
head->port = rxq->port_id;
- /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
+ /* The vlan_tci field is only valid when PKT_RX_VLAN is
* set in the pkt_flags field.
*/
head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
@@ -2115,7 +2159,7 @@ next_desc:
if (!bulk_alloc) {
__le64 dma =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
+ rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
/*
* Update RX descriptor with the physical address of the
* new data buffer of the new allocated mbuf.
@@ -2364,8 +2408,11 @@ void __attribute__((cold))
ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
{
/* Use a simple Tx queue (no offloads, no multi segs) if possible */
- if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
- && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
+ if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) &&
+#ifdef RTE_LIBRTE_SECURITY
+ !(txq->using_ipsec) &&
+#endif
+ (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
PMD_INIT_LOG(DEBUG, "Using simple tx code path");
dev->tx_pkt_prepare = NULL;
#ifdef RTE_IXGBE_INC_VECTOR
@@ -2535,6 +2582,10 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->txq_flags = tx_conf->txq_flags;
txq->ops = &def_txq_ops;
txq->tx_deferred_start = tx_conf->tx_deferred_start;
+#ifdef RTE_LIBRTE_SECURITY
+ txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
+ DEV_TX_OFFLOAD_SECURITY);
+#endif
/*
* Modification to set VFTDT for virtual function if vf is detected
@@ -2548,7 +2599,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
else
txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
- txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+ txq->tx_ring_phys_addr = tz->iova;
txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
/* Allocate software ring */
@@ -2850,7 +2901,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
}
- rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+ rxq->rx_ring_phys_addr = rz->iova;
rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
/*
@@ -3517,12 +3568,19 @@ ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
}
+
+ /* Initialize User Priority to Traffic Class mapping */
+ for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
+ tc = &dcb_config->tc_config[j];
+ tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
+ }
+
/* User Priority to Traffic Class mapping */
for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
j = vmdq_rx_conf->dcb_tc[i];
tc = &dcb_config->tc_config[j];
- tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
- (uint8_t)(1 << j);
+ tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
+ (uint8_t)(1 << i);
}
}
@@ -3544,12 +3602,18 @@ ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
}
+ /* Initialize User Priority to Traffic Class mapping */
+ for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
+ tc = &dcb_config->tc_config[j];
+ tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
+ }
+
/* User Priority to Traffic Class mapping */
for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
j = vmdq_tx_conf->dcb_tc[i];
tc = &dcb_config->tc_config[j];
- tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
- (uint8_t)(1 << j);
+ tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
+ (uint8_t)(1 << i);
}
}
@@ -3565,12 +3629,18 @@ ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
+ /* Initialize User Priority to Traffic Class mapping */
+ for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
+ tc = &dcb_config->tc_config[j];
+ tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
+ }
+
/* User Priority to Traffic Class mapping */
for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
j = rx_conf->dcb_tc[i];
tc = &dcb_config->tc_config[j];
- tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
- (uint8_t)(1 << j);
+ tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
+ (uint8_t)(1 << i);
}
}
@@ -3586,12 +3656,18 @@ ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
+ /* Initialize User Priority to Traffic Class mapping */
+ for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
+ tc = &dcb_config->tc_config[j];
+ tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
+ }
+
/* User Priority to Traffic Class mapping */
for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
j = tx_conf->dcb_tc[i];
tc = &dcb_config->tc_config[j];
- tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
- (uint8_t)(1 << j);
+ tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
+ (uint8_t)(1 << i);
}
}
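
All four hunks above apply the same correction: up_to_tc_bitmap is a per-TC set of user priorities, so it must be cleared once and then OR-accumulated with bit i (the user priority), not overwritten with bit j (the traffic class). A standalone sketch of the intended mapping, with hypothetical names:

#include <stdint.h>
#include <string.h>

/* dcb_tc[i] gives the traffic class for user priority i (0..7).
 * Build, for every TC, the bitmap of user priorities mapped onto it.
 */
static void
build_up_to_tc_bitmaps(const uint8_t dcb_tc[8], uint8_t bitmap[8])
{
	int i;

	memset(bitmap, 0, 8);
	for (i = 0; i < 8; i++)
		bitmap[dcb_tc[i]] |= (uint8_t)(1 << i);
	/* e.g. dcb_tc = {0,0,1,1,2,2,3,3} yields
	 * bitmap[0]=0x03, bitmap[1]=0x0C, bitmap[2]=0x30, bitmap[3]=0xC0
	 */
}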
@@ -4112,7 +4188,7 @@ ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
mbuf->port = rxq->port_id;
dma_addr =
- rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
+ rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
rxd = &rxq->rx_ring[i];
rxd->read.hdr_addr = 0;
rxd->read.pkt_addr = dma_addr;
@@ -4494,6 +4570,10 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
rxq->rx_using_sse = rx_using_sse;
+#ifdef RTE_LIBRTE_SECURITY
+ rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_SECURITY);
+#endif
}
}
@@ -4981,6 +5061,21 @@ ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
ixgbe_setup_loopback_link_82599(hw);
+#ifdef RTE_LIBRTE_SECURITY
+ if ((dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_SECURITY) ||
+ (dev->data->dev_conf.txmode.offloads &
+ DEV_TX_OFFLOAD_SECURITY)) {
+ ret = ixgbe_crypto_enable_ipsec(dev);
+ if (ret != 0) {
+ PMD_DRV_LOG(ERR,
+ "ixgbe_crypto_enable_ipsec fails with %d.",
+ ret);
+ return ret;
+ }
+ }
+#endif
+
return 0;
}
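
The IPsec enable path above only runs when the corresponding offloads were requested at configure time; a rough configuration sketch follows (hypothetical helper name, placeholder port id and single-queue setup; CRC stripping is required by ixgbe_crypto_enable_ipsec()).

#include <rte_ethdev.h>

/* Hypothetical helper: request inline IPsec offloads in both directions. */
static int
configure_port_for_inline_ipsec(uint16_t port_id)
{
	struct rte_eth_conf conf = { .rxmode = { .hw_strip_crc = 1 } };

	conf.rxmode.offloads = DEV_RX_OFFLOAD_SECURITY;
	conf.txmode.offloads = DEV_TX_OFFLOAD_SECURITY;
	return rte_eth_dev_configure(port_id, 1, 1, &conf);
}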
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 85feb0bd..cc7c8288 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -138,8 +138,12 @@ struct ixgbe_rx_queue {
uint16_t rx_nb_avail; /**< nr of staged pkts ready to ret to app */
uint16_t rx_next_avail; /**< idx of next staged pkt to ret to app */
uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
- uint16_t rx_using_sse;
+ uint8_t rx_using_sse;
/**< indicates that vector RX is in use */
+#ifdef RTE_LIBRTE_SECURITY
+ uint8_t using_ipsec;
+ /**< indicates that IPsec RX feature is in use */
+#endif
#ifdef RTE_IXGBE_INC_VECTOR
uint16_t rxrearm_nb; /**< number of remaining to be re-armed */
uint16_t rxrearm_start; /**< the idx we start the re-arming from */
@@ -148,7 +152,7 @@ struct ixgbe_rx_queue {
uint16_t queue_id; /**< RX queue index. */
uint16_t reg_idx; /**< RX queue register index. */
uint16_t pkt_type_mask; /**< Packet type mask for different NICs. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
uint8_t rx_deferred_start; /**< not in global dev start. */
@@ -183,6 +187,11 @@ union ixgbe_tx_offload {
/* fields for TX offloading of tunnels */
uint64_t outer_l3_len:8; /**< Outer L3 (IP) Hdr Length. */
uint64_t outer_l2_len:8; /**< Outer L2 (MAC) Hdr Length. */
+#ifdef RTE_LIBRTE_SECURITY
+ /* inline ipsec related*/
+ uint64_t sa_idx:8; /**< TX SA database entry index */
+ uint64_t sec_pad_len:4; /**< padding length */
+#endif
};
};
@@ -237,7 +246,7 @@ struct ixgbe_tx_queue {
uint16_t tx_next_rs; /**< next desc to set RS bit */
uint16_t queue_id; /**< TX queue index. */
uint16_t reg_idx; /**< TX queue register index. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
uint8_t pthresh; /**< Prefetch threshold register. */
uint8_t hthresh; /**< Host threshold register. */
uint8_t wthresh; /**< Write-back threshold reg. */
@@ -247,6 +256,10 @@ struct ixgbe_tx_queue {
struct ixgbe_advctx_info ctx_cache[IXGBE_CTX_NUM];
const struct ixgbe_txq_ops *ops; /**< txq ops */
uint8_t tx_deferred_start; /**< not in global dev start. */
+#ifdef RTE_LIBRTE_SECURITY
+ uint8_t using_ipsec;
+ /**< indicates that IPsec TX feature is in use */
+#endif
};
struct ixgbe_txq_ops {
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
index 44de1caa..2e87ffa0 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
@@ -87,13 +87,13 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
* Data to be rearmed is 6 bytes long.
*/
vst1_u8((uint8_t *)&mb0->rearm_data, p);
- paddr = mb0->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+ paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
/* flush desc with pa dma_addr */
vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
vst1_u8((uint8_t *)&mb1->rearm_data, p);
- paddr = mb1->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+ paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
}
@@ -126,8 +126,8 @@ desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2,
} vol;
const uint8x16_t pkttype_msk = {
- PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT,
- PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT,
+ PKT_RX_VLAN, PKT_RX_VLAN,
+ PKT_RX_VLAN, PKT_RX_VLAN,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00};
@@ -414,7 +414,7 @@ vtx1(volatile union ixgbe_adv_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
uint64x2_t descriptor = {
- pkt->buf_physaddr + pkt->data_off,
+ pkt->buf_iova + pkt->data_off,
(uint64_t)pkt->pkt_len << 46 | flags | pkt->data_len};
vst1q_u64((uint64_t *)&txdp->read, descriptor);
diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
index e704a7f3..486239ba 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c
@@ -86,8 +86,8 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
mb0 = rxep[0].mbuf;
mb1 = rxep[1].mbuf;
- /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_physaddr) !=
+ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
+ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
offsetof(struct rte_mbuf, buf_addr) + 8);
vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));
@@ -122,6 +122,43 @@ ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
}
+#ifdef RTE_LIBRTE_SECURITY
+static inline void
+desc_to_olflags_v_ipsec(__m128i descs[4], struct rte_mbuf **rx_pkts)
+{
+ __m128i sterr, rearm, tmp_e, tmp_p;
+ uint32_t *rearm0 = (uint32_t *)rx_pkts[0]->rearm_data + 2;
+ uint32_t *rearm1 = (uint32_t *)rx_pkts[1]->rearm_data + 2;
+ uint32_t *rearm2 = (uint32_t *)rx_pkts[2]->rearm_data + 2;
+ uint32_t *rearm3 = (uint32_t *)rx_pkts[3]->rearm_data + 2;
+ const __m128i ipsec_sterr_msk =
+ _mm_set1_epi32(IXGBE_RXDADV_IPSEC_STATUS_SECP |
+ IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED);
+ const __m128i ipsec_proc_msk =
+ _mm_set1_epi32(IXGBE_RXDADV_IPSEC_STATUS_SECP);
+ const __m128i ipsec_err_flag =
+ _mm_set1_epi32(PKT_RX_SEC_OFFLOAD_FAILED |
+ PKT_RX_SEC_OFFLOAD);
+ const __m128i ipsec_proc_flag = _mm_set1_epi32(PKT_RX_SEC_OFFLOAD);
+
+ rearm = _mm_set_epi32(*rearm3, *rearm2, *rearm1, *rearm0);
+ sterr = _mm_set_epi32(_mm_extract_epi32(descs[3], 2),
+ _mm_extract_epi32(descs[2], 2),
+ _mm_extract_epi32(descs[1], 2),
+ _mm_extract_epi32(descs[0], 2));
+ sterr = _mm_and_si128(sterr, ipsec_sterr_msk);
+ tmp_e = _mm_cmpeq_epi32(sterr, ipsec_sterr_msk);
+ tmp_p = _mm_cmpeq_epi32(sterr, ipsec_proc_msk);
+ sterr = _mm_or_si128(_mm_and_si128(tmp_e, ipsec_err_flag),
+ _mm_and_si128(tmp_p, ipsec_proc_flag));
+ rearm = _mm_or_si128(rearm, sterr);
+ *rearm0 = _mm_extract_epi32(rearm, 0);
+ *rearm1 = _mm_extract_epi32(rearm, 1);
+ *rearm2 = _mm_extract_epi32(rearm, 2);
+ *rearm3 = _mm_extract_epi32(rearm, 3);
+}
+#endif
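
For readability, a scalar equivalent (illustrative only, hypothetical function name) of what the vectorized helper above computes for each of the four descriptors, using the constants from ixgbe_ipsec.h:

/* Scalar view of desc_to_olflags_v_ipsec(): per packet, look at the SECP
 * and authentication-error bits of the descriptor status and set the
 * corresponding PKT_RX_SEC_OFFLOAD* flags in the mbuf rearm data.
 */
static inline uint32_t
ipsec_status_to_olflags(uint32_t staterr)
{
	uint32_t flags = 0;

	if (staterr & IXGBE_RXDADV_IPSEC_STATUS_SECP) {
		flags |= PKT_RX_SEC_OFFLOAD;
		if ((staterr & IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK) ==
		    IXGBE_RXDADV_IPSEC_ERROR_AUTHENTICATION_FAILED)
			flags |= PKT_RX_SEC_OFFLOAD_FAILED;
	}
	return flags;
}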
+
static inline void
desc_to_olflags_v(__m128i descs[4], __m128i mbuf_init, uint8_t vlan_flags,
struct rte_mbuf **rx_pkts)
@@ -310,6 +347,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
volatile union ixgbe_adv_rx_desc *rxdp;
struct ixgbe_rx_entry *sw_ring;
uint16_t nb_pkts_recd;
+#ifdef RTE_LIBRTE_SECURITY
+ uint8_t use_ipsec = rxq->using_ipsec;
+#endif
int pos;
uint64_t var;
__m128i shuf_msk;
@@ -397,7 +437,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
sw_ring = &rxq->sw_ring[rxq->rx_tail];
/* ensure these 2 flags are in the lower 8 bits */
- RTE_BUILD_BUG_ON((PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED) > UINT8_MAX);
+ RTE_BUILD_BUG_ON((PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED) > UINT8_MAX);
vlan_flags = rxq->vlan_flags & UINT8_MAX;
/* A. load 4 packet in one loop
@@ -473,6 +513,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
/* set ol_flags with vlan packet type */
desc_to_olflags_v(descs, mbuf_init, vlan_flags, &rx_pkts[pos]);
+#ifdef RTE_LIBRTE_SECURITY
+ if (unlikely(use_ipsec))
+ desc_to_olflags_v_ipsec(descs, &rx_pkts[pos]);
+#endif
+
/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
@@ -604,7 +649,7 @@ vtx1(volatile union ixgbe_adv_tx_desc *txdp,
{
__m128i descriptor = _mm_set_epi64x((uint64_t)pkt->pkt_len << 46 |
flags | pkt->data_len,
- pkt->buf_physaddr + pkt->data_off);
+ pkt->buf_iova + pkt->data_off);
_mm_store_si128((__m128i *)&txdp->read, descriptor);
}
diff --git a/drivers/net/ixgbe/ixgbe_tm.c b/drivers/net/ixgbe/ixgbe_tm.c
index cdcf45cb..ca4182c9 100644
--- a/drivers/net/ixgbe/ixgbe_tm.c
+++ b/drivers/net/ixgbe/ixgbe_tm.c
@@ -312,7 +312,7 @@ ixgbe_shaper_profile_add(struct rte_eth_dev *dev,
if (!shaper_profile)
return -ENOMEM;
shaper_profile->shaper_profile_id = shaper_profile_id;
- (void)rte_memcpy(&shaper_profile->profile, profile,
+ rte_memcpy(&shaper_profile->profile, profile,
sizeof(struct rte_tm_shaper_params));
TAILQ_INSERT_TAIL(&tm_conf->shaper_profile_list,
shaper_profile, node);
@@ -482,7 +482,7 @@ ixgbe_queue_base_nb_get(struct rte_eth_dev *dev, uint16_t tc_node_no,
}
static int
-ixgbe_node_param_check(uint32_t node_id, uint32_t parent_node_id,
+ixgbe_node_param_check(struct rte_eth_dev *dev, uint32_t node_id,
uint32_t priority, uint32_t weight,
struct rte_tm_node_params *params,
struct rte_tm_error *error)
@@ -517,8 +517,8 @@ ixgbe_node_param_check(uint32_t node_id, uint32_t parent_node_id,
return -EINVAL;
}
- /* for root node */
- if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+ /* for non-leaf node */
+ if (node_id >= dev->data->nb_tx_queues) {
/* check the unsupported parameters */
if (params->nonleaf.wfq_weight_mode) {
error->type =
@@ -542,7 +542,7 @@ ixgbe_node_param_check(uint32_t node_id, uint32_t parent_node_id,
return 0;
}
- /* for TC or queue node */
+ /* for leaf node */
/* check the unsupported parameters */
if (params->leaf.cman) {
error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
@@ -588,7 +588,7 @@ ixgbe_node_add(struct rte_eth_dev *dev, uint32_t node_id,
IXGBE_DEV_PRIVATE_TO_TM_CONF(dev->data->dev_private);
enum ixgbe_tm_node_type node_type = IXGBE_TM_NODE_TYPE_MAX;
enum ixgbe_tm_node_type parent_node_type = IXGBE_TM_NODE_TYPE_MAX;
- struct ixgbe_tm_shaper_profile *shaper_profile;
+ struct ixgbe_tm_shaper_profile *shaper_profile = NULL;
struct ixgbe_tm_node *tm_node;
struct ixgbe_tm_node *parent_node;
uint8_t nb_tcs;
@@ -606,7 +606,7 @@ ixgbe_node_add(struct rte_eth_dev *dev, uint32_t node_id,
return -EINVAL;
}
- ret = ixgbe_node_param_check(node_id, parent_node_id, priority, weight,
+ ret = ixgbe_node_param_check(dev, node_id, priority, weight,
params, error);
if (ret)
return ret;
@@ -619,12 +619,15 @@ ixgbe_node_add(struct rte_eth_dev *dev, uint32_t node_id,
}
/* check the shaper profile id */
- shaper_profile = ixgbe_shaper_profile_search(dev,
- params->shaper_profile_id);
- if (!shaper_profile) {
- error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
- error->message = "shaper profile not exist";
- return -EINVAL;
+ if (params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE) {
+ shaper_profile = ixgbe_shaper_profile_search(
+ dev, params->shaper_profile_id);
+ if (!shaper_profile) {
+ error->type =
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+ error->message = "shaper profile not exist";
+ return -EINVAL;
+ }
}
/* root node if not have a parent */
@@ -657,12 +660,13 @@ ixgbe_node_add(struct rte_eth_dev *dev, uint32_t node_id,
tm_node->no = 0;
tm_node->parent = NULL;
tm_node->shaper_profile = shaper_profile;
- (void)rte_memcpy(&tm_node->params, params,
+ rte_memcpy(&tm_node->params, params,
sizeof(struct rte_tm_node_params));
tm_conf->root = tm_node;
/* increase the reference counter of the shaper profile */
- shaper_profile->reference_count++;
+ if (shaper_profile)
+ shaper_profile->reference_count++;
return 0;
}
@@ -737,7 +741,7 @@ ixgbe_node_add(struct rte_eth_dev *dev, uint32_t node_id,
tm_node->reference_count = 0;
tm_node->parent = parent_node;
tm_node->shaper_profile = shaper_profile;
- (void)rte_memcpy(&tm_node->params, params,
+ rte_memcpy(&tm_node->params, params,
sizeof(struct rte_tm_node_params));
if (parent_node_type == IXGBE_TM_NODE_TYPE_PORT) {
tm_node->no = parent_node->reference_count;
@@ -753,7 +757,8 @@ ixgbe_node_add(struct rte_eth_dev *dev, uint32_t node_id,
tm_node->parent->reference_count++;
/* increase the reference counter of the shaper profile */
- shaper_profile->reference_count++;
+ if (shaper_profile)
+ shaper_profile->reference_count++;
return 0;
}
@@ -801,14 +806,16 @@ ixgbe_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
/* root node */
if (node_type == IXGBE_TM_NODE_TYPE_PORT) {
- tm_node->shaper_profile->reference_count--;
+ if (tm_node->shaper_profile)
+ tm_node->shaper_profile->reference_count--;
rte_free(tm_node);
tm_conf->root = NULL;
return 0;
}
/* TC or queue node */
- tm_node->shaper_profile->reference_count--;
+ if (tm_node->shaper_profile)
+ tm_node->shaper_profile->reference_count--;
tm_node->parent->reference_count--;
if (node_type == IXGBE_TM_NODE_TYPE_TC) {
TAILQ_REMOVE(&tm_conf->tc_list, tm_node, node);
@@ -876,15 +883,34 @@ ixgbe_level_capabilities_get(struct rte_eth_dev *dev,
cap->n_nodes_max = 1;
cap->n_nodes_nonleaf_max = 1;
cap->n_nodes_leaf_max = 0;
- cap->non_leaf_nodes_identical = true;
- cap->leaf_nodes_identical = true;
+ } else if (level_id == IXGBE_TM_NODE_TYPE_TC) {
+ /* TC */
+ cap->n_nodes_max = IXGBE_DCB_MAX_TRAFFIC_CLASS;
+ cap->n_nodes_nonleaf_max = IXGBE_DCB_MAX_TRAFFIC_CLASS;
+ cap->n_nodes_leaf_max = 0;
+ } else {
+ /* queue */
+ cap->n_nodes_max = hw->mac.max_tx_queues;
+ cap->n_nodes_nonleaf_max = 0;
+ cap->n_nodes_leaf_max = hw->mac.max_tx_queues;
+ }
+
+ cap->non_leaf_nodes_identical = true;
+ cap->leaf_nodes_identical = true;
+
+ if (level_id != IXGBE_TM_NODE_TYPE_QUEUE) {
cap->nonleaf.shaper_private_supported = true;
cap->nonleaf.shaper_private_dual_rate_supported = false;
cap->nonleaf.shaper_private_rate_min = 0;
/* 10Gbps -> 1.25GBps */
cap->nonleaf.shaper_private_rate_max = 1250000000ull;
cap->nonleaf.shaper_shared_n_max = 0;
- cap->nonleaf.sched_n_children_max = IXGBE_DCB_MAX_TRAFFIC_CLASS;
+ if (level_id == IXGBE_TM_NODE_TYPE_PORT)
+ cap->nonleaf.sched_n_children_max =
+ IXGBE_DCB_MAX_TRAFFIC_CLASS;
+ else
+ cap->nonleaf.sched_n_children_max =
+ hw->mac.max_tx_queues;
cap->nonleaf.sched_sp_n_priorities_max = 1;
cap->nonleaf.sched_wfq_n_children_per_group_max = 0;
cap->nonleaf.sched_wfq_n_groups_max = 0;
@@ -894,21 +920,7 @@ ixgbe_level_capabilities_get(struct rte_eth_dev *dev,
return 0;
}
- /* TC or queue node */
- if (level_id == IXGBE_TM_NODE_TYPE_TC) {
- /* TC */
- cap->n_nodes_max = IXGBE_DCB_MAX_TRAFFIC_CLASS;
- cap->n_nodes_nonleaf_max = IXGBE_DCB_MAX_TRAFFIC_CLASS;
- cap->n_nodes_leaf_max = 0;
- cap->non_leaf_nodes_identical = true;
- } else {
- /* queue */
- cap->n_nodes_max = hw->mac.max_tx_queues;
- cap->n_nodes_nonleaf_max = 0;
- cap->n_nodes_leaf_max = hw->mac.max_tx_queues;
- cap->non_leaf_nodes_identical = true;
- }
- cap->leaf_nodes_identical = true;
+ /* queue node */
cap->leaf.shaper_private_supported = true;
cap->leaf.shaper_private_dual_rate_supported = false;
cap->leaf.shaper_private_rate_min = 0;
@@ -998,7 +1010,8 @@ ixgbe_hierarchy_commit(struct rte_eth_dev *dev,
goto done;
/* not support port max bandwidth yet */
- if (tm_conf->root->shaper_profile->profile.peak.rate) {
+ if (tm_conf->root->shaper_profile &&
+ tm_conf->root->shaper_profile->profile.peak.rate) {
error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE;
error->message = "no port max bandwidth";
goto fail_clear;
@@ -1006,7 +1019,8 @@ ixgbe_hierarchy_commit(struct rte_eth_dev *dev,
/* HW not support TC max bandwidth */
TAILQ_FOREACH(tm_node, &tm_conf->tc_list, node) {
- if (tm_node->shaper_profile->profile.peak.rate) {
+ if (tm_node->shaper_profile &&
+ tm_node->shaper_profile->profile.peak.rate) {
error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE;
error->message = "no TC max bandwidth";
goto fail_clear;
@@ -1015,7 +1029,10 @@ ixgbe_hierarchy_commit(struct rte_eth_dev *dev,
/* queue max bandwidth */
TAILQ_FOREACH(tm_node, &tm_conf->queue_list, node) {
- bw = tm_node->shaper_profile->profile.peak.rate;
+ if (tm_node->shaper_profile)
+ bw = tm_node->shaper_profile->profile.peak.rate;
+ else
+ bw = 0;
if (bw) {
/* interpret Bps to Mbps */
bw = bw * 8 / 1000 / 1000;
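The ixgbe_tm changes above make the private shaper optional: when params->shaper_profile_id is RTE_TM_SHAPER_PROFILE_ID_NONE the node is created with a NULL shaper_profile pointer, and every later reference-count update and peak-rate check is guarded accordingly. A minimal caller-side sketch, assuming the generic rte_tm API of this DPDK generation (rte_tm_node_add(), RTE_TM_NODE_LEVEL_ID_ANY, struct rte_tm_node_params); field names should be checked against rte_tm.h:

#include <string.h>
#include <rte_tm.h>

/* Add a queue node with no private shaper; IDs are illustrative. */
static int
add_unshaped_queue_node(uint16_t port_id, uint32_t node_id,
			uint32_t parent_tc_node_id)
{
	struct rte_tm_node_params params;
	struct rte_tm_error error;

	memset(&params, 0, sizeof(params));
	memset(&error, 0, sizeof(error));
	/* No private shaper: the driver must not dereference a profile. */
	params.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;

	return rte_tm_node_add(port_id, node_id, parent_tc_node_id,
			       0 /* priority */, 1 /* weight */,
			       RTE_TM_NODE_LEVEL_ID_ANY, &params, &error);
}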
diff --git a/drivers/net/ixgbe/rte_pmd_ixgbe.c b/drivers/net/ixgbe/rte_pmd_ixgbe.c
index 79897ff6..f1273785 100644
--- a/drivers/net/ixgbe/rte_pmd_ixgbe.c
+++ b/drivers/net/ixgbe/rte_pmd_ixgbe.c
@@ -38,7 +38,7 @@
#include "rte_pmd_ixgbe.h"
int
-rte_pmd_ixgbe_set_vf_mac_addr(uint8_t port, uint16_t vf,
+rte_pmd_ixgbe_set_vf_mac_addr(uint16_t port, uint16_t vf,
struct ether_addr *mac_addr)
{
struct ixgbe_hw *hw;
@@ -73,7 +73,7 @@ rte_pmd_ixgbe_set_vf_mac_addr(uint8_t port, uint16_t vf,
}
int
-rte_pmd_ixgbe_ping_vf(uint8_t port, uint16_t vf)
+rte_pmd_ixgbe_ping_vf(uint16_t port, uint16_t vf)
{
struct ixgbe_hw *hw;
struct ixgbe_vf_info *vfinfo;
@@ -105,7 +105,7 @@ rte_pmd_ixgbe_ping_vf(uint8_t port, uint16_t vf)
}
int
-rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on)
+rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint16_t port, uint16_t vf, uint8_t on)
{
struct ixgbe_hw *hw;
struct ixgbe_mac_info *mac;
@@ -135,7 +135,7 @@ rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on)
}
int
-rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on)
+rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint16_t port, uint16_t vf, uint8_t on)
{
struct ixgbe_hw *hw;
struct ixgbe_mac_info *mac;
@@ -164,7 +164,7 @@ rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on)
}
int
-rte_pmd_ixgbe_set_vf_vlan_insert(uint8_t port, uint16_t vf, uint16_t vlan_id)
+rte_pmd_ixgbe_set_vf_vlan_insert(uint16_t port, uint16_t vf, uint16_t vlan_id)
{
struct ixgbe_hw *hw;
uint32_t ctrl;
@@ -200,7 +200,7 @@ rte_pmd_ixgbe_set_vf_vlan_insert(uint8_t port, uint16_t vf, uint16_t vlan_id)
}
int
-rte_pmd_ixgbe_set_tx_loopback(uint8_t port, uint8_t on)
+rte_pmd_ixgbe_set_tx_loopback(uint16_t port, uint8_t on)
{
struct ixgbe_hw *hw;
uint32_t ctrl;
@@ -230,7 +230,7 @@ rte_pmd_ixgbe_set_tx_loopback(uint8_t port, uint8_t on)
}
int
-rte_pmd_ixgbe_set_all_queues_drop_en(uint8_t port, uint8_t on)
+rte_pmd_ixgbe_set_all_queues_drop_en(uint16_t port, uint8_t on)
{
struct ixgbe_hw *hw;
uint32_t reg_value;
@@ -260,7 +260,7 @@ rte_pmd_ixgbe_set_all_queues_drop_en(uint8_t port, uint8_t on)
}
int
-rte_pmd_ixgbe_set_vf_split_drop_en(uint8_t port, uint16_t vf, uint8_t on)
+rte_pmd_ixgbe_set_vf_split_drop_en(uint16_t port, uint16_t vf, uint8_t on)
{
struct ixgbe_hw *hw;
uint32_t reg_value;
@@ -295,7 +295,7 @@ rte_pmd_ixgbe_set_vf_split_drop_en(uint8_t port, uint16_t vf, uint8_t on)
}
int
-rte_pmd_ixgbe_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on)
+rte_pmd_ixgbe_set_vf_vlan_stripq(uint16_t port, uint16_t vf, uint8_t on)
{
struct rte_eth_dev *dev;
struct rte_pci_device *pci_dev;
@@ -342,7 +342,7 @@ rte_pmd_ixgbe_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on)
}
int
-rte_pmd_ixgbe_set_vf_rxmode(uint8_t port, uint16_t vf,
+rte_pmd_ixgbe_set_vf_rxmode(uint16_t port, uint16_t vf,
uint16_t rx_mask, uint8_t on)
{
int val = 0;
@@ -389,7 +389,7 @@ rte_pmd_ixgbe_set_vf_rxmode(uint8_t port, uint16_t vf,
}
int
-rte_pmd_ixgbe_set_vf_rx(uint8_t port, uint16_t vf, uint8_t on)
+rte_pmd_ixgbe_set_vf_rx(uint16_t port, uint16_t vf, uint8_t on)
{
struct rte_eth_dev *dev;
struct rte_pci_device *pci_dev;
@@ -439,7 +439,7 @@ rte_pmd_ixgbe_set_vf_rx(uint8_t port, uint16_t vf, uint8_t on)
}
int
-rte_pmd_ixgbe_set_vf_tx(uint8_t port, uint16_t vf, uint8_t on)
+rte_pmd_ixgbe_set_vf_tx(uint16_t port, uint16_t vf, uint8_t on)
{
struct rte_eth_dev *dev;
struct rte_pci_device *pci_dev;
@@ -489,7 +489,7 @@ rte_pmd_ixgbe_set_vf_tx(uint8_t port, uint16_t vf, uint8_t on)
}
int
-rte_pmd_ixgbe_set_vf_vlan_filter(uint8_t port, uint16_t vlan,
+rte_pmd_ixgbe_set_vf_vlan_filter(uint16_t port, uint16_t vlan,
uint64_t vf_mask, uint8_t vlan_on)
{
struct rte_eth_dev *dev;
@@ -524,7 +524,7 @@ rte_pmd_ixgbe_set_vf_vlan_filter(uint8_t port, uint16_t vlan,
}
int
-rte_pmd_ixgbe_set_vf_rate_limit(uint8_t port, uint16_t vf,
+rte_pmd_ixgbe_set_vf_rate_limit(uint16_t port, uint16_t vf,
uint16_t tx_rate, uint64_t q_msk)
{
struct rte_eth_dev *dev;
@@ -540,7 +540,7 @@ rte_pmd_ixgbe_set_vf_rate_limit(uint8_t port, uint16_t vf,
}
int
-rte_pmd_ixgbe_macsec_enable(uint8_t port, uint8_t en, uint8_t rp)
+rte_pmd_ixgbe_macsec_enable(uint16_t port, uint8_t en, uint8_t rp)
{
struct ixgbe_hw *hw;
struct rte_eth_dev *dev;
@@ -623,7 +623,7 @@ rte_pmd_ixgbe_macsec_enable(uint8_t port, uint8_t en, uint8_t rp)
}
int
-rte_pmd_ixgbe_macsec_disable(uint8_t port)
+rte_pmd_ixgbe_macsec_disable(uint16_t port)
{
struct ixgbe_hw *hw;
struct rte_eth_dev *dev;
@@ -687,7 +687,7 @@ rte_pmd_ixgbe_macsec_disable(uint8_t port)
}
int
-rte_pmd_ixgbe_macsec_config_txsc(uint8_t port, uint8_t *mac)
+rte_pmd_ixgbe_macsec_config_txsc(uint16_t port, uint8_t *mac)
{
struct ixgbe_hw *hw;
struct rte_eth_dev *dev;
@@ -712,7 +712,7 @@ rte_pmd_ixgbe_macsec_config_txsc(uint8_t port, uint8_t *mac)
}
int
-rte_pmd_ixgbe_macsec_config_rxsc(uint8_t port, uint8_t *mac, uint16_t pi)
+rte_pmd_ixgbe_macsec_config_rxsc(uint16_t port, uint8_t *mac, uint16_t pi)
{
struct ixgbe_hw *hw;
struct rte_eth_dev *dev;
@@ -738,7 +738,7 @@ rte_pmd_ixgbe_macsec_config_rxsc(uint8_t port, uint8_t *mac, uint16_t pi)
}
int
-rte_pmd_ixgbe_macsec_select_txsa(uint8_t port, uint8_t idx, uint8_t an,
+rte_pmd_ixgbe_macsec_select_txsa(uint16_t port, uint8_t idx, uint8_t an,
uint32_t pn, uint8_t *key)
{
struct ixgbe_hw *hw;
@@ -794,7 +794,7 @@ rte_pmd_ixgbe_macsec_select_txsa(uint8_t port, uint8_t idx, uint8_t an,
}
int
-rte_pmd_ixgbe_macsec_select_rxsa(uint8_t port, uint8_t idx, uint8_t an,
+rte_pmd_ixgbe_macsec_select_rxsa(uint16_t port, uint8_t idx, uint8_t an,
uint32_t pn, uint8_t *key)
{
struct ixgbe_hw *hw;
@@ -837,7 +837,7 @@ rte_pmd_ixgbe_macsec_select_rxsa(uint8_t port, uint8_t idx, uint8_t an,
}
int
-rte_pmd_ixgbe_set_tc_bw_alloc(uint8_t port,
+rte_pmd_ixgbe_set_tc_bw_alloc(uint16_t port,
uint8_t tc_num,
uint8_t *bw_weight)
{
@@ -911,7 +911,7 @@ rte_pmd_ixgbe_set_tc_bw_alloc(uint8_t port,
#ifdef RTE_LIBRTE_IXGBE_BYPASS
int
-rte_pmd_ixgbe_bypass_init(uint8_t port_id)
+rte_pmd_ixgbe_bypass_init(uint16_t port_id)
{
struct rte_eth_dev *dev;
@@ -926,7 +926,7 @@ rte_pmd_ixgbe_bypass_init(uint8_t port_id)
}
int
-rte_pmd_ixgbe_bypass_state_show(uint8_t port_id, uint32_t *state)
+rte_pmd_ixgbe_bypass_state_show(uint16_t port_id, uint32_t *state)
{
struct rte_eth_dev *dev;
@@ -940,7 +940,7 @@ rte_pmd_ixgbe_bypass_state_show(uint8_t port_id, uint32_t *state)
}
int
-rte_pmd_ixgbe_bypass_state_set(uint8_t port_id, uint32_t *new_state)
+rte_pmd_ixgbe_bypass_state_set(uint16_t port_id, uint32_t *new_state)
{
struct rte_eth_dev *dev;
@@ -954,7 +954,7 @@ rte_pmd_ixgbe_bypass_state_set(uint8_t port_id, uint32_t *new_state)
}
int
-rte_pmd_ixgbe_bypass_event_show(uint8_t port_id,
+rte_pmd_ixgbe_bypass_event_show(uint16_t port_id,
uint32_t event,
uint32_t *state)
{
@@ -970,7 +970,7 @@ rte_pmd_ixgbe_bypass_event_show(uint8_t port_id,
}
int
-rte_pmd_ixgbe_bypass_event_store(uint8_t port_id,
+rte_pmd_ixgbe_bypass_event_store(uint16_t port_id,
uint32_t event,
uint32_t state)
{
@@ -986,7 +986,7 @@ rte_pmd_ixgbe_bypass_event_store(uint8_t port_id,
}
int
-rte_pmd_ixgbe_bypass_wd_timeout_store(uint8_t port_id, uint32_t timeout)
+rte_pmd_ixgbe_bypass_wd_timeout_store(uint16_t port_id, uint32_t timeout)
{
struct rte_eth_dev *dev;
@@ -1000,7 +1000,7 @@ rte_pmd_ixgbe_bypass_wd_timeout_store(uint8_t port_id, uint32_t timeout)
}
int
-rte_pmd_ixgbe_bypass_ver_show(uint8_t port_id, uint32_t *ver)
+rte_pmd_ixgbe_bypass_ver_show(uint16_t port_id, uint32_t *ver)
{
struct rte_eth_dev *dev;
@@ -1014,7 +1014,7 @@ rte_pmd_ixgbe_bypass_ver_show(uint8_t port_id, uint32_t *ver)
}
int
-rte_pmd_ixgbe_bypass_wd_timeout_show(uint8_t port_id, uint32_t *wd_timeout)
+rte_pmd_ixgbe_bypass_wd_timeout_show(uint16_t port_id, uint32_t *wd_timeout)
{
struct rte_eth_dev *dev;
@@ -1028,7 +1028,7 @@ rte_pmd_ixgbe_bypass_wd_timeout_show(uint8_t port_id, uint32_t *wd_timeout)
}
int
-rte_pmd_ixgbe_bypass_wd_reset(uint8_t port_id)
+rte_pmd_ixgbe_bypass_wd_reset(uint16_t port_id)
{
struct rte_eth_dev *dev;
diff --git a/drivers/net/ixgbe/rte_pmd_ixgbe.h b/drivers/net/ixgbe/rte_pmd_ixgbe.h
index d33c285d..81b18f87 100644
--- a/drivers/net/ixgbe/rte_pmd_ixgbe.h
+++ b/drivers/net/ixgbe/rte_pmd_ixgbe.h
@@ -53,7 +53,7 @@
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if *vf* invalid.
*/
-int rte_pmd_ixgbe_ping_vf(uint8_t port, uint16_t vf);
+int rte_pmd_ixgbe_ping_vf(uint16_t port, uint16_t vf);
/**
* Set the VF MAC address.
@@ -69,7 +69,7 @@ int rte_pmd_ixgbe_ping_vf(uint8_t port, uint16_t vf);
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if *vf* or *mac_addr* is invalid.
*/
-int rte_pmd_ixgbe_set_vf_mac_addr(uint8_t port, uint16_t vf,
+int rte_pmd_ixgbe_set_vf_mac_addr(uint16_t port, uint16_t vf,
struct ether_addr *mac_addr);
/**
@@ -87,7 +87,8 @@ int rte_pmd_ixgbe_set_vf_mac_addr(uint8_t port, uint16_t vf,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on);
+int rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint16_t port, uint16_t vf,
+ uint8_t on);
/**
* Enable/Disable VF MAC anti spoofing.
@@ -104,7 +105,7 @@ int rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on);
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on);
+int rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint16_t port, uint16_t vf, uint8_t on);
/**
* Enable/Disable vf vlan insert
@@ -122,7 +123,7 @@ int rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on);
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_set_vf_vlan_insert(uint8_t port, uint16_t vf,
+int rte_pmd_ixgbe_set_vf_vlan_insert(uint16_t port, uint16_t vf,
uint16_t vlan_id);
/**
@@ -139,7 +140,7 @@ int rte_pmd_ixgbe_set_vf_vlan_insert(uint8_t port, uint16_t vf,
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_set_tx_loopback(uint8_t port, uint8_t on);
+int rte_pmd_ixgbe_set_tx_loopback(uint16_t port, uint8_t on);
/**
* set all queues drop enable bit
@@ -155,7 +156,7 @@ int rte_pmd_ixgbe_set_tx_loopback(uint8_t port, uint8_t on);
* - (-ENODEV) if *port* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_set_all_queues_drop_en(uint8_t port, uint8_t on);
+int rte_pmd_ixgbe_set_all_queues_drop_en(uint16_t port, uint8_t on);
/**
* set drop enable bit in the VF split rx control register
@@ -174,7 +175,7 @@ int rte_pmd_ixgbe_set_all_queues_drop_en(uint8_t port, uint8_t on);
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_set_vf_split_drop_en(uint8_t port, uint16_t vf, uint8_t on);
+int rte_pmd_ixgbe_set_vf_split_drop_en(uint16_t port, uint16_t vf, uint8_t on);
/**
* Enable/Disable vf vlan strip for all queues in a pool
@@ -194,7 +195,7 @@ int rte_pmd_ixgbe_set_vf_split_drop_en(uint8_t port, uint16_t vf, uint8_t on);
* - (-EINVAL) if bad parameter.
*/
int
-rte_pmd_ixgbe_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on);
+rte_pmd_ixgbe_set_vf_vlan_stripq(uint16_t port, uint16_t vf, uint8_t on);
/**
* Enable MACsec offload.
@@ -212,7 +213,7 @@ rte_pmd_ixgbe_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on);
* - (-ENODEV) if *port* invalid.
* - (-ENOTSUP) if hardware doesn't support this feature.
*/
-int rte_pmd_ixgbe_macsec_enable(uint8_t port, uint8_t en, uint8_t rp);
+int rte_pmd_ixgbe_macsec_enable(uint16_t port, uint8_t en, uint8_t rp);
/**
* Disable MACsec offload.
@@ -224,7 +225,7 @@ int rte_pmd_ixgbe_macsec_enable(uint8_t port, uint8_t en, uint8_t rp);
* - (-ENODEV) if *port* invalid.
* - (-ENOTSUP) if hardware doesn't support this feature.
*/
-int rte_pmd_ixgbe_macsec_disable(uint8_t port);
+int rte_pmd_ixgbe_macsec_disable(uint16_t port);
/**
* Configure Tx SC (Secure Connection).
@@ -238,7 +239,7 @@ int rte_pmd_ixgbe_macsec_disable(uint8_t port);
* - (-ENODEV) if *port* invalid.
* - (-ENOTSUP) if hardware doesn't support this feature.
*/
-int rte_pmd_ixgbe_macsec_config_txsc(uint8_t port, uint8_t *mac);
+int rte_pmd_ixgbe_macsec_config_txsc(uint16_t port, uint8_t *mac);
/**
* Configure Rx SC (Secure Connection).
@@ -254,7 +255,7 @@ int rte_pmd_ixgbe_macsec_config_txsc(uint8_t port, uint8_t *mac);
* - (-ENODEV) if *port* invalid.
* - (-ENOTSUP) if hardware doesn't support this feature.
*/
-int rte_pmd_ixgbe_macsec_config_rxsc(uint8_t port, uint8_t *mac, uint16_t pi);
+int rte_pmd_ixgbe_macsec_config_rxsc(uint16_t port, uint8_t *mac, uint16_t pi);
/**
* Enable Tx SA (Secure Association).
@@ -275,7 +276,7 @@ int rte_pmd_ixgbe_macsec_config_rxsc(uint8_t port, uint8_t *mac, uint16_t pi);
* - (-ENOTSUP) if hardware doesn't support this feature.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_macsec_select_txsa(uint8_t port, uint8_t idx, uint8_t an,
+int rte_pmd_ixgbe_macsec_select_txsa(uint16_t port, uint8_t idx, uint8_t an,
uint32_t pn, uint8_t *key);
/**
@@ -297,7 +298,7 @@ int rte_pmd_ixgbe_macsec_select_txsa(uint8_t port, uint8_t idx, uint8_t an,
* - (-ENOTSUP) if hardware doesn't support this feature.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_macsec_select_rxsa(uint8_t port, uint8_t idx, uint8_t an,
+int rte_pmd_ixgbe_macsec_select_rxsa(uint16_t port, uint8_t idx, uint8_t an,
uint32_t pn, uint8_t *key);
/**
@@ -323,7 +324,8 @@ int rte_pmd_ixgbe_macsec_select_rxsa(uint8_t port, uint8_t idx, uint8_t an,
* - (-EINVAL) if bad parameter.
*/
int
-rte_pmd_ixgbe_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mask, uint8_t on);
+rte_pmd_ixgbe_set_vf_rxmode(uint16_t port, uint16_t vf, uint16_t rx_mask,
+ uint8_t on);
/**
* Enable or disable a VF traffic receive of an Ethernet device.
@@ -342,7 +344,7 @@ rte_pmd_ixgbe_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mask, uint8_t
* - (-EINVAL) if bad parameter.
*/
int
-rte_pmd_ixgbe_set_vf_rx(uint8_t port, uint16_t vf, uint8_t on);
+rte_pmd_ixgbe_set_vf_rx(uint16_t port, uint16_t vf, uint8_t on);
/**
* Enable or disable a VF traffic transmit of the Ethernet device.
@@ -361,7 +363,7 @@ rte_pmd_ixgbe_set_vf_rx(uint8_t port, uint16_t vf, uint8_t on);
* - (-EINVAL) if bad parameter.
*/
int
-rte_pmd_ixgbe_set_vf_tx(uint8_t port, uint16_t vf, uint8_t on);
+rte_pmd_ixgbe_set_vf_tx(uint16_t port, uint16_t vf, uint8_t on);
/**
* Enable/Disable hardware VF VLAN filtering by an Ethernet device of
@@ -383,7 +385,8 @@ rte_pmd_ixgbe_set_vf_tx(uint8_t port, uint16_t vf, uint8_t on);
* - (-EINVAL) if bad parameter.
*/
int
-rte_pmd_ixgbe_set_vf_vlan_filter(uint8_t port, uint16_t vlan, uint64_t vf_mask, uint8_t vlan_on);
+rte_pmd_ixgbe_set_vf_vlan_filter(uint16_t port, uint16_t vlan,
+ uint64_t vf_mask, uint8_t vlan_on);
/**
* Set the rate limitation for a vf on an Ethernet device.
@@ -402,7 +405,8 @@ rte_pmd_ixgbe_set_vf_vlan_filter(uint8_t port, uint16_t vlan, uint64_t vf_mask,
* - (-ENODEV) if *port_id* invalid.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_set_vf_rate_limit(uint8_t port, uint16_t vf, uint16_t tx_rate, uint64_t q_msk);
+int rte_pmd_ixgbe_set_vf_rate_limit(uint16_t port, uint16_t vf,
+ uint16_t tx_rate, uint64_t q_msk);
/**
* Set all the TCs' bandwidth weight.
@@ -423,7 +427,7 @@ int rte_pmd_ixgbe_set_vf_rate_limit(uint8_t port, uint16_t vf, uint16_t tx_rate,
* - (-EINVAL) if bad parameter.
* - (-ENOTSUP) not supported by firmware.
*/
-int rte_pmd_ixgbe_set_tc_bw_alloc(uint8_t port,
+int rte_pmd_ixgbe_set_tc_bw_alloc(uint16_t port,
uint8_t tc_num,
uint8_t *bw_weight);
@@ -439,7 +443,7 @@ int rte_pmd_ixgbe_set_tc_bw_alloc(uint8_t port,
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_bypass_init(uint8_t port);
+int rte_pmd_ixgbe_bypass_init(uint16_t port);
/**
* Return bypass state.
@@ -456,7 +460,7 @@ int rte_pmd_ixgbe_bypass_init(uint8_t port);
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_bypass_state_show(uint8_t port, uint32_t *state);
+int rte_pmd_ixgbe_bypass_state_show(uint16_t port, uint32_t *state);
/**
* Set bypass state
@@ -473,7 +477,7 @@ int rte_pmd_ixgbe_bypass_state_show(uint8_t port, uint32_t *state);
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_bypass_state_set(uint8_t port, uint32_t *new_state);
+int rte_pmd_ixgbe_bypass_state_set(uint16_t port, uint32_t *new_state);
/**
* Return bypass state when given event occurs.
@@ -497,7 +501,7 @@ int rte_pmd_ixgbe_bypass_state_set(uint8_t port, uint32_t *new_state);
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_bypass_event_show(uint8_t port,
+int rte_pmd_ixgbe_bypass_event_show(uint16_t port,
uint32_t event,
uint32_t *state);
@@ -523,7 +527,7 @@ int rte_pmd_ixgbe_bypass_event_show(uint8_t port,
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_bypass_event_store(uint8_t port,
+int rte_pmd_ixgbe_bypass_event_store(uint16_t port,
uint32_t event,
uint32_t state);
@@ -547,7 +551,7 @@ int rte_pmd_ixgbe_bypass_event_store(uint8_t port,
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_bypass_wd_timeout_store(uint8_t port, uint32_t timeout);
+int rte_pmd_ixgbe_bypass_wd_timeout_store(uint16_t port, uint32_t timeout);
/**
* Get bypass firmware version.
@@ -561,7 +565,7 @@ int rte_pmd_ixgbe_bypass_wd_timeout_store(uint8_t port, uint32_t timeout);
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_bypass_ver_show(uint8_t port, uint32_t *ver);
+int rte_pmd_ixgbe_bypass_ver_show(uint16_t port, uint32_t *ver);
/**
* Return bypass watchdog timeout in seconds
@@ -583,7 +587,7 @@ int rte_pmd_ixgbe_bypass_ver_show(uint8_t port, uint32_t *ver);
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_bypass_wd_timeout_show(uint8_t port, uint32_t *wd_timeout);
+int rte_pmd_ixgbe_bypass_wd_timeout_show(uint16_t port, uint32_t *wd_timeout);
/**
* Reset bypass watchdog timer
@@ -595,7 +599,7 @@ int rte_pmd_ixgbe_bypass_wd_timeout_show(uint8_t port, uint32_t *wd_timeout);
* - (-ENOTSUP) if hardware doesn't support.
* - (-EINVAL) if bad parameter.
*/
-int rte_pmd_ixgbe_bypass_wd_reset(uint8_t port);
+int rte_pmd_ixgbe_bypass_wd_reset(uint16_t port);
/**
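Both the implementation and the exported prototypes above switch the rte_pmd_ixgbe port argument from uint8_t to uint16_t, following the ethdev-wide widening of port IDs; existing callers compile unchanged because uint8_t promotes to uint16_t, but code that stores port numbers should move to the wider type. A small usage sketch (port and VF numbers are illustrative):

#include <stdint.h>
#include <rte_pmd_ixgbe.h>

/* Disable VLAN anti-spoof checking for one VF on a widened port ID. */
static int
disable_vf_vlan_spoof_check(uint16_t port_id, uint16_t vf)
{
	return rte_pmd_ixgbe_set_vf_vlan_anti_spoof(port_id, vf, 0);
}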
diff --git a/drivers/net/kni/Makefile b/drivers/net/kni/Makefile
index 46a1ad08..a3f51f92 100644
--- a/drivers/net/kni/Makefile
+++ b/drivers/net/kni/Makefile
@@ -38,6 +38,9 @@ LIB = librte_pmd_kni.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lpthread
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_kni
+LDLIBS += -lrte_bus_vdev
EXPORT_MAP := rte_pmd_kni_version.map
diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
index 72a2733b..8f269532 100644
--- a/drivers/net/kni/rte_eth_kni.c
+++ b/drivers/net/kni/rte_eth_kni.c
@@ -40,7 +40,7 @@
#include <rte_kni.h>
#include <rte_kvargs.h>
#include <rte_malloc.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
/* Only single queue supported */
#define KNI_MAX_QUEUE_PER_PORT 1
@@ -283,7 +283,7 @@ eth_kni_link_update(struct rte_eth_dev *dev __rte_unused,
return 0;
}
-static void
+static int
eth_kni_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
unsigned long rx_packets_total = 0, rx_bytes_total = 0;
@@ -320,6 +320,8 @@ eth_kni_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->opackets = tx_packets_total;
stats->obytes = tx_bytes_total;
stats->oerrors = tx_packets_err_total;
+
+ return 0;
}
static void
@@ -356,8 +358,6 @@ static const struct eth_dev_ops eth_kni_ops = {
.stats_reset = eth_kni_stats_reset,
};
-static struct rte_vdev_driver eth_kni_drv;
-
static struct rte_eth_dev *
eth_kni_create(struct rte_vdev_device *vdev,
struct eth_kni_args *args,
@@ -395,8 +395,6 @@ eth_kni_create(struct rte_vdev_device *vdev,
eth_dev->data = data;
eth_dev->dev_ops = &eth_kni_ops;
- data->dev_flags = RTE_ETH_DEV_DETACHABLE;
-
internals->no_request_thread = args->no_request_thread;
return eth_dev;
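The KNI PMD also returns an int from its stats_get callback now, matching the updated eth_dev_ops signature so failures can be reported instead of silently dropped. A minimal sketch of the new callback shape (counter sources are illustrative; the real driver sums per-queue totals):

#include <errno.h>
#include <rte_ethdev.h>

static int
example_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	if (dev == NULL || stats == NULL)
		return -EINVAL;		/* errors can now be propagated */

	stats->ipackets = 0;		/* fill from per-queue counters */
	stats->ibytes = 0;
	stats->opackets = 0;
	stats->obytes = 0;
	return 0;			/* success */
}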
diff --git a/drivers/net/liquidio/Makefile b/drivers/net/liquidio/Makefile
index 32c06f5b..5110099f 100644
--- a/drivers/net/liquidio/Makefile
+++ b/drivers/net/liquidio/Makefile
@@ -40,6 +40,9 @@ LIB = librte_pmd_lio.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)/base -I$(SRCDIR)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
EXPORT_MAP := rte_pmd_lio_version.map
diff --git a/drivers/net/liquidio/base/lio_23xx_vf.c b/drivers/net/liquidio/base/lio_23xx_vf.c
index e30c20dc..99780178 100644
--- a/drivers/net/liquidio/base/lio_23xx_vf.c
+++ b/drivers/net/liquidio/base/lio_23xx_vf.c
@@ -379,25 +379,6 @@ cn23xx_vf_disable_io_queues(struct lio_device *lio_dev)
cn23xx_vf_reset_io_queues(lio_dev, num_queues);
}
-void
-cn23xx_vf_ask_pf_to_do_flr(struct lio_device *lio_dev)
-{
- struct lio_mbox_cmd mbox_cmd;
-
- memset(&mbox_cmd, 0, sizeof(struct lio_mbox_cmd));
- mbox_cmd.msg.s.type = LIO_MBOX_REQUEST;
- mbox_cmd.msg.s.resp_needed = 0;
- mbox_cmd.msg.s.cmd = LIO_VF_FLR_REQUEST;
- mbox_cmd.msg.s.len = 1;
- mbox_cmd.q_no = 0;
- mbox_cmd.recv_len = 0;
- mbox_cmd.recv_status = 0;
- mbox_cmd.fn = NULL;
- mbox_cmd.fn_arg = 0;
-
- lio_mbox_write(lio_dev, &mbox_cmd);
-}
-
static void
cn23xx_pfvf_hs_callback(struct lio_device *lio_dev,
struct lio_mbox_cmd *cmd, void *arg)
diff --git a/drivers/net/liquidio/base/lio_23xx_vf.h b/drivers/net/liquidio/base/lio_23xx_vf.h
index ad8db0df..83dc053a 100644
--- a/drivers/net/liquidio/base/lio_23xx_vf.h
+++ b/drivers/net/liquidio/base/lio_23xx_vf.h
@@ -87,8 +87,6 @@ int cn23xx_vf_set_io_queues_off(struct lio_device *lio_dev);
#define CN23XX_VF_BUSY_READING_REG_LOOP_COUNT 100000
-void cn23xx_vf_ask_pf_to_do_flr(struct lio_device *lio_dev);
-
int cn23xx_pfvf_handshake(struct lio_device *lio_dev);
int cn23xx_vf_setup_device(struct lio_device *lio_dev);
diff --git a/drivers/net/liquidio/base/lio_hw_defs.h b/drivers/net/liquidio/base/lio_hw_defs.h
index de58c7cc..d4cd23ce 100644
--- a/drivers/net/liquidio/base/lio_hw_defs.h
+++ b/drivers/net/liquidio/base/lio_hw_defs.h
@@ -43,10 +43,14 @@
#define LIO_CN23XX_VF_VID 0x9712
/* CN23xx subsystem device ids */
-#define PCI_SUBSYS_DEV_ID_CN2350_210 0x0004
-#define PCI_SUBSYS_DEV_ID_CN2360_210 0x0005
-#define PCI_SUBSYS_DEV_ID_CN2360_225 0x0006
-#define PCI_SUBSYS_DEV_ID_CN2350_225 0x0007
+#define PCI_SUBSYS_DEV_ID_CN2350_210 0x0004
+#define PCI_SUBSYS_DEV_ID_CN2360_210 0x0005
+#define PCI_SUBSYS_DEV_ID_CN2360_225 0x0006
+#define PCI_SUBSYS_DEV_ID_CN2350_225 0x0007
+#define PCI_SUBSYS_DEV_ID_CN2350_210SVPN3 0x0008
+#define PCI_SUBSYS_DEV_ID_CN2360_210SVPN3 0x0009
+#define PCI_SUBSYS_DEV_ID_CN2350_210SVPT 0x000a
+#define PCI_SUBSYS_DEV_ID_CN2360_210SVPT 0x000b
/* --------------------------CONFIG VALUES------------------------ */
@@ -106,6 +110,8 @@ enum lio_card_type {
#define LIO_FW_VERSION_LENGTH 32
+#define LIO_VF_TRUST_MIN_VERSION "1.7.1"
+
/** Tag types used by Octeon cores in its work. */
enum octeon_tag_type {
OCTEON_ORDERED_TAG = 0,
@@ -137,6 +143,7 @@ enum octeon_tag_type {
#define LIO_MAX_RX_PKTLEN (64 * 1024)
/* NIC Command types */
+#define LIO_CMD_CHANGE_MTU 0x1
#define LIO_CMD_CHANGE_DEVFLAGS 0x3
#define LIO_CMD_RX_CTL 0x4
#define LIO_CMD_CLEAR_STATS 0x6
@@ -184,6 +191,7 @@ enum octeon_tag_type {
/* Interface flags communicated between host driver and core app. */
enum lio_ifflags {
+ LIO_IFFLAG_PROMISC = 0x01,
LIO_IFFLAG_ALLMULTI = 0x02,
LIO_IFFLAG_UNICAST = 0x10
};
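LIO_VF_TRUST_MIN_VERSION ("1.7.1") introduced here gates the new promiscuous support: the ethdev hunks below store the firmware version string at configure time and reject promiscuous changes when it compares below this value. A hedged sketch of that gate as a standalone helper (the helper name is illustrative); plain strcmp() mirrors the patch and assumes dotted version components of equal width:

#include <string.h>

#define LIO_VF_TRUST_MIN_VERSION "1.7.1"

/* Return nonzero when the VF firmware is recent enough for promiscuous mode. */
static int
lio_fw_supports_promisc(const char *firmware_version)
{
	return strcmp(firmware_version, LIO_VF_TRUST_MIN_VERSION) >= 0;
}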
diff --git a/drivers/net/liquidio/base/lio_mbox.h b/drivers/net/liquidio/base/lio_mbox.h
index b0875d64..f1c5b8ec 100644
--- a/drivers/net/liquidio/base/lio_mbox.h
+++ b/drivers/net/liquidio/base/lio_mbox.h
@@ -43,7 +43,6 @@
#define LIO_MBOX_DATA_MAX 32
#define LIO_VF_ACTIVE 0x1
-#define LIO_VF_FLR_REQUEST 0x2
#define LIO_CORES_CRASHED 0x3
/* Macro for Read acknowledgment */
diff --git a/drivers/net/liquidio/lio_ethdev.c b/drivers/net/liquidio/lio_ethdev.c
index 479936a5..4b189661 100644
--- a/drivers/net/liquidio/lio_ethdev.c
+++ b/drivers/net/liquidio/lio_ethdev.c
@@ -311,7 +311,7 @@ lio_dev_xstats_reset(struct rte_eth_dev *eth_dev)
}
/* Retrieve the device statistics (# packets in/out, # bytes in/out, etc */
-static void
+static int
lio_dev_stats_get(struct rte_eth_dev *eth_dev,
struct rte_eth_stats *stats)
{
@@ -359,6 +359,8 @@ lio_dev_stats_get(struct rte_eth_dev *eth_dev,
stats->ibytes = bytes;
stats->ipackets = pkts;
stats->ierrors = drop;
+
+ return 0;
}
static void
@@ -403,6 +405,10 @@ lio_dev_info_get(struct rte_eth_dev *eth_dev,
/* CN23xx 10G cards */
case PCI_SUBSYS_DEV_ID_CN2350_210:
case PCI_SUBSYS_DEV_ID_CN2360_210:
+ case PCI_SUBSYS_DEV_ID_CN2350_210SVPN3:
+ case PCI_SUBSYS_DEV_ID_CN2360_210SVPN3:
+ case PCI_SUBSYS_DEV_ID_CN2350_210SVPT:
+ case PCI_SUBSYS_DEV_ID_CN2360_210SVPT:
devinfo->speed_capa = ETH_LINK_SPEED_10G;
break;
/* CN23xx 25G cards */
@@ -411,8 +417,9 @@ lio_dev_info_get(struct rte_eth_dev *eth_dev,
devinfo->speed_capa = ETH_LINK_SPEED_25G;
break;
default:
+ devinfo->speed_capa = ETH_LINK_SPEED_10G;
lio_dev_err(lio_dev,
- "Unknown CN23XX subsystem device id. Not setting speed capability.\n");
+ "Unknown CN23XX subsystem device id. Setting 10G as default link speed.\n");
}
devinfo->max_rx_queues = lio_dev->max_rx_queues;
@@ -446,29 +453,64 @@ lio_dev_info_get(struct rte_eth_dev *eth_dev,
}
static int
-lio_dev_validate_vf_mtu(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
+lio_dev_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu)
{
struct lio_device *lio_dev = LIO_DEV(eth_dev);
+ uint16_t pf_mtu = lio_dev->linfo.link.s.mtu;
+ uint32_t frame_len = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
+ struct lio_dev_ctrl_cmd ctrl_cmd;
+ struct lio_ctrl_pkt ctrl_pkt;
PMD_INIT_FUNC_TRACE();
if (!lio_dev->intf_open) {
- lio_dev_err(lio_dev, "Port %d down, can't check MTU\n",
+ lio_dev_err(lio_dev, "Port %d down, can't set MTU\n",
lio_dev->port_id);
return -EINVAL;
}
- /* Limit the MTU to make sure the ethernet packets are between
- * ETHER_MIN_MTU bytes and PF's MTU
+ /* check if VF MTU is within allowed range.
+ * New value should not exceed PF MTU.
*/
- if ((new_mtu < ETHER_MIN_MTU) ||
- (new_mtu > lio_dev->linfo.link.s.mtu)) {
- lio_dev_err(lio_dev, "Invalid MTU: %d\n", new_mtu);
- lio_dev_err(lio_dev, "Valid range %d and %d\n",
- ETHER_MIN_MTU, lio_dev->linfo.link.s.mtu);
+ if ((mtu < ETHER_MIN_MTU) || (mtu > pf_mtu)) {
+ lio_dev_err(lio_dev, "VF MTU should be >= %d and <= %d\n",
+ ETHER_MIN_MTU, pf_mtu);
return -EINVAL;
}
+ /* flush added to prevent cmd failure
+ * in case the queue is full
+ */
+ lio_flush_iq(lio_dev, lio_dev->instr_queue[0]);
+
+ memset(&ctrl_pkt, 0, sizeof(struct lio_ctrl_pkt));
+ memset(&ctrl_cmd, 0, sizeof(struct lio_dev_ctrl_cmd));
+
+ ctrl_cmd.eth_dev = eth_dev;
+ ctrl_cmd.cond = 0;
+
+ ctrl_pkt.ncmd.s.cmd = LIO_CMD_CHANGE_MTU;
+ ctrl_pkt.ncmd.s.param1 = mtu;
+ ctrl_pkt.ctrl_cmd = &ctrl_cmd;
+
+ if (lio_send_ctrl_pkt(lio_dev, &ctrl_pkt)) {
+ lio_dev_err(lio_dev, "Failed to send command to change MTU\n");
+ return -1;
+ }
+
+ if (lio_wait_for_ctrl_cmd(lio_dev, &ctrl_cmd)) {
+ lio_dev_err(lio_dev, "Command to change MTU timed out\n");
+ return -1;
+ }
+
+ if (frame_len > ETHER_MAX_LEN)
+ eth_dev->data->dev_conf.rxmode.jumbo_frame = 1;
+ else
+ eth_dev->data->dev_conf.rxmode.jumbo_frame = 0;
+
+ eth_dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_len;
+ eth_dev->data->mtu = mtu;
+
return 0;
}
@@ -939,6 +981,7 @@ lio_dev_link_update(struct rte_eth_dev *eth_dev,
link.link_status = ETH_LINK_DOWN;
link.link_speed = ETH_SPEED_NUM_NONE;
link.link_duplex = ETH_LINK_HALF_DUPLEX;
+ link.link_autoneg = ETH_LINK_AUTONEG;
memset(&old, 0, sizeof(old));
/* Return what we found */
@@ -1011,6 +1054,48 @@ lio_change_dev_flag(struct rte_eth_dev *eth_dev)
}
static void
+lio_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
+{
+ struct lio_device *lio_dev = LIO_DEV(eth_dev);
+
+ if (strcmp(lio_dev->firmware_version, LIO_VF_TRUST_MIN_VERSION) < 0) {
+ lio_dev_err(lio_dev, "Require firmware version >= %s\n",
+ LIO_VF_TRUST_MIN_VERSION);
+ return;
+ }
+
+ if (!lio_dev->intf_open) {
+ lio_dev_err(lio_dev, "Port %d down, can't enable promiscuous\n",
+ lio_dev->port_id);
+ return;
+ }
+
+ lio_dev->ifflags |= LIO_IFFLAG_PROMISC;
+ lio_change_dev_flag(eth_dev);
+}
+
+static void
+lio_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
+{
+ struct lio_device *lio_dev = LIO_DEV(eth_dev);
+
+ if (strcmp(lio_dev->firmware_version, LIO_VF_TRUST_MIN_VERSION) < 0) {
+ lio_dev_err(lio_dev, "Require firmware version >= %s\n",
+ LIO_VF_TRUST_MIN_VERSION);
+ return;
+ }
+
+ if (!lio_dev->intf_open) {
+ lio_dev_err(lio_dev, "Port %d down, can't disable promiscuous\n",
+ lio_dev->port_id);
+ return;
+ }
+
+ lio_dev->ifflags &= ~LIO_IFFLAG_PROMISC;
+ lio_change_dev_flag(eth_dev);
+}
+
+static void
lio_dev_allmulticast_enable(struct rte_eth_dev *eth_dev)
{
struct lio_device *lio_dev = LIO_DEV(eth_dev);
@@ -1333,6 +1418,11 @@ lio_dev_get_link_status(struct rte_eth_dev *eth_dev)
lio_swap_8B_data((uint64_t *)ls, sizeof(union octeon_link_status) >> 3);
if (lio_dev->linfo.link.link_status64 != ls->link_status64) {
+ if (ls->s.mtu < eth_dev->data->mtu) {
+ lio_dev_info(lio_dev, "Lowered VF MTU to %d as PF MTU dropped\n",
+ ls->s.mtu);
+ eth_dev->data->mtu = ls->s.mtu;
+ }
lio_dev->linfo.link.link_status64 = ls->link_status64;
lio_dev_link_update(eth_dev, 0);
}
@@ -1404,35 +1494,22 @@ lio_dev_start(struct rte_eth_dev *eth_dev)
if (lio_dev->linfo.link.link_status64 == 0) {
ret = -1;
- goto dev_mtu_check_error;
+ goto dev_mtu_set_error;
}
- if (eth_dev->data->dev_conf.rxmode.jumbo_frame == 1) {
- if (frame_len <= ETHER_MAX_LEN ||
- frame_len > LIO_MAX_RX_PKTLEN) {
- lio_dev_err(lio_dev, "max packet length should be >= %d and < %d when jumbo frame is enabled\n",
- ETHER_MAX_LEN, LIO_MAX_RX_PKTLEN);
- ret = -EINVAL;
- goto dev_mtu_check_error;
- }
- mtu = (uint16_t)(frame_len - ETHER_HDR_LEN - ETHER_CRC_LEN);
- } else {
- /* default MTU */
- mtu = ETHER_MTU;
- eth_dev->data->dev_conf.rxmode.max_rx_pkt_len = ETHER_MAX_LEN;
- }
+ mtu = (uint16_t)(frame_len - ETHER_HDR_LEN - ETHER_CRC_LEN);
+ if (mtu < ETHER_MIN_MTU)
+ mtu = ETHER_MIN_MTU;
- if (lio_dev->linfo.link.s.mtu != mtu) {
- ret = lio_dev_validate_vf_mtu(eth_dev, mtu);
+ if (eth_dev->data->mtu != mtu) {
+ ret = lio_dev_mtu_set(eth_dev, mtu);
if (ret)
- goto dev_mtu_check_error;
+ goto dev_mtu_set_error;
}
- eth_dev->data->mtu = mtu;
-
return 0;
-dev_mtu_check_error:
+dev_mtu_set_error:
rte_eal_alarm_cancel(lio_sync_link_state_check, eth_dev);
dev_lsc_handle_error:
@@ -1559,9 +1636,6 @@ lio_dev_close(struct rte_eth_dev *eth_dev)
rte_write32(pkt_count, droq->pkts_sent_reg);
}
- /* Do FLR for the VF */
- cn23xx_vf_ask_pf_to_do_flr(lio_dev);
-
/* lio_free_mbox */
lio_dev->fn_list.free_mbox(lio_dev);
@@ -1721,6 +1795,9 @@ static int lio_dev_configure(struct rte_eth_dev *eth_dev)
goto nic_config_fail;
}
+ snprintf(lio_dev->firmware_version, LIO_FW_VERSION_LENGTH, "%s",
+ resp->cfg_info.lio_firmware_version);
+
lio_swap_8B_data((uint64_t *)(&resp->cfg_info),
sizeof(struct octeon_if_cfg_info) >> 3);
@@ -1824,6 +1901,8 @@ static const struct eth_dev_ops liovf_eth_dev_ops = {
.dev_set_link_up = lio_dev_set_link_up,
.dev_set_link_down = lio_dev_set_link_down,
.dev_close = lio_dev_close,
+ .promiscuous_enable = lio_dev_promiscuous_enable,
+ .promiscuous_disable = lio_dev_promiscuous_disable,
.allmulticast_enable = lio_dev_allmulticast_enable,
.allmulticast_disable = lio_dev_allmulticast_disable,
.link_update = lio_dev_link_update,
@@ -1844,6 +1923,7 @@ static const struct eth_dev_ops liovf_eth_dev_ops = {
.rss_hash_update = lio_dev_rss_hash_update,
.udp_tunnel_port_add = lio_dev_udp_tunnel_add,
.udp_tunnel_port_del = lio_dev_udp_tunnel_del,
+ .mtu_set = lio_dev_mtu_set,
};
static void
@@ -1929,11 +2009,6 @@ lio_first_time_init(struct lio_device *lio_dev,
if (cn23xx_pfvf_handshake(lio_dev))
goto error;
- /* Initial reset */
- cn23xx_vf_ask_pf_to_do_flr(lio_dev);
- /* Wait for FLR for 100ms per SRIOV specification */
- rte_delay_ms(100);
-
if (cn23xx_vf_set_io_queues_off(lio_dev)) {
lio_dev_err(lio_dev, "Setting io queues off failed\n");
goto error;
@@ -2009,7 +2084,6 @@ lio_eth_dev_init(struct rte_eth_dev *eth_dev)
return 0;
rte_eth_copy_pci_info(eth_dev, pdev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
if (pdev->mem_resource[0].addr) {
lio_dev->hw_addr = pdev->mem_resource[0].addr;
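With .mtu_set, .promiscuous_enable and .promiscuous_disable now wired into liovf_eth_dev_ops, applications reach these new control paths through the standard ethdev entry points. A hedged application-side sketch, assuming the rte_eth_dev_set_mtu()/rte_eth_promiscuous_enable() signatures of this DPDK generation; the port number and MTU value are illustrative:

#include <rte_ethdev.h>

static int
configure_lio_vf(uint16_t port_id)
{
	int ret;

	/* Forwarded to lio_dev_mtu_set(); must stay within the PF MTU. */
	ret = rte_eth_dev_set_mtu(port_id, 1500);
	if (ret != 0)
		return ret;

	/* Needs firmware >= LIO_VF_TRUST_MIN_VERSION per the hunks above. */
	rte_eth_promiscuous_enable(port_id);
	return 0;
}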
diff --git a/drivers/net/liquidio/lio_rxtx.c b/drivers/net/liquidio/lio_rxtx.c
index 2bbb893c..376893ac 100644
--- a/drivers/net/liquidio/lio_rxtx.c
+++ b/drivers/net/liquidio/lio_rxtx.c
@@ -172,7 +172,7 @@ lio_alloc_info_buffer(struct lio_device *lio_dev,
if (droq->info_mz == NULL)
return NULL;
- droq->info_list_dma = droq->info_mz->phys_addr;
+ droq->info_list_dma = droq->info_mz->iova;
droq->info_alloc_size = droq->info_mz->len;
droq->info_base_addr = (size_t)droq->info_mz->addr;
@@ -222,7 +222,7 @@ lio_init_droq(struct lio_device *lio_dev, uint32_t q_no,
return -1;
}
- droq->desc_ring_dma = droq->desc_ring_mz->phys_addr;
+ droq->desc_ring_dma = droq->desc_ring_mz->iova;
droq->desc_ring = (struct lio_droq_desc *)droq->desc_ring_mz->addr;
lio_dev_dbg(lio_dev, "droq[%d]: desc_ring: virt: 0x%p, dma: %lx\n",
@@ -734,7 +734,7 @@ lio_init_instr_queue(struct lio_device *lio_dev,
return -1;
}
- iq->base_addr_dma = iq->iq_mz->phys_addr;
+ iq->base_addr_dma = iq->iq_mz->iova;
iq->base_addr = (uint8_t *)iq->iq_mz->addr;
iq->max_count = num_descs;
@@ -1298,7 +1298,7 @@ lio_alloc_soft_command(struct lio_device *lio_dev, uint32_t datasize,
sc = rte_pktmbuf_mtod(m, struct lio_soft_command *);
memset(sc, 0, LIO_SOFT_COMMAND_BUFFER_SIZE);
sc->size = LIO_SOFT_COMMAND_BUFFER_SIZE;
- sc->dma_addr = rte_mbuf_data_dma_addr(m);
+ sc->dma_addr = rte_mbuf_data_iova(m);
sc->mbuf = m;
dma_addr = sc->dma_addr;
@@ -1739,12 +1739,12 @@ lio_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
cmdsetup.s.u.datasize = pkt_len;
lio_prepare_pci_cmd(lio_dev, &ndata.cmd,
&cmdsetup, tag);
- ndata.cmd.cmd3.dptr = rte_mbuf_data_dma_addr(m);
+ ndata.cmd.cmd3.dptr = rte_mbuf_data_iova(m);
ndata.reqtype = LIO_REQTYPE_NORESP_NET;
} else {
struct lio_buf_free_info *finfo;
struct lio_gather *g;
- phys_addr_t phyaddr;
+ rte_iova_t phyaddr;
int i, frags;
finfo = (struct lio_buf_free_info *)rte_malloc(NULL,
@@ -1771,7 +1771,7 @@ lio_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
&cmdsetup, tag);
memset(g->sg, 0, g->sg_size);
- g->sg[0].ptr[0] = rte_mbuf_data_dma_addr(m);
+ g->sg[0].ptr[0] = rte_mbuf_data_iova(m);
lio_add_sg_size(&g->sg[0], m->data_len, 0);
pkt_len = m->data_len;
finfo->mbuf = m;
@@ -1782,7 +1782,7 @@ lio_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
m = m->next;
while (frags--) {
g->sg[(i >> 2)].ptr[(i & 3)] =
- rte_mbuf_data_dma_addr(m);
+ rte_mbuf_data_iova(m);
lio_add_sg_size(&g->sg[(i >> 2)],
m->data_len, (i & 3));
pkt_len += m->data_len;
@@ -1790,8 +1790,8 @@ lio_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
m = m->next;
}
- phyaddr = rte_mem_virt2phy(g->sg);
- if (phyaddr == RTE_BAD_PHYS_ADDR) {
+ phyaddr = rte_mem_virt2iova(g->sg);
+ if (phyaddr == RTE_BAD_IOVA) {
PMD_TX_LOG(lio_dev, ERR, "bad phys addr\n");
goto xmit_failed;
}
diff --git a/drivers/net/liquidio/lio_rxtx.h b/drivers/net/liquidio/lio_rxtx.h
index 85685dc7..ef033735 100644
--- a/drivers/net/liquidio/lio_rxtx.h
+++ b/drivers/net/liquidio/lio_rxtx.h
@@ -686,9 +686,9 @@ lio_swap_8B_data(uint64_t *data, uint32_t blocks)
static inline uint64_t
lio_map_ring(void *buf)
{
- phys_addr_t dma_addr;
+ rte_iova_t dma_addr;
- dma_addr = rte_mbuf_data_dma_addr_default(((struct rte_mbuf *)buf));
+ dma_addr = rte_mbuf_data_iova_default(((struct rte_mbuf *)buf));
return (uint64_t)dma_addr;
}
@@ -696,7 +696,7 @@ lio_map_ring(void *buf)
static inline uint64_t
lio_map_ring_info(struct lio_droq *droq, uint32_t i)
{
- phys_addr_t dma_addr;
+ rte_iova_t dma_addr;
dma_addr = droq->info_list_dma + (i * LIO_DROQ_INFO_SIZE);
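The lio_rxtx changes replace the physical-address helpers with their IOVA-aware equivalents (phys_addr_t becomes rte_iova_t, rte_mbuf_data_dma_addr*() becomes rte_mbuf_data_iova*(), rte_mem_virt2phy() becomes rte_mem_virt2iova()), so descriptor addresses remain correct when the EAL runs in IOVA-as-VA mode. A brief sketch of the converted pattern (the descriptor handling is illustrative only):

#include <rte_mbuf.h>
#include <rte_memory.h>

/* Bus address of an mbuf's data area, as written into a DMA descriptor. */
static rte_iova_t
mbuf_data_bus_addr(struct rte_mbuf *m)
{
	return rte_mbuf_data_iova(m);
}

/* Translate a driver-owned buffer, checking RTE_BAD_IOVA as the patch does. */
static int
buf_bus_addr(void *buf, rte_iova_t *out)
{
	rte_iova_t iova = rte_mem_virt2iova(buf);

	if (iova == RTE_BAD_IOVA)
		return -1;
	*out = iova;
	return 0;
}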
diff --git a/drivers/net/liquidio/lio_struct.h b/drivers/net/liquidio/lio_struct.h
index d9cbf000..10e3976a 100644
--- a/drivers/net/liquidio/lio_struct.h
+++ b/drivers/net/liquidio/lio_struct.h
@@ -684,6 +684,7 @@ struct lio_device {
uint8_t nb_tx_queues;
uint8_t port_configured;
struct lio_rss_ctx rss_state;
- uint8_t port_id;
+ uint16_t port_id;
+ char firmware_version[LIO_FW_VERSION_LENGTH];
};
#endif /* _LIO_STRUCT_H_ */
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index c045bd79..f1f47c28 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -1,7 +1,7 @@
# BSD LICENSE
#
-# Copyright 2012-2015 6WIND S.A.
-# Copyright 2012 Mellanox.
+# Copyright 2012 6WIND S.A.
+# Copyright 2012 Mellanox
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -36,7 +36,14 @@ LIB = librte_pmd_mlx4.a
# Sources.
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_ethdev.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_intr.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mr.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxq.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_txq.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_utils.c
# Basic CFLAGS.
CFLAGS += -O3
@@ -47,7 +54,10 @@ CFLAGS += -D_BSD_SOURCE
CFLAGS += -D_DEFAULT_SOURCE
CFLAGS += -D_XOPEN_SOURCE=600
CFLAGS += $(WERROR_FLAGS)
-LDLIBS += -libverbs
+LDLIBS += -libverbs -lmlx4
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
# A few warnings cannot be avoided in external headers.
CFLAGS += -Wno-error=cast-qual
@@ -68,22 +78,10 @@ else
CFLAGS += -DNDEBUG -UPEDANTIC
endif
-ifdef CONFIG_RTE_LIBRTE_MLX4_SGE_WR_N
-CFLAGS += -DMLX4_PMD_SGE_WR_N=$(CONFIG_RTE_LIBRTE_MLX4_SGE_WR_N)
-endif
-
-ifdef CONFIG_RTE_LIBRTE_MLX4_MAX_INLINE
-CFLAGS += -DMLX4_PMD_MAX_INLINE=$(CONFIG_RTE_LIBRTE_MLX4_MAX_INLINE)
-endif
-
ifdef CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE
CFLAGS += -DMLX4_PMD_TX_MP_CACHE=$(CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE)
endif
-ifdef CONFIG_RTE_LIBRTE_MLX4_SOFT_COUNTERS
-CFLAGS += -DMLX4_PMD_SOFT_COUNTERS=$(CONFIG_RTE_LIBRTE_MLX4_SOFT_COUNTERS)
-endif
-
ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DEBUG_BROKEN_VERBS),y)
CFLAGS += -DMLX4_PMD_DEBUG_BROKEN_VERBS
endif
@@ -103,23 +101,7 @@ mlx4_autoconf.h.new: FORCE
mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
$Q $(RM) -f -- '$@'
- $Q sh -- '$<' '$@' \
- RSS_SUPPORT \
- infiniband/verbs.h \
- enum IBV_EXP_DEVICE_UD_RSS $(AUTOCONF_OUTPUT)
- $Q sh -- '$<' '$@' \
- INLINE_RECV \
- infiniband/verbs.h \
- enum IBV_EXP_DEVICE_ATTR_INLINE_RECV_SZ $(AUTOCONF_OUTPUT)
- $Q sh -- '$<' '$@' \
- HAVE_EXP_QUERY_DEVICE \
- infiniband/verbs.h \
- type 'struct ibv_exp_device_attr' $(AUTOCONF_OUTPUT)
- $Q sh -- '$<' '$@' \
- HAVE_EXP_QP_BURST_CREATE_DISABLE_ETH_LOOPBACK \
- infiniband/verbs.h \
- enum IBV_EXP_QP_BURST_CREATE_DISABLE_ETH_LOOPBACK \
- $(AUTOCONF_OUTPUT)
+ $Q : > '$@'
# Create mlx4_autoconf.h or update it in case it differs from the new one.
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 055de49a..f9e4f9d7 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
- * Copyright 2012-2017 6WIND S.A.
- * Copyright 2012-2017 Mellanox.
+ * Copyright 2012 6WIND S.A.
+ * Copyright 2012 Mellanox
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,95 +31,52 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/*
- * Known limitations:
- * - RSS hash key and options cannot be modified.
- * - Hardware counters aren't implemented.
+/**
+ * @file
+ * mlx4 driver initialization.
*/
-/* System headers. */
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
#include <stddef.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
-#include <stdint.h>
-#include <inttypes.h>
#include <string.h>
-#include <errno.h>
-#include <unistd.h>
-#include <limits.h>
-#include <assert.h>
-#include <arpa/inet.h>
-#include <net/if.h>
-#include <dirent.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <linux/ethtool.h>
-#include <linux/sockios.h>
-#include <fcntl.h>
-#include <rte_ether.h>
-#include <rte_ethdev.h>
-#include <rte_ethdev_pci.h>
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_common.h>
#include <rte_dev.h>
-#include <rte_mbuf.h>
#include <rte_errno.h>
-#include <rte_mempool.h>
-#include <rte_prefetch.h>
-#include <rte_malloc.h>
-#include <rte_spinlock.h>
-#include <rte_atomic.h>
-#include <rte_version.h>
-#include <rte_log.h>
-#include <rte_alarm.h>
-#include <rte_memory.h>
+#include <rte_ethdev.h>
+#include <rte_ethdev_pci.h>
+#include <rte_ether.h>
#include <rte_flow.h>
-#include <rte_kvargs.h>
#include <rte_interrupts.h>
+#include <rte_kvargs.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
-/* Generated configuration header. */
-#include "mlx4_autoconf.h"
-
-/* PMD headers. */
#include "mlx4.h"
#include "mlx4_flow.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
-/* Convenience macros for accessing mbuf fields. */
-#define NEXT(m) ((m)->next)
-#define DATA_LEN(m) ((m)->data_len)
-#define PKT_LEN(m) ((m)->pkt_len)
-#define DATA_OFF(m) ((m)->data_off)
-#define SET_DATA_OFF(m, o) ((m)->data_off = (o))
-#define NB_SEGS(m) ((m)->nb_segs)
-#define PORT(m) ((m)->port)
-
-/* Work Request ID data type (64 bit). */
-typedef union {
- struct {
- uint32_t id;
- uint16_t offset;
- } data;
- uint64_t raw;
-} wr_id_t;
-
-#define WR_ID(o) (((wr_id_t *)&(o))->data)
-
-/* Transpose flags. Useful to convert IBV to DPDK flags. */
-#define TRANSPOSE(val, from, to) \
- (((from) >= (to)) ? \
- (((val) & (from)) / ((from) / (to))) : \
- (((val) & (from)) * ((to) / (from))))
-
-/* Local storage for secondary process data. */
-struct mlx4_secondary_data {
- struct rte_eth_dev_data data; /* Local device data. */
- struct priv *primary_priv; /* Private structure from primary. */
- struct rte_eth_dev_data *shared_dev_data; /* Shared device data. */
- rte_spinlock_t lock; /* Port configuration lock. */
-} mlx4_secondary_data[RTE_MAX_ETHPORTS];
-
+/** Configuration structure for device arguments. */
struct mlx4_conf {
- uint8_t active_ports;
+ struct {
+ uint32_t present; /**< Bit-field for existing ports. */
+ uint32_t enabled; /**< Bit-field for user-enabled ports. */
+ } ports;
};
/* Available parameters list. */
@@ -128,593 +85,6 @@ const char *pmd_mlx4_init_params[] = {
NULL,
};
-static int
-mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx);
-
-static int
-mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx);
-
-static int
-priv_rx_intr_vec_enable(struct priv *priv);
-
-static void
-priv_rx_intr_vec_disable(struct priv *priv);
-
-/**
- * Check if running as a secondary process.
- *
- * @return
- * Nonzero if running as a secondary process.
- */
-static inline int
-mlx4_is_secondary(void)
-{
- return rte_eal_process_type() != RTE_PROC_PRIMARY;
-}
-
-/**
- * Return private structure associated with an Ethernet device.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- *
- * @return
- * Pointer to private structure.
- */
-static struct priv *
-mlx4_get_priv(struct rte_eth_dev *dev)
-{
- struct mlx4_secondary_data *sd;
-
- if (!mlx4_is_secondary())
- return dev->data->dev_private;
- sd = &mlx4_secondary_data[dev->data->port_id];
- return sd->data.dev_private;
-}
-
-/**
- * Lock private structure to protect it from concurrent access in the
- * control path.
- *
- * @param priv
- * Pointer to private structure.
- */
-void priv_lock(struct priv *priv)
-{
- rte_spinlock_lock(&priv->lock);
-}
-
-/**
- * Unlock private structure.
- *
- * @param priv
- * Pointer to private structure.
- */
-void priv_unlock(struct priv *priv)
-{
- rte_spinlock_unlock(&priv->lock);
-}
-
-/* Allocate a buffer on the stack and fill it with a printf format string. */
-#define MKSTR(name, ...) \
- char name[snprintf(NULL, 0, __VA_ARGS__) + 1]; \
- \
- snprintf(name, sizeof(name), __VA_ARGS__)
-
-/**
- * Get interface name from private structure.
- *
- * @param[in] priv
- * Pointer to private structure.
- * @param[out] ifname
- * Interface name output buffer.
- *
- * @return
- * 0 on success, -1 on failure and errno is set.
- */
-static int
-priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
-{
- DIR *dir;
- struct dirent *dent;
- unsigned int dev_type = 0;
- unsigned int dev_port_prev = ~0u;
- char match[IF_NAMESIZE] = "";
-
- {
- MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);
-
- dir = opendir(path);
- if (dir == NULL)
- return -1;
- }
- while ((dent = readdir(dir)) != NULL) {
- char *name = dent->d_name;
- FILE *file;
- unsigned int dev_port;
- int r;
-
- if ((name[0] == '.') &&
- ((name[1] == '\0') ||
- ((name[1] == '.') && (name[2] == '\0'))))
- continue;
-
- MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, name,
- (dev_type ? "dev_id" : "dev_port"));
-
- file = fopen(path, "rb");
- if (file == NULL) {
- if (errno != ENOENT)
- continue;
- /*
- * Switch to dev_id when dev_port does not exist as
- * is the case with Linux kernel versions < 3.15.
- */
-try_dev_id:
- match[0] = '\0';
- if (dev_type)
- break;
- dev_type = 1;
- dev_port_prev = ~0u;
- rewinddir(dir);
- continue;
- }
- r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
- fclose(file);
- if (r != 1)
- continue;
- /*
- * Switch to dev_id when dev_port returns the same value for
- * all ports. May happen when using a MOFED release older than
- * 3.0 with a Linux kernel >= 3.15.
- */
- if (dev_port == dev_port_prev)
- goto try_dev_id;
- dev_port_prev = dev_port;
- if (dev_port == (priv->port - 1u))
- snprintf(match, sizeof(match), "%s", name);
- }
- closedir(dir);
- if (match[0] == '\0')
- return -1;
- strncpy(*ifname, match, sizeof(*ifname));
- return 0;
-}
-
-/**
- * Read from sysfs entry.
- *
- * @param[in] priv
- * Pointer to private structure.
- * @param[in] entry
- * Entry name relative to sysfs path.
- * @param[out] buf
- * Data output buffer.
- * @param size
- * Buffer size.
- *
- * @return
- * 0 on success, -1 on failure and errno is set.
- */
-static int
-priv_sysfs_read(const struct priv *priv, const char *entry,
- char *buf, size_t size)
-{
- char ifname[IF_NAMESIZE];
- FILE *file;
- int ret;
- int err;
-
- if (priv_get_ifname(priv, &ifname))
- return -1;
-
- MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
- ifname, entry);
-
- file = fopen(path, "rb");
- if (file == NULL)
- return -1;
- ret = fread(buf, 1, size, file);
- err = errno;
- if (((size_t)ret < size) && (ferror(file)))
- ret = -1;
- else
- ret = size;
- fclose(file);
- errno = err;
- return ret;
-}
-
-/**
- * Write to sysfs entry.
- *
- * @param[in] priv
- * Pointer to private structure.
- * @param[in] entry
- * Entry name relative to sysfs path.
- * @param[in] buf
- * Data buffer.
- * @param size
- * Buffer size.
- *
- * @return
- * 0 on success, -1 on failure and errno is set.
- */
-static int
-priv_sysfs_write(const struct priv *priv, const char *entry,
- char *buf, size_t size)
-{
- char ifname[IF_NAMESIZE];
- FILE *file;
- int ret;
- int err;
-
- if (priv_get_ifname(priv, &ifname))
- return -1;
-
- MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
- ifname, entry);
-
- file = fopen(path, "wb");
- if (file == NULL)
- return -1;
- ret = fwrite(buf, 1, size, file);
- err = errno;
- if (((size_t)ret < size) || (ferror(file)))
- ret = -1;
- else
- ret = size;
- fclose(file);
- errno = err;
- return ret;
-}
-
-/**
- * Get unsigned long sysfs property.
- *
- * @param priv
- * Pointer to private structure.
- * @param[in] name
- * Entry name relative to sysfs path.
- * @param[out] value
- * Value output buffer.
- *
- * @return
- * 0 on success, -1 on failure and errno is set.
- */
-static int
-priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
-{
- int ret;
- unsigned long value_ret;
- char value_str[32];
-
- ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
- if (ret == -1) {
- DEBUG("cannot read %s value from sysfs: %s",
- name, strerror(errno));
- return -1;
- }
- value_str[ret] = '\0';
- errno = 0;
- value_ret = strtoul(value_str, NULL, 0);
- if (errno) {
- DEBUG("invalid %s value `%s': %s", name, value_str,
- strerror(errno));
- return -1;
- }
- *value = value_ret;
- return 0;
-}
-
-/**
- * Set unsigned long sysfs property.
- *
- * @param priv
- * Pointer to private structure.
- * @param[in] name
- * Entry name relative to sysfs path.
- * @param value
- * Value to set.
- *
- * @return
- * 0 on success, -1 on failure and errno is set.
- */
-static int
-priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
-{
- int ret;
- MKSTR(value_str, "%lu", value);
-
- ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
- if (ret == -1) {
- DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
- name, value_str, value, strerror(errno));
- return -1;
- }
- return 0;
-}
-
-/**
- * Perform ifreq ioctl() on associated Ethernet device.
- *
- * @param[in] priv
- * Pointer to private structure.
- * @param req
- * Request number to pass to ioctl().
- * @param[out] ifr
- * Interface request structure output buffer.
- *
- * @return
- * 0 on success, -1 on failure and errno is set.
- */
-static int
-priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr)
-{
- int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
- int ret = -1;
-
- if (sock == -1)
- return ret;
- if (priv_get_ifname(priv, &ifr->ifr_name) == 0)
- ret = ioctl(sock, req, ifr);
- close(sock);
- return ret;
-}
-
-/**
- * Get device MTU.
- *
- * @param priv
- * Pointer to private structure.
- * @param[out] mtu
- * MTU value output buffer.
- *
- * @return
- * 0 on success, -1 on failure and errno is set.
- */
-static int
-priv_get_mtu(struct priv *priv, uint16_t *mtu)
-{
- unsigned long ulong_mtu;
-
- if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1)
- return -1;
- *mtu = ulong_mtu;
- return 0;
-}
-
-/**
- * Set device MTU.
- *
- * @param priv
- * Pointer to private structure.
- * @param mtu
- * MTU value to set.
- *
- * @return
- * 0 on success, -1 on failure and errno is set.
- */
-static int
-priv_set_mtu(struct priv *priv, uint16_t mtu)
-{
- uint16_t new_mtu;
-
- if (priv_set_sysfs_ulong(priv, "mtu", mtu) ||
- priv_get_mtu(priv, &new_mtu))
- return -1;
- if (new_mtu == mtu)
- return 0;
- errno = EINVAL;
- return -1;
-}
-
-/**
- * Set device flags.
- *
- * @param priv
- * Pointer to private structure.
- * @param keep
- * Bitmask for flags that must remain untouched.
- * @param flags
- * Bitmask for flags to modify.
- *
- * @return
- * 0 on success, -1 on failure and errno is set.
- */
-static int
-priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
-{
- unsigned long tmp;
-
- if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1)
- return -1;
- tmp &= keep;
- tmp |= (flags & (~keep));
- return priv_set_sysfs_ulong(priv, "flags", tmp);
-}
-
-/* Device configuration. */
-
-static int
-txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
- unsigned int socket, const struct rte_eth_txconf *conf);
-
-static void
-txq_cleanup(struct txq *txq);
-
-static int
-rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
- unsigned int socket, int inactive,
- const struct rte_eth_rxconf *conf,
- struct rte_mempool *mp, int children_n,
- struct rxq *rxq_parent);
-
-static void
-rxq_cleanup(struct rxq *rxq);
-
-/**
- * Create RSS parent queue.
- *
- * The new parent is inserted in front of the list in the private structure.
- *
- * @param priv
- * Pointer to private structure.
- * @param queues
- * Queues indices array, if NULL use all Rx queues.
- * @param children_n
- * The number of entries in queues[].
- *
- * @return
- * Pointer to a parent rxq structure, NULL on failure.
- */
-struct rxq *
-priv_parent_create(struct priv *priv,
- uint16_t queues[],
- uint16_t children_n)
-{
- int ret;
- uint16_t i;
- struct rxq *parent;
-
- parent = rte_zmalloc("parent queue",
- sizeof(*parent),
- RTE_CACHE_LINE_SIZE);
- if (!parent) {
- ERROR("cannot allocate memory for RSS parent queue");
- return NULL;
- }
- ret = rxq_setup(priv->dev, parent, 0, 0, 0,
- NULL, NULL, children_n, NULL);
- if (ret) {
- rte_free(parent);
- return NULL;
- }
- parent->rss.queues_n = children_n;
- if (queues) {
- for (i = 0; i < children_n; ++i)
- parent->rss.queues[i] = queues[i];
- } else {
- /* the default RSS ring case */
- assert(priv->rxqs_n == children_n);
- for (i = 0; i < priv->rxqs_n; ++i)
- parent->rss.queues[i] = i;
- }
- LIST_INSERT_HEAD(&priv->parents, parent, next);
- return parent;
-}
-
-/**
- * Clean up RX queue parent structure.
- *
- * @param parent
- * RX queue parent structure.
- */
-void
-rxq_parent_cleanup(struct rxq *parent)
-{
- LIST_REMOVE(parent, next);
- rxq_cleanup(parent);
- rte_free(parent);
-}
-
-/**
- * Clean up parent structures from the parent list.
- *
- * @param priv
- * Pointer to private structure.
- */
-static void
-priv_parent_list_cleanup(struct priv *priv)
-{
- while (!LIST_EMPTY(&priv->parents))
- rxq_parent_cleanup(LIST_FIRST(&priv->parents));
-}
-
-/**
- * Ethernet device configuration.
- *
- * Prepare the driver for a given number of TX and RX queues.
- * Allocate parent RSS queue when several RX queues are requested.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-dev_configure(struct rte_eth_dev *dev)
-{
- struct priv *priv = dev->data->dev_private;
- unsigned int rxqs_n = dev->data->nb_rx_queues;
- unsigned int txqs_n = dev->data->nb_tx_queues;
- unsigned int tmp;
-
- priv->rxqs = (void *)dev->data->rx_queues;
- priv->txqs = (void *)dev->data->tx_queues;
- if (txqs_n != priv->txqs_n) {
- INFO("%p: TX queues number update: %u -> %u",
- (void *)dev, priv->txqs_n, txqs_n);
- priv->txqs_n = txqs_n;
- }
- if (rxqs_n == priv->rxqs_n)
- return 0;
- if (!rte_is_power_of_2(rxqs_n) && !priv->isolated) {
- unsigned n_active;
-
- n_active = rte_align32pow2(rxqs_n + 1) >> 1;
- WARN("%p: number of RX queues must be a power"
- " of 2: %u queues among %u will be active",
- (void *)dev, n_active, rxqs_n);
- }
-
- INFO("%p: RX queues number update: %u -> %u",
- (void *)dev, priv->rxqs_n, rxqs_n);
- /* If RSS is enabled, disable it first. */
- if (priv->rss) {
- unsigned int i;
-
- /* Only if there are no remaining child RX queues. */
- for (i = 0; (i != priv->rxqs_n); ++i)
- if ((*priv->rxqs)[i] != NULL)
- return EINVAL;
- priv_parent_list_cleanup(priv);
- priv->rss = 0;
- priv->rxqs_n = 0;
- }
- if (rxqs_n <= 1) {
- /* Nothing else to do. */
- priv->rxqs_n = rxqs_n;
- return 0;
- }
- /* Allocate a new RSS parent queue if supported by hardware. */
- if (!priv->hw_rss) {
- ERROR("%p: only a single RX queue can be configured when"
- " hardware doesn't support RSS",
- (void *)dev);
- return EINVAL;
- }
- /* Fail if hardware doesn't support that many RSS queues. */
- if (rxqs_n >= priv->max_rss_tbl_sz) {
- ERROR("%p: only %u RX queues can be configured for RSS",
- (void *)dev, priv->max_rss_tbl_sz);
- return EINVAL;
- }
- priv->rss = 1;
- tmp = priv->rxqs_n;
- priv->rxqs_n = rxqs_n;
- if (priv->isolated)
- return 0;
- if (priv_parent_create(priv, NULL, priv->rxqs_n))
- return 0;
- /* Failure, rollback. */
- priv->rss = 0;
- priv->rxqs_n = tmp;
- return ENOMEM;
-}
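
When a non-power-of-two RX queue count is requested without isolated mode, the warning above reports how many queues remain active: the largest power of two not exceeding rxqs_n. A small sketch of that computation (assuming rte_align32pow2() from rte_common.h rounds up to the next power of two):

#include <stdio.h>
#include <rte_common.h>

int
main(void)
{
	unsigned int rxqs_n = 6; /* arbitrary non-power-of-two request */
	unsigned int n_active = rte_align32pow2(rxqs_n + 1) >> 1;

	/* Largest power of two not exceeding rxqs_n: 4 out of 6 here. */
	printf("%u of %u RX queues would be active\n", n_active, rxqs_n);
	return 0;
}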
-
/**
* DPDK callback for Ethernet device configuration.
*
@@ -722,3490 +92,78 @@ dev_configure(struct rte_eth_dev *dev)
* Pointer to Ethernet device structure.
*
* @return
- * 0 on success, negative errno value on failure.
+ * 0 on success, negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_dev_configure(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
+ struct rte_flow_error error;
int ret;
- if (mlx4_is_secondary())
- return -E_RTE_SECONDARY;
- priv_lock(priv);
- ret = dev_configure(dev);
- assert(ret >= 0);
- priv_unlock(priv);
- return -ret;
-}
-
-static uint16_t mlx4_tx_burst(void *, struct rte_mbuf **, uint16_t);
-static uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t);
-
-/**
- * Configure secondary process queues from a private data pointer (primary
- * or secondary) and update burst callbacks. Can take place only once.
- *
- * All queues must have been previously created by the primary process to
- * avoid undefined behavior.
- *
- * @param priv
- * Private data pointer from either primary or secondary process.
- *
- * @return
- * Private data pointer from secondary process, NULL in case of error.
- */
-static struct priv *
-mlx4_secondary_data_setup(struct priv *priv)
-{
- unsigned int port_id = 0;
- struct mlx4_secondary_data *sd;
- void **tx_queues;
- void **rx_queues;
- unsigned int nb_tx_queues;
- unsigned int nb_rx_queues;
- unsigned int i;
-
- /* priv must be valid at this point. */
- assert(priv != NULL);
- /* priv->dev must also be valid but may point to local memory from
- * another process, possibly with the same address and must not
- * be dereferenced yet. */
- assert(priv->dev != NULL);
- /* Determine port ID by finding out where priv comes from. */
- while (1) {
- sd = &mlx4_secondary_data[port_id];
- rte_spinlock_lock(&sd->lock);
- /* Primary process? */
- if (sd->primary_priv == priv)
- break;
- /* Secondary process? */
- if (sd->data.dev_private == priv)
- break;
- rte_spinlock_unlock(&sd->lock);
- if (++port_id == RTE_DIM(mlx4_secondary_data))
- port_id = 0;
- }
- /* Switch to secondary private structure. If private data has already
- * been updated by another thread, there is nothing else to do. */
- priv = sd->data.dev_private;
- if (priv->dev->data == &sd->data)
- goto end;
- /* Sanity checks. Secondary private structure is supposed to point
- * to local eth_dev, itself still pointing to the shared device data
- * structure allocated by the primary process. */
- assert(sd->shared_dev_data != &sd->data);
- assert(sd->data.nb_tx_queues == 0);
- assert(sd->data.tx_queues == NULL);
- assert(sd->data.nb_rx_queues == 0);
- assert(sd->data.rx_queues == NULL);
- assert(priv != sd->primary_priv);
- assert(priv->dev->data == sd->shared_dev_data);
- assert(priv->txqs_n == 0);
- assert(priv->txqs == NULL);
- assert(priv->rxqs_n == 0);
- assert(priv->rxqs == NULL);
- nb_tx_queues = sd->shared_dev_data->nb_tx_queues;
- nb_rx_queues = sd->shared_dev_data->nb_rx_queues;
- /* Allocate local storage for queues. */
- tx_queues = rte_zmalloc("secondary ethdev->tx_queues",
- sizeof(sd->data.tx_queues[0]) * nb_tx_queues,
- RTE_CACHE_LINE_SIZE);
- rx_queues = rte_zmalloc("secondary ethdev->rx_queues",
- sizeof(sd->data.rx_queues[0]) * nb_rx_queues,
- RTE_CACHE_LINE_SIZE);
- if (tx_queues == NULL || rx_queues == NULL)
- goto error;
- /* Lock to prevent control operations during setup. */
- priv_lock(priv);
- /* TX queues. */
- for (i = 0; i != nb_tx_queues; ++i) {
- struct txq *primary_txq = (*sd->primary_priv->txqs)[i];
- struct txq *txq;
-
- if (primary_txq == NULL)
- continue;
- txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0,
- primary_txq->socket);
- if (txq != NULL) {
- if (txq_setup(priv->dev,
- txq,
- primary_txq->elts_n * MLX4_PMD_SGE_WR_N,
- primary_txq->socket,
- NULL) == 0) {
- txq->stats.idx = primary_txq->stats.idx;
- tx_queues[i] = txq;
- continue;
- }
- rte_free(txq);
- }
- while (i) {
- txq = tx_queues[--i];
- txq_cleanup(txq);
- rte_free(txq);
- }
- goto error;
- }
- /* RX queues. */
- for (i = 0; i != nb_rx_queues; ++i) {
- struct rxq *primary_rxq = (*sd->primary_priv->rxqs)[i];
-
- if (primary_rxq == NULL)
- continue;
- /* Not supported yet. */
- rx_queues[i] = NULL;
- }
- /* Update everything. */
- priv->txqs = (void *)tx_queues;
- priv->txqs_n = nb_tx_queues;
- priv->rxqs = (void *)rx_queues;
- priv->rxqs_n = nb_rx_queues;
- sd->data.rx_queues = rx_queues;
- sd->data.tx_queues = tx_queues;
- sd->data.nb_rx_queues = nb_rx_queues;
- sd->data.nb_tx_queues = nb_tx_queues;
- sd->data.dev_link = sd->shared_dev_data->dev_link;
- sd->data.mtu = sd->shared_dev_data->mtu;
- memcpy(sd->data.rx_queue_state, sd->shared_dev_data->rx_queue_state,
- sizeof(sd->data.rx_queue_state));
- memcpy(sd->data.tx_queue_state, sd->shared_dev_data->tx_queue_state,
- sizeof(sd->data.tx_queue_state));
- sd->data.dev_flags = sd->shared_dev_data->dev_flags;
- /* Use local data from now on. */
- rte_mb();
- priv->dev->data = &sd->data;
- rte_mb();
- priv->dev->tx_pkt_burst = mlx4_tx_burst;
- priv->dev->rx_pkt_burst = removed_rx_burst;
- priv_unlock(priv);
-end:
- /* More sanity checks. */
- assert(priv->dev->tx_pkt_burst == mlx4_tx_burst);
- assert(priv->dev->rx_pkt_burst == removed_rx_burst);
- assert(priv->dev->data == &sd->data);
- rte_spinlock_unlock(&sd->lock);
- return priv;
-error:
- priv_unlock(priv);
- rte_free(tx_queues);
- rte_free(rx_queues);
- rte_spinlock_unlock(&sd->lock);
- return NULL;
-}
-
-/* TX queues handling. */
-
-/**
- * Allocate TX queue elements.
- *
- * @param txq
- * Pointer to TX queue structure.
- * @param elts_n
- * Number of elements to allocate.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-txq_alloc_elts(struct txq *txq, unsigned int elts_n)
-{
- unsigned int i;
- struct txq_elt (*elts)[elts_n] =
- rte_calloc_socket("TXQ", 1, sizeof(*elts), 0, txq->socket);
- linear_t (*elts_linear)[elts_n] =
- rte_calloc_socket("TXQ", 1, sizeof(*elts_linear), 0,
- txq->socket);
- struct ibv_mr *mr_linear = NULL;
- int ret = 0;
-
- if ((elts == NULL) || (elts_linear == NULL)) {
- ERROR("%p: can't allocate packets array", (void *)txq);
- ret = ENOMEM;
- goto error;
- }
- mr_linear =
- ibv_reg_mr(txq->priv->pd, elts_linear, sizeof(*elts_linear),
- IBV_ACCESS_LOCAL_WRITE);
- if (mr_linear == NULL) {
- ERROR("%p: unable to configure MR, ibv_reg_mr() failed",
- (void *)txq);
- ret = EINVAL;
- goto error;
- }
- for (i = 0; (i != elts_n); ++i) {
- struct txq_elt *elt = &(*elts)[i];
-
- elt->buf = NULL;
- }
- DEBUG("%p: allocated and configured %u WRs", (void *)txq, elts_n);
- txq->elts_n = elts_n;
- txq->elts = elts;
- txq->elts_head = 0;
- txq->elts_tail = 0;
- txq->elts_comp = 0;
- /* Request send completion every MLX4_PMD_TX_PER_COMP_REQ packets or
- * at least 4 times per ring. */
- txq->elts_comp_cd_init =
- ((MLX4_PMD_TX_PER_COMP_REQ < (elts_n / 4)) ?
- MLX4_PMD_TX_PER_COMP_REQ : (elts_n / 4));
- txq->elts_comp_cd = txq->elts_comp_cd_init;
- txq->elts_linear = elts_linear;
- txq->mr_linear = mr_linear;
- assert(ret == 0);
- return 0;
-error:
- if (mr_linear != NULL)
- claim_zero(ibv_dereg_mr(mr_linear));
-
- rte_free(elts_linear);
- rte_free(elts);
-
- DEBUG("%p: failed, freed everything", (void *)txq);
- assert(ret > 0);
- return ret;
-}
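
The completion cadence chosen above is simply the smaller of MLX4_PMD_TX_PER_COMP_REQ and a quarter of the ring, so a completion is requested at least four times per ring. A standalone sketch (the 64-packet value below is an assumption standing in for the build-time default):

#include <stdio.h>

#define MLX4_PMD_TX_PER_COMP_REQ 64 /* assumed default for illustration */

int
main(void)
{
	unsigned int elts_n = 128; /* TX ring size */
	unsigned int cd_init = (MLX4_PMD_TX_PER_COMP_REQ < (elts_n / 4)) ?
			       MLX4_PMD_TX_PER_COMP_REQ : (elts_n / 4);

	/* 128 / 4 = 32 < 64: a completion is requested every 32 packets. */
	printf("completion requested every %u packets\n", cd_init);
	return 0;
}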
-
-/**
- * Free TX queue elements.
- *
- * @param txq
- * Pointer to TX queue structure.
- */
-static void
-txq_free_elts(struct txq *txq)
-{
- unsigned int elts_n = txq->elts_n;
- unsigned int elts_head = txq->elts_head;
- unsigned int elts_tail = txq->elts_tail;
- struct txq_elt (*elts)[elts_n] = txq->elts;
- linear_t (*elts_linear)[elts_n] = txq->elts_linear;
- struct ibv_mr *mr_linear = txq->mr_linear;
-
- DEBUG("%p: freeing WRs", (void *)txq);
- txq->elts_n = 0;
- txq->elts_head = 0;
- txq->elts_tail = 0;
- txq->elts_comp = 0;
- txq->elts_comp_cd = 0;
- txq->elts_comp_cd_init = 0;
- txq->elts = NULL;
- txq->elts_linear = NULL;
- txq->mr_linear = NULL;
- if (mr_linear != NULL)
- claim_zero(ibv_dereg_mr(mr_linear));
-
- rte_free(elts_linear);
- if (elts == NULL)
- return;
- while (elts_tail != elts_head) {
- struct txq_elt *elt = &(*elts)[elts_tail];
-
- assert(elt->buf != NULL);
- rte_pktmbuf_free(elt->buf);
-#ifndef NDEBUG
- /* Poisoning. */
- memset(elt, 0x77, sizeof(*elt));
-#endif
- if (++elts_tail == elts_n)
- elts_tail = 0;
- }
- rte_free(elts);
-}
-
-
-/**
- * Clean up a TX queue.
- *
- * Destroy objects, free allocated memory and reset the structure for reuse.
- *
- * @param txq
- * Pointer to TX queue structure.
- */
-static void
-txq_cleanup(struct txq *txq)
-{
- struct ibv_exp_release_intf_params params;
- size_t i;
-
- DEBUG("cleaning up %p", (void *)txq);
- txq_free_elts(txq);
- if (txq->if_qp != NULL) {
- assert(txq->priv != NULL);
- assert(txq->priv->ctx != NULL);
- assert(txq->qp != NULL);
- params = (struct ibv_exp_release_intf_params){
- .comp_mask = 0,
- };
- claim_zero(ibv_exp_release_intf(txq->priv->ctx,
- txq->if_qp,
- &params));
- }
- if (txq->if_cq != NULL) {
- assert(txq->priv != NULL);
- assert(txq->priv->ctx != NULL);
- assert(txq->cq != NULL);
- params = (struct ibv_exp_release_intf_params){
- .comp_mask = 0,
- };
- claim_zero(ibv_exp_release_intf(txq->priv->ctx,
- txq->if_cq,
- &params));
- }
- if (txq->qp != NULL)
- claim_zero(ibv_destroy_qp(txq->qp));
- if (txq->cq != NULL)
- claim_zero(ibv_destroy_cq(txq->cq));
- if (txq->rd != NULL) {
- struct ibv_exp_destroy_res_domain_attr attr = {
- .comp_mask = 0,
- };
-
- assert(txq->priv != NULL);
- assert(txq->priv->ctx != NULL);
- claim_zero(ibv_exp_destroy_res_domain(txq->priv->ctx,
- txq->rd,
- &attr));
- }
- for (i = 0; (i != elemof(txq->mp2mr)); ++i) {
- if (txq->mp2mr[i].mp == NULL)
- break;
- assert(txq->mp2mr[i].mr != NULL);
- claim_zero(ibv_dereg_mr(txq->mp2mr[i].mr));
- }
- memset(txq, 0, sizeof(*txq));
-}
-
-/**
- * Manage TX completions.
- *
- * When sending a burst, mlx4_tx_burst() posts several WRs.
- * To improve performance, a completion event is only required once every
- * MLX4_PMD_TX_PER_COMP_REQ sends. Doing so discards completion information
- * for other WRs, but this information would not be used anyway.
- *
- * @param txq
- * Pointer to TX queue structure.
- *
- * @return
- * 0 on success, -1 on failure.
- */
-static int
-txq_complete(struct txq *txq)
-{
- unsigned int elts_comp = txq->elts_comp;
- unsigned int elts_tail = txq->elts_tail;
- const unsigned int elts_n = txq->elts_n;
- int wcs_n;
-
- if (unlikely(elts_comp == 0))
- return 0;
-#ifdef DEBUG_SEND
- DEBUG("%p: processing %u work requests completions",
- (void *)txq, elts_comp);
-#endif
- wcs_n = txq->if_cq->poll_cnt(txq->cq, elts_comp);
- if (unlikely(wcs_n == 0))
- return 0;
- if (unlikely(wcs_n < 0)) {
- DEBUG("%p: ibv_poll_cq() failed (wcs_n=%d)",
- (void *)txq, wcs_n);
- return -1;
- }
- elts_comp -= wcs_n;
- assert(elts_comp <= txq->elts_comp);
- /*
- * Assume WC status is successful as nothing can be done about it
- * anyway.
- */
- elts_tail += wcs_n * txq->elts_comp_cd_init;
- if (elts_tail >= elts_n)
- elts_tail -= elts_n;
- txq->elts_tail = elts_tail;
- txq->elts_comp = elts_comp;
- return 0;
-}
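
Since a completion is only requested every elts_comp_cd_init packets, each polled work completion releases that many ring entries at once; the tail therefore advances by a whole stride per completion and wraps modulo the ring size. A minimal sketch of that bookkeeping with made-up numbers:

#include <stdio.h>

int
main(void)
{
	unsigned int elts_n = 128, elts_tail = 120, cd_init = 32;
	int wcs_n = 2; /* work completions polled from the CQ */

	elts_tail += wcs_n * cd_init; /* 120 + 64 = 184 */
	if (elts_tail >= elts_n)
		elts_tail -= elts_n;  /* wrap around: 184 - 128 = 56 */
	printf("new tail: %u\n", elts_tail);
	return 0;
}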
-
-struct mlx4_check_mempool_data {
- int ret;
- char *start;
- char *end;
-};
-
-/* Called by mlx4_check_mempool() when iterating the memory chunks. */
-static void mlx4_check_mempool_cb(struct rte_mempool *mp,
- void *opaque, struct rte_mempool_memhdr *memhdr,
- unsigned mem_idx)
-{
- struct mlx4_check_mempool_data *data = opaque;
-
- (void)mp;
- (void)mem_idx;
-
- /* It already failed, skip the next chunks. */
- if (data->ret != 0)
- return;
- /* It is the first chunk. */
- if (data->start == NULL && data->end == NULL) {
- data->start = memhdr->addr;
- data->end = data->start + memhdr->len;
- return;
- }
- if (data->end == memhdr->addr) {
- data->end += memhdr->len;
- return;
- }
- if (data->start == (char *)memhdr->addr + memhdr->len) {
- data->start -= memhdr->len;
- return;
- }
- /* Error, mempool is not virtually contiguous. */
- data->ret = -1;
-}
-
-/**
- * Check if a mempool can be used: it must be virtually contiguous.
- *
- * @param[in] mp
- * Pointer to memory pool.
- * @param[out] start
- * Pointer to the start address of the mempool virtual memory area
- * @param[out] end
- * Pointer to the end address of the mempool virtual memory area
- *
- * @return
- * 0 on success (mempool is virtually contiguous), -1 on error.
- */
-static int mlx4_check_mempool(struct rte_mempool *mp, uintptr_t *start,
- uintptr_t *end)
-{
- struct mlx4_check_mempool_data data;
-
- memset(&data, 0, sizeof(data));
- rte_mempool_mem_iter(mp, mlx4_check_mempool_cb, &data);
- *start = (uintptr_t)data.start;
- *end = (uintptr_t)data.end;
-
- return data.ret;
-}
-
-/* For best performance, this function should not be inlined. */
-static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, struct rte_mempool *)
- __rte_noinline;
-
-/**
- * Register mempool as a memory region.
- *
- * @param pd
- * Pointer to protection domain.
- * @param mp
- * Pointer to memory pool.
- *
- * @return
- * Memory region pointer, NULL in case of error.
- */
-static struct ibv_mr *
-mlx4_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
-{
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- uintptr_t start;
- uintptr_t end;
- unsigned int i;
-
- if (mlx4_check_mempool(mp, &start, &end) != 0) {
- ERROR("mempool %p: not virtually contiguous",
- (void *)mp);
- return NULL;
- }
-
- DEBUG("mempool %p area start=%p end=%p size=%zu",
- (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- /* Round start and end to page boundary if found in memory segments. */
- for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
- uintptr_t addr = (uintptr_t)ms[i].addr;
- size_t len = ms[i].len;
- unsigned int align = ms[i].hugepage_sz;
-
- if ((start > addr) && (start < addr + len))
- start = RTE_ALIGN_FLOOR(start, align);
- if ((end > addr) && (end < addr + len))
- end = RTE_ALIGN_CEIL(end, align);
- }
- DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
- (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- return ibv_reg_mr(pd,
- (void *)start,
- end - start,
- IBV_ACCESS_LOCAL_WRITE);
-}
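
The rounding step above widens the registered area to hugepage boundaries whenever the mempool lies inside a known memory segment, so the MR always covers whole pages. A self-contained sketch of just the alignment (the addresses and the 2 MB hugepage size are made up):

#include <stdio.h>
#include <stdint.h>
#include <rte_common.h>

int
main(void)
{
	uintptr_t start = 0x7f0000201000UL;
	uintptr_t end = 0x7f00003ff000UL;
	unsigned int align = 0x200000; /* 2 MB hugepage */

	start = RTE_ALIGN_FLOOR(start, align); /* 0x7f0000200000 */
	end = RTE_ALIGN_CEIL(end, align);      /* 0x7f0000400000 */
	printf("MR would cover %p-%p\n", (void *)start, (void *)end);
	return 0;
}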
-
-/**
- * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
- * the cloned mbuf is allocated is returned instead.
- *
- * @param buf
- * Pointer to mbuf.
- *
- * @return
- * Memory pool where data is located for given mbuf.
- */
-static struct rte_mempool *
-txq_mb2mp(struct rte_mbuf *buf)
-{
- if (unlikely(RTE_MBUF_INDIRECT(buf)))
- return rte_mbuf_from_indirect(buf)->pool;
- return buf->pool;
-}
-
-/**
- * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[].
- * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
- * remove an entry first.
- *
- * @param txq
- * Pointer to TX queue structure.
- * @param[in] mp
- * Memory Pool for which a Memory Region lkey must be returned.
- *
- * @return
- * mr->lkey on success, (uint32_t)-1 on failure.
- */
-static uint32_t
-txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
-{
- unsigned int i;
- struct ibv_mr *mr;
-
- for (i = 0; (i != elemof(txq->mp2mr)); ++i) {
- if (unlikely(txq->mp2mr[i].mp == NULL)) {
- /* Unknown MP, add a new MR for it. */
- break;
- }
- if (txq->mp2mr[i].mp == mp) {
- assert(txq->mp2mr[i].lkey != (uint32_t)-1);
- assert(txq->mp2mr[i].mr->lkey == txq->mp2mr[i].lkey);
- return txq->mp2mr[i].lkey;
- }
- }
- /* Add a new entry, register MR first. */
- DEBUG("%p: discovered new memory pool \"%s\" (%p)",
- (void *)txq, mp->name, (void *)mp);
- mr = mlx4_mp2mr(txq->priv->pd, mp);
- if (unlikely(mr == NULL)) {
- DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
- (void *)txq);
- return (uint32_t)-1;
- }
- if (unlikely(i == elemof(txq->mp2mr))) {
- /* Table is full, remove oldest entry. */
- DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
- (void *)txq);
- --i;
- claim_zero(ibv_dereg_mr(txq->mp2mr[0].mr));
- memmove(&txq->mp2mr[0], &txq->mp2mr[1],
- (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
- }
- /* Store the new entry. */
- txq->mp2mr[i].mp = mp;
- txq->mp2mr[i].mr = mr;
- txq->mp2mr[i].lkey = mr->lkey;
- DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
- (void *)txq, mp->name, (void *)mp, txq->mp2mr[i].lkey);
- return txq->mp2mr[i].lkey;
-}
-
-struct txq_mp2mr_mbuf_check_data {
- int ret;
-};
-
-/**
- * Callback function for rte_mempool_obj_iter() to check whether a given
- * mempool object looks like a mbuf.
- *
- * @param[in] mp
- * The mempool pointer
- * @param[in] arg
- * Context data (struct txq_mp2mr_mbuf_check_data). Contains the
- * return value.
- * @param[in] obj
- * Object address.
- * @param index
- * Object index, unused.
- */
-static void
-txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
- uint32_t index __rte_unused)
-{
- struct txq_mp2mr_mbuf_check_data *data = arg;
- struct rte_mbuf *buf = obj;
-
- /* Check whether mbuf structure fits element size and whether mempool
- * pointer is valid. */
- if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
- data->ret = -1;
-}
-
-/**
- * Iterator function for rte_mempool_walk() to register existing mempools and
- * fill the MP to MR cache of a TX queue.
- *
- * @param[in] mp
- * Memory Pool to register.
- * @param *arg
- * Pointer to TX queue structure.
- */
-static void
-txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
-{
- struct txq *txq = arg;
- struct txq_mp2mr_mbuf_check_data data = {
- .ret = 0,
- };
-
- /* Register mempool only if the first element looks like a mbuf. */
- if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
- data.ret == -1)
- return;
- txq_mp2mr(txq, mp);
-}
-
-#if MLX4_PMD_SGE_WR_N > 1
-
-/**
- * Copy scattered mbuf contents to a single linear buffer.
- *
- * @param[out] linear
- * Linear output buffer.
- * @param[in] buf
- * Scattered input buffer.
- *
- * @return
- * Number of bytes copied to the output buffer or 0 if not large enough.
- */
-static unsigned int
-linearize_mbuf(linear_t *linear, struct rte_mbuf *buf)
-{
- unsigned int size = 0;
- unsigned int offset;
-
- do {
- unsigned int len = DATA_LEN(buf);
-
- offset = size;
- size += len;
- if (unlikely(size > sizeof(*linear)))
- return 0;
- memcpy(&(*linear)[offset],
- rte_pktmbuf_mtod(buf, uint8_t *),
- len);
- buf = NEXT(buf);
- } while (buf != NULL);
- return size;
-}
-
-/**
- * Handle scattered buffers for mlx4_tx_burst().
- *
- * @param txq
- * TX queue structure.
- * @param segs
- * Number of segments in buf.
- * @param elt
- * TX queue element to fill.
- * @param[in] buf
- * Buffer to process.
- * @param elts_head
- * Index of the linear buffer to use if necessary (normally txq->elts_head).
- * @param[out] sges
- * Array filled with SGEs on success.
- *
- * @return
- * A structure containing the processed packet size in bytes and the
- * number of SGEs. Both fields are set to (unsigned int)-1 in case of
- * failure.
- */
-static struct tx_burst_sg_ret {
- unsigned int length;
- unsigned int num;
-}
-tx_burst_sg(struct txq *txq, unsigned int segs, struct txq_elt *elt,
- struct rte_mbuf *buf, unsigned int elts_head,
- struct ibv_sge (*sges)[MLX4_PMD_SGE_WR_N])
-{
- unsigned int sent_size = 0;
- unsigned int j;
- int linearize = 0;
-
- /* When there are too many segments, extra segments are
- * linearized in the last SGE. */
- if (unlikely(segs > elemof(*sges))) {
- segs = (elemof(*sges) - 1);
- linearize = 1;
- }
- /* Update element. */
- elt->buf = buf;
- /* Register segments as SGEs. */
- for (j = 0; (j != segs); ++j) {
- struct ibv_sge *sge = &(*sges)[j];
- uint32_t lkey;
-
- /* Retrieve Memory Region key for this memory pool. */
- lkey = txq_mp2mr(txq, txq_mb2mp(buf));
- if (unlikely(lkey == (uint32_t)-1)) {
- /* MR does not exist. */
- DEBUG("%p: unable to get MP <-> MR association",
- (void *)txq);
- /* Clean up TX element. */
- elt->buf = NULL;
- goto stop;
- }
- /* Update SGE. */
- sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
- if (txq->priv->vf)
- rte_prefetch0((volatile void *)
- (uintptr_t)sge->addr);
- sge->length = DATA_LEN(buf);
- sge->lkey = lkey;
- sent_size += sge->length;
- buf = NEXT(buf);
- }
- /* If buf is not NULL here and is not going to be linearized,
- * nb_segs is not valid. */
- assert(j == segs);
- assert((buf == NULL) || (linearize));
- /* Linearize extra segments. */
- if (linearize) {
- struct ibv_sge *sge = &(*sges)[segs];
- linear_t *linear = &(*txq->elts_linear)[elts_head];
- unsigned int size = linearize_mbuf(linear, buf);
-
- assert(segs == (elemof(*sges) - 1));
- if (size == 0) {
- /* Invalid packet. */
- DEBUG("%p: packet too large to be linearized.",
- (void *)txq);
- /* Clean up TX element. */
- elt->buf = NULL;
- goto stop;
- }
- /* If MLX4_PMD_SGE_WR_N is 1, free mbuf immediately. */
- if (elemof(*sges) == 1) {
- do {
- struct rte_mbuf *next = NEXT(buf);
-
- rte_pktmbuf_free_seg(buf);
- buf = next;
- } while (buf != NULL);
- elt->buf = NULL;
- }
- /* Update SGE. */
- sge->addr = (uintptr_t)&(*linear)[0];
- sge->length = size;
- sge->lkey = txq->mr_linear->lkey;
- sent_size += size;
- /* Include last segment. */
- segs++;
- }
- return (struct tx_burst_sg_ret){
- .length = sent_size,
- .num = segs,
- };
-stop:
- return (struct tx_burst_sg_ret){
- .length = -1,
- .num = -1,
- };
-}
-
-#endif /* MLX4_PMD_SGE_WR_N > 1 */
-
-/**
- * DPDK callback for TX.
- *
- * @param dpdk_txq
- * Generic pointer to TX queue structure.
- * @param[in] pkts
- * Packets to transmit.
- * @param pkts_n
- * Number of packets in array.
- *
- * @return
- * Number of packets successfully transmitted (<= pkts_n).
- */
-static uint16_t
-mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- struct txq *txq = (struct txq *)dpdk_txq;
- unsigned int elts_head = txq->elts_head;
- const unsigned int elts_n = txq->elts_n;
- unsigned int elts_comp_cd = txq->elts_comp_cd;
- unsigned int elts_comp = 0;
- unsigned int i;
- unsigned int max;
- int err;
-
- assert(elts_comp_cd != 0);
- txq_complete(txq);
- max = (elts_n - (elts_head - txq->elts_tail));
- if (max > elts_n)
- max -= elts_n;
- assert(max >= 1);
- assert(max <= elts_n);
- /* Always leave one free entry in the ring. */
- --max;
- if (max == 0)
- return 0;
- if (max > pkts_n)
- max = pkts_n;
- for (i = 0; (i != max); ++i) {
- struct rte_mbuf *buf = pkts[i];
- unsigned int elts_head_next =
- (((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
- struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
- struct txq_elt *elt = &(*txq->elts)[elts_head];
- unsigned int segs = NB_SEGS(buf);
-#ifdef MLX4_PMD_SOFT_COUNTERS
- unsigned int sent_size = 0;
-#endif
- uint32_t send_flags = 0;
-
- /* Clean up old buffer. */
- if (likely(elt->buf != NULL)) {
- struct rte_mbuf *tmp = elt->buf;
-
-#ifndef NDEBUG
- /* Poisoning. */
- memset(elt, 0x66, sizeof(*elt));
-#endif
- /* Faster than rte_pktmbuf_free(). */
- do {
- struct rte_mbuf *next = NEXT(tmp);
-
- rte_pktmbuf_free_seg(tmp);
- tmp = next;
- } while (tmp != NULL);
- }
- /* Request TX completion. */
- if (unlikely(--elts_comp_cd == 0)) {
- elts_comp_cd = txq->elts_comp_cd_init;
- ++elts_comp;
- send_flags |= IBV_EXP_QP_BURST_SIGNALED;
- }
- /* Should we enable HW CKSUM offload */
- if (buf->ol_flags &
- (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
- send_flags |= IBV_EXP_QP_BURST_IP_CSUM;
- /* HW does not support checksum offloads at arbitrary
- * offsets but automatically recognizes the packet
- * type. For inner L3/L4 checksums, only VXLAN (UDP)
- * tunnels are currently supported. */
- if (RTE_ETH_IS_TUNNEL_PKT(buf->packet_type))
- send_flags |= IBV_EXP_QP_BURST_TUNNEL;
- }
- if (likely(segs == 1)) {
- uintptr_t addr;
- uint32_t length;
- uint32_t lkey;
-
- /* Retrieve buffer information. */
- addr = rte_pktmbuf_mtod(buf, uintptr_t);
- length = DATA_LEN(buf);
- /* Retrieve Memory Region key for this memory pool. */
- lkey = txq_mp2mr(txq, txq_mb2mp(buf));
- if (unlikely(lkey == (uint32_t)-1)) {
- /* MR does not exist. */
- DEBUG("%p: unable to get MP <-> MR"
- " association", (void *)txq);
- /* Clean up TX element. */
- elt->buf = NULL;
- goto stop;
- }
- /* Update element. */
- elt->buf = buf;
- if (txq->priv->vf)
- rte_prefetch0((volatile void *)
- (uintptr_t)addr);
- RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
- /* Put packet into send queue. */
-#if MLX4_PMD_MAX_INLINE > 0
- if (length <= txq->max_inline)
- err = txq->if_qp->send_pending_inline
- (txq->qp,
- (void *)addr,
- length,
- send_flags);
- else
-#endif
- err = txq->if_qp->send_pending
- (txq->qp,
- addr,
- length,
- lkey,
- send_flags);
- if (unlikely(err))
- goto stop;
-#ifdef MLX4_PMD_SOFT_COUNTERS
- sent_size += length;
-#endif
- } else {
-#if MLX4_PMD_SGE_WR_N > 1
- struct ibv_sge sges[MLX4_PMD_SGE_WR_N];
- struct tx_burst_sg_ret ret;
-
- ret = tx_burst_sg(txq, segs, elt, buf, elts_head,
- &sges);
- if (ret.length == (unsigned int)-1)
- goto stop;
- RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
- /* Put SG list into send queue. */
- err = txq->if_qp->send_pending_sg_list
- (txq->qp,
- sges,
- ret.num,
- send_flags);
- if (unlikely(err))
- goto stop;
-#ifdef MLX4_PMD_SOFT_COUNTERS
- sent_size += ret.length;
-#endif
-#else /* MLX4_PMD_SGE_WR_N > 1 */
- DEBUG("%p: TX scattered buffers support not"
- " compiled in", (void *)txq);
- goto stop;
-#endif /* MLX4_PMD_SGE_WR_N > 1 */
- }
- elts_head = elts_head_next;
-#ifdef MLX4_PMD_SOFT_COUNTERS
- /* Increment sent bytes counter. */
- txq->stats.obytes += sent_size;
-#endif
- }
-stop:
- /* Take a shortcut if nothing must be sent. */
- if (unlikely(i == 0))
- return 0;
-#ifdef MLX4_PMD_SOFT_COUNTERS
- /* Increment sent packets counter. */
- txq->stats.opackets += i;
-#endif
- /* Ring QP doorbell. */
- err = txq->if_qp->send_flush(txq->qp);
- if (unlikely(err)) {
- /* A nonzero value is not supposed to be returned.
- * Nothing can be done about it. */
- DEBUG("%p: send_flush() failed with error %d",
- (void *)txq, err);
- }
- txq->elts_head = elts_head;
- txq->elts_comp += elts_comp;
- txq->elts_comp_cd = elts_comp_cd;
- return i;
-}
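
The available-slot computation at the top of mlx4_tx_burst() relies on unsigned wrap-around between head and tail and always reserves one ring entry so the head never catches up with the tail. A standalone sketch of that arithmetic:

#include <stdio.h>

int
main(void)
{
	const unsigned int elts_n = 128;
	unsigned int elts_head = 10;   /* head already wrapped past the end */
	unsigned int elts_tail = 120;
	unsigned int max;

	max = elts_n - (elts_head - elts_tail); /* unsigned underflow intended */
	if (max > elts_n)
		max -= elts_n; /* 110 entries are free */
	--max;                 /* always leave one free entry in the ring */
	printf("%u descriptors available\n", max); /* prints 109 */
	return 0;
}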
-
-/**
- * DPDK callback for TX in secondary processes.
- *
- * This function configures all queues from primary process information
- * if necessary before reverting to the normal TX burst callback.
- *
- * @param dpdk_txq
- * Generic pointer to TX queue structure.
- * @param[in] pkts
- * Packets to transmit.
- * @param pkts_n
- * Number of packets in array.
- *
- * @return
- * Number of packets successfully transmitted (<= pkts_n).
- */
-static uint16_t
-mlx4_tx_burst_secondary_setup(void *dpdk_txq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
-{
- struct txq *txq = dpdk_txq;
- struct priv *priv = mlx4_secondary_data_setup(txq->priv);
- struct priv *primary_priv;
- unsigned int index;
-
- if (priv == NULL)
- return 0;
- primary_priv =
- mlx4_secondary_data[priv->dev->data->port_id].primary_priv;
- /* Look for queue index in both private structures. */
- for (index = 0; index != priv->txqs_n; ++index)
- if (((*primary_priv->txqs)[index] == txq) ||
- ((*priv->txqs)[index] == txq))
- break;
- if (index == priv->txqs_n)
- return 0;
- txq = (*priv->txqs)[index];
- return priv->dev->tx_pkt_burst(txq, pkts, pkts_n);
-}
-
-/**
- * Configure a TX queue.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param txq
- * Pointer to TX queue structure.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param socket
- * NUMA socket on which memory must be allocated.
- * @param[in] conf
- * Thresholds parameters.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
- unsigned int socket, const struct rte_eth_txconf *conf)
-{
- struct priv *priv = mlx4_get_priv(dev);
- struct txq tmpl = {
- .priv = priv,
- .socket = socket
- };
- union {
- struct ibv_exp_query_intf_params params;
- struct ibv_exp_qp_init_attr init;
- struct ibv_exp_res_domain_init_attr rd;
- struct ibv_exp_cq_init_attr cq;
- struct ibv_exp_qp_attr mod;
- } attr;
- enum ibv_exp_query_intf_status status;
- int ret = 0;
-
- (void)conf; /* Thresholds configuration (ignored). */
- if (priv == NULL)
- return EINVAL;
- if ((desc == 0) || (desc % MLX4_PMD_SGE_WR_N)) {
- ERROR("%p: invalid number of TX descriptors (must be a"
- " multiple of %d)", (void *)dev, MLX4_PMD_SGE_WR_N);
- return EINVAL;
- }
- desc /= MLX4_PMD_SGE_WR_N;
- /* MRs will be registered in mp2mr[] later. */
- attr.rd = (struct ibv_exp_res_domain_init_attr){
- .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
- IBV_EXP_RES_DOMAIN_MSG_MODEL),
- .thread_model = IBV_EXP_THREAD_SINGLE,
- .msg_model = IBV_EXP_MSG_HIGH_BW,
- };
- tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
- if (tmpl.rd == NULL) {
- ret = ENOMEM;
- ERROR("%p: RD creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- attr.cq = (struct ibv_exp_cq_init_attr){
- .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
- .res_domain = tmpl.rd,
- };
- tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);
- if (tmpl.cq == NULL) {
- ret = ENOMEM;
- ERROR("%p: CQ creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- DEBUG("priv->device_attr.max_qp_wr is %d",
- priv->device_attr.max_qp_wr);
- DEBUG("priv->device_attr.max_sge is %d",
- priv->device_attr.max_sge);
- attr.init = (struct ibv_exp_qp_init_attr){
- /* CQ to be associated with the send queue. */
- .send_cq = tmpl.cq,
- /* CQ to be associated with the receive queue. */
- .recv_cq = tmpl.cq,
- .cap = {
- /* Max number of outstanding WRs. */
- .max_send_wr = ((priv->device_attr.max_qp_wr < desc) ?
- priv->device_attr.max_qp_wr :
- desc),
- /* Max number of scatter/gather elements in a WR. */
- .max_send_sge = ((priv->device_attr.max_sge <
- MLX4_PMD_SGE_WR_N) ?
- priv->device_attr.max_sge :
- MLX4_PMD_SGE_WR_N),
-#if MLX4_PMD_MAX_INLINE > 0
- .max_inline_data = MLX4_PMD_MAX_INLINE,
-#endif
- },
- .qp_type = IBV_QPT_RAW_PACKET,
- /* Do *NOT* enable this, completion events are managed per
- * TX burst. */
- .sq_sig_all = 0,
- .pd = priv->pd,
- .res_domain = tmpl.rd,
- .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
- };
- tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
- if (tmpl.qp == NULL) {
- ret = (errno ? errno : EINVAL);
- ERROR("%p: QP creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
-#if MLX4_PMD_MAX_INLINE > 0
- /* ibv_create_qp() updates this value. */
- tmpl.max_inline = attr.init.cap.max_inline_data;
-#endif
- attr.mod = (struct ibv_exp_qp_attr){
- /* Move the QP to this state. */
- .qp_state = IBV_QPS_INIT,
- /* Primary port number. */
- .port_num = priv->port
- };
- ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod,
- (IBV_EXP_QP_STATE | IBV_EXP_QP_PORT));
- if (ret) {
- ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- ret = txq_alloc_elts(&tmpl, desc);
- if (ret) {
- ERROR("%p: TXQ allocation failed: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- attr.mod = (struct ibv_exp_qp_attr){
- .qp_state = IBV_QPS_RTR
- };
- ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
- if (ret) {
- ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- attr.mod.qp_state = IBV_QPS_RTS;
- ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
- if (ret) {
- ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- attr.params = (struct ibv_exp_query_intf_params){
- .intf_scope = IBV_EXP_INTF_GLOBAL,
- .intf = IBV_EXP_INTF_CQ,
- .obj = tmpl.cq,
- };
- tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
- if (tmpl.if_cq == NULL) {
- ERROR("%p: CQ interface family query failed with status %d",
- (void *)dev, status);
- goto error;
- }
- attr.params = (struct ibv_exp_query_intf_params){
- .intf_scope = IBV_EXP_INTF_GLOBAL,
- .intf = IBV_EXP_INTF_QP_BURST,
- .obj = tmpl.qp,
-#ifdef HAVE_EXP_QP_BURST_CREATE_DISABLE_ETH_LOOPBACK
- /* MC loopback must be disabled when not using a VF. */
- .family_flags =
- (!priv->vf ?
- IBV_EXP_QP_BURST_CREATE_DISABLE_ETH_LOOPBACK :
- 0),
-#endif
- };
- tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
- if (tmpl.if_qp == NULL) {
- ERROR("%p: QP interface family query failed with status %d",
- (void *)dev, status);
- goto error;
- }
- /* Clean up txq in case we're reinitializing it. */
- DEBUG("%p: cleaning-up old txq just in case", (void *)txq);
- txq_cleanup(txq);
- *txq = tmpl;
- DEBUG("%p: txq updated with %p", (void *)txq, (void *)&tmpl);
- /* Pre-register known mempools. */
- rte_mempool_walk(txq_mp2mr_iter, txq);
- assert(ret == 0);
- return 0;
-error:
- txq_cleanup(&tmpl);
- assert(ret > 0);
- return ret;
-}
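
For reference, the sequence above is the usual RESET→INIT→RTR→RTS progression for a raw packet QP, just expressed through the experimental verbs interface. A condensed sketch using plain libibverbs calls (creating the QP itself requires a device and is omitted; the port argument is an assumption):

#include <infiniband/verbs.h>

/* Bring an already created raw packet QP to the RTS state. */
static int
qp_to_rts(struct ibv_qp *qp, uint8_t port)
{
	struct ibv_qp_attr mod = {
		.qp_state = IBV_QPS_INIT,
		.port_num = port,
	};
	int ret = ibv_modify_qp(qp, &mod, IBV_QP_STATE | IBV_QP_PORT);

	if (ret)
		return ret;
	mod.qp_state = IBV_QPS_RTR;
	ret = ibv_modify_qp(qp, &mod, IBV_QP_STATE);
	if (ret)
		return ret;
	mod.qp_state = IBV_QPS_RTS;
	return ibv_modify_qp(qp, &mod, IBV_QP_STATE);
}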
-
-/**
- * DPDK callback to configure a TX queue.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param idx
- * TX queue index.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param socket
- * NUMA socket on which memory must be allocated.
- * @param[in] conf
- * Thresholds parameters.
- *
- * @return
- * 0 on success, negative errno value on failure.
- */
-static int
-mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
- unsigned int socket, const struct rte_eth_txconf *conf)
-{
- struct priv *priv = dev->data->dev_private;
- struct txq *txq = (*priv->txqs)[idx];
- int ret;
-
- if (mlx4_is_secondary())
- return -E_RTE_SECONDARY;
- priv_lock(priv);
- DEBUG("%p: configuring queue %u for %u descriptors",
- (void *)dev, idx, desc);
- if (idx >= priv->txqs_n) {
- ERROR("%p: queue index out of range (%u >= %u)",
- (void *)dev, idx, priv->txqs_n);
- priv_unlock(priv);
- return -EOVERFLOW;
- }
- if (txq != NULL) {
- DEBUG("%p: reusing already allocated queue index %u (%p)",
- (void *)dev, idx, (void *)txq);
- if (priv->started) {
- priv_unlock(priv);
- return -EEXIST;
- }
- (*priv->txqs)[idx] = NULL;
- txq_cleanup(txq);
- } else {
- txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0, socket);
- if (txq == NULL) {
- ERROR("%p: unable to allocate queue index %u",
- (void *)dev, idx);
- priv_unlock(priv);
- return -ENOMEM;
- }
- }
- ret = txq_setup(dev, txq, desc, socket, conf);
- if (ret)
- rte_free(txq);
- else {
- txq->stats.idx = idx;
- DEBUG("%p: adding TX queue %p to list",
- (void *)dev, (void *)txq);
- (*priv->txqs)[idx] = txq;
- /* Update send callback. */
- dev->tx_pkt_burst = mlx4_tx_burst;
- }
- priv_unlock(priv);
- return -ret;
-}
-
-/**
- * DPDK callback to release a TX queue.
- *
- * @param dpdk_txq
- * Generic TX queue pointer.
- */
-static void
-mlx4_tx_queue_release(void *dpdk_txq)
-{
- struct txq *txq = (struct txq *)dpdk_txq;
- struct priv *priv;
- unsigned int i;
-
- if (mlx4_is_secondary())
- return;
- if (txq == NULL)
- return;
- priv = txq->priv;
- priv_lock(priv);
- for (i = 0; (i != priv->txqs_n); ++i)
- if ((*priv->txqs)[i] == txq) {
- DEBUG("%p: removing TX queue %p from list",
- (void *)priv->dev, (void *)txq);
- (*priv->txqs)[i] = NULL;
- break;
- }
- txq_cleanup(txq);
- rte_free(txq);
- priv_unlock(priv);
-}
-
-/* RX queues handling. */
-
-/**
- * Allocate RX queue elements with scattered packets support.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param elts_n
- * Number of elements to allocate.
- * @param[in] pool
- * If not NULL, fetch buffers from this array instead of allocating them
- * with rte_pktmbuf_alloc().
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
- struct rte_mbuf **pool)
-{
- unsigned int i;
- struct rxq_elt_sp (*elts)[elts_n] =
- rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
- rxq->socket);
- int ret = 0;
-
- if (elts == NULL) {
- ERROR("%p: can't allocate packets array", (void *)rxq);
- ret = ENOMEM;
- goto error;
- }
- /* For each WR (packet). */
- for (i = 0; (i != elts_n); ++i) {
- unsigned int j;
- struct rxq_elt_sp *elt = &(*elts)[i];
- struct ibv_recv_wr *wr = &elt->wr;
- struct ibv_sge (*sges)[(elemof(elt->sges))] = &elt->sges;
-
- /* These two arrays must have the same size. */
- assert(elemof(elt->sges) == elemof(elt->bufs));
- /* Configure WR. */
- wr->wr_id = i;
- wr->next = &(*elts)[(i + 1)].wr;
- wr->sg_list = &(*sges)[0];
- wr->num_sge = elemof(*sges);
- /* For each SGE (segment). */
- for (j = 0; (j != elemof(elt->bufs)); ++j) {
- struct ibv_sge *sge = &(*sges)[j];
- struct rte_mbuf *buf;
-
- if (pool != NULL) {
- buf = *(pool++);
- assert(buf != NULL);
- rte_pktmbuf_reset(buf);
- } else
- buf = rte_pktmbuf_alloc(rxq->mp);
- if (buf == NULL) {
- assert(pool == NULL);
- ERROR("%p: empty mbuf pool", (void *)rxq);
- ret = ENOMEM;
- goto error;
- }
- elt->bufs[j] = buf;
- /* Headroom is reserved by rte_pktmbuf_alloc(). */
- assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
- /* Buffer is supposed to be empty. */
- assert(rte_pktmbuf_data_len(buf) == 0);
- assert(rte_pktmbuf_pkt_len(buf) == 0);
- /* sge->addr must be able to store a pointer. */
- assert(sizeof(sge->addr) >= sizeof(uintptr_t));
- if (j == 0) {
- /* The first SGE keeps its headroom. */
- sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
- sge->length = (buf->buf_len -
- RTE_PKTMBUF_HEADROOM);
- } else {
- /* Subsequent SGEs lose theirs. */
- assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
- SET_DATA_OFF(buf, 0);
- sge->addr = (uintptr_t)buf->buf_addr;
- sge->length = buf->buf_len;
- }
- sge->lkey = rxq->mr->lkey;
- /* Redundant check for tailroom. */
- assert(sge->length == rte_pktmbuf_tailroom(buf));
- }
- }
- /* The last WR pointer must be NULL. */
- (*elts)[(i - 1)].wr.next = NULL;
- DEBUG("%p: allocated and configured %u WRs (%zu segments)",
- (void *)rxq, elts_n, (elts_n * elemof((*elts)[0].sges)));
- rxq->elts_n = elts_n;
- rxq->elts_head = 0;
- rxq->elts.sp = elts;
- assert(ret == 0);
- return 0;
-error:
- if (elts != NULL) {
- assert(pool == NULL);
- for (i = 0; (i != elemof(*elts)); ++i) {
- unsigned int j;
- struct rxq_elt_sp *elt = &(*elts)[i];
-
- for (j = 0; (j != elemof(elt->bufs)); ++j) {
- struct rte_mbuf *buf = elt->bufs[j];
-
- if (buf != NULL)
- rte_pktmbuf_free_seg(buf);
- }
- }
- rte_free(elts);
- }
- DEBUG("%p: failed, freed everything", (void *)rxq);
- assert(ret > 0);
- return ret;
-}
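
Note how only the first SGE of a scattered element keeps the mbuf headroom while the following segments give theirs up to maximize usable room. A tiny sketch of the resulting segment sizes (the headroom and data-room values are assumptions for illustration):

#include <stdio.h>

#define PKTMBUF_HEADROOM 128 /* assumed build-time default */

int
main(void)
{
	unsigned int buf_len = 2048; /* assumed mbuf data room */
	unsigned int first_sge_len = buf_len - PKTMBUF_HEADROOM;
	unsigned int other_sge_len = buf_len;

	/* Only the first segment of a scattered packet keeps headroom. */
	printf("first SGE %u bytes, subsequent SGEs %u bytes\n",
	       first_sge_len, other_sge_len);
	return 0;
}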
-
-/**
- * Free RX queue elements with scattered packets support.
- *
- * @param rxq
- * Pointer to RX queue structure.
- */
-static void
-rxq_free_elts_sp(struct rxq *rxq)
-{
- unsigned int i;
- unsigned int elts_n = rxq->elts_n;
- struct rxq_elt_sp (*elts)[elts_n] = rxq->elts.sp;
-
- DEBUG("%p: freeing WRs", (void *)rxq);
- rxq->elts_n = 0;
- rxq->elts.sp = NULL;
- if (elts == NULL)
- return;
- for (i = 0; (i != elemof(*elts)); ++i) {
- unsigned int j;
- struct rxq_elt_sp *elt = &(*elts)[i];
-
- for (j = 0; (j != elemof(elt->bufs)); ++j) {
- struct rte_mbuf *buf = elt->bufs[j];
-
- if (buf != NULL)
- rte_pktmbuf_free_seg(buf);
- }
- }
- rte_free(elts);
-}
-
-/**
- * Allocate RX queue elements.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param elts_n
- * Number of elements to allocate.
- * @param[in] pool
- * If not NULL, fetch buffers from this array instead of allocating them
- * with rte_pktmbuf_alloc().
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
-{
- unsigned int i;
- struct rxq_elt (*elts)[elts_n] =
- rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
- rxq->socket);
- int ret = 0;
-
- if (elts == NULL) {
- ERROR("%p: can't allocate packets array", (void *)rxq);
- ret = ENOMEM;
- goto error;
- }
- /* For each WR (packet). */
- for (i = 0; (i != elts_n); ++i) {
- struct rxq_elt *elt = &(*elts)[i];
- struct ibv_recv_wr *wr = &elt->wr;
- struct ibv_sge *sge = &(*elts)[i].sge;
- struct rte_mbuf *buf;
-
- if (pool != NULL) {
- buf = *(pool++);
- assert(buf != NULL);
- rte_pktmbuf_reset(buf);
- } else
- buf = rte_pktmbuf_alloc(rxq->mp);
- if (buf == NULL) {
- assert(pool == NULL);
- ERROR("%p: empty mbuf pool", (void *)rxq);
- ret = ENOMEM;
- goto error;
- }
- /* Configure WR. Work request ID contains its own index in
- * the elts array and the offset between SGE buffer header and
- * its data. */
- WR_ID(wr->wr_id).id = i;
- WR_ID(wr->wr_id).offset =
- (((uintptr_t)buf->buf_addr + RTE_PKTMBUF_HEADROOM) -
- (uintptr_t)buf);
- wr->next = &(*elts)[(i + 1)].wr;
- wr->sg_list = sge;
- wr->num_sge = 1;
- /* Headroom is reserved by rte_pktmbuf_alloc(). */
- assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
- /* Buffer is supposed to be empty. */
- assert(rte_pktmbuf_data_len(buf) == 0);
- assert(rte_pktmbuf_pkt_len(buf) == 0);
- /* sge->addr must be able to store a pointer. */
- assert(sizeof(sge->addr) >= sizeof(uintptr_t));
- /* SGE keeps its headroom. */
- sge->addr = (uintptr_t)
- ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
- sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
- sge->lkey = rxq->mr->lkey;
- /* Redundant check for tailroom. */
- assert(sge->length == rte_pktmbuf_tailroom(buf));
- /* Make sure elts index and SGE mbuf pointer can be deduced
- * from WR ID. */
- if ((WR_ID(wr->wr_id).id != i) ||
- ((void *)((uintptr_t)sge->addr -
- WR_ID(wr->wr_id).offset) != buf)) {
- ERROR("%p: cannot store index and offset in WR ID",
- (void *)rxq);
- sge->addr = 0;
- rte_pktmbuf_free(buf);
- ret = EOVERFLOW;
- goto error;
- }
- }
- /* The last WR pointer must be NULL. */
- (*elts)[(i - 1)].wr.next = NULL;
- DEBUG("%p: allocated and configured %u single-segment WRs",
- (void *)rxq, elts_n);
- rxq->elts_n = elts_n;
- rxq->elts_head = 0;
- rxq->elts.no_sp = elts;
- assert(ret == 0);
- return 0;
-error:
- if (elts != NULL) {
- assert(pool == NULL);
- for (i = 0; (i != elemof(*elts)); ++i) {
- struct rxq_elt *elt = &(*elts)[i];
- struct rte_mbuf *buf;
-
- if (elt->sge.addr == 0)
- continue;
- assert(WR_ID(elt->wr.wr_id).id == i);
- buf = (void *)((uintptr_t)elt->sge.addr -
- WR_ID(elt->wr.wr_id).offset);
- rte_pktmbuf_free_seg(buf);
- }
- rte_free(elts);
- }
- DEBUG("%p: failed, freed everything", (void *)rxq);
- assert(ret > 0);
- return ret;
-}
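
The work request ID trick above packs both the element index and the offset from the mbuf start to its data area into the 64-bit wr_id, so the mbuf pointer can be recovered from a completion without any extra lookup. A self-contained sketch of the same encode/decode (the field widths mirror what the WR_ID() helper is assumed to use):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed layout: 32-bit element index plus 16-bit data offset packed
 * into the 64-bit ibv_recv_wr.wr_id. */
typedef union {
	struct {
		uint32_t id;
		uint16_t offset;
	} data;
	uint64_t raw;
} wr_id_t;

int
main(void)
{
	wr_id_t wr_id = { .raw = 0 };
	uintptr_t buf = 0x7f0000100000UL; /* made-up mbuf address */
	uintptr_t data = buf + 0x180;     /* header + headroom, made up */

	wr_id.data.id = 42;
	wr_id.data.offset = data - buf;
	/* On completion, the SGE address minus the offset yields the mbuf. */
	assert((uintptr_t)(data - wr_id.data.offset) == buf);
	printf("elt %u, offset %u\n", wr_id.data.id,
	       (unsigned int)wr_id.data.offset);
	return 0;
}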
-
-/**
- * Free RX queue elements.
- *
- * @param rxq
- * Pointer to RX queue structure.
- */
-static void
-rxq_free_elts(struct rxq *rxq)
-{
- unsigned int i;
- unsigned int elts_n = rxq->elts_n;
- struct rxq_elt (*elts)[elts_n] = rxq->elts.no_sp;
-
- DEBUG("%p: freeing WRs", (void *)rxq);
- rxq->elts_n = 0;
- rxq->elts.no_sp = NULL;
- if (elts == NULL)
- return;
- for (i = 0; (i != elemof(*elts)); ++i) {
- struct rxq_elt *elt = &(*elts)[i];
- struct rte_mbuf *buf;
-
- if (elt->sge.addr == 0)
- continue;
- assert(WR_ID(elt->wr.wr_id).id == i);
- buf = (void *)((uintptr_t)elt->sge.addr -
- WR_ID(elt->wr.wr_id).offset);
- rte_pktmbuf_free_seg(buf);
- }
- rte_free(elts);
-}
-
-/**
- * Delete flow steering rule.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param mac_index
- * MAC address index.
- * @param vlan_index
- * VLAN index.
- */
-static void
-rxq_del_flow(struct rxq *rxq, unsigned int mac_index, unsigned int vlan_index)
-{
-#ifndef NDEBUG
- struct priv *priv = rxq->priv;
- const uint8_t (*mac)[ETHER_ADDR_LEN] =
- (const uint8_t (*)[ETHER_ADDR_LEN])
- priv->mac[mac_index].addr_bytes;
-#endif
- assert(rxq->mac_flow[mac_index][vlan_index] != NULL);
- DEBUG("%p: removing MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
- " (VLAN ID %" PRIu16 ")",
- (void *)rxq,
- (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
- mac_index, priv->vlan_filter[vlan_index].id);
- claim_zero(ibv_destroy_flow(rxq->mac_flow[mac_index][vlan_index]));
- rxq->mac_flow[mac_index][vlan_index] = NULL;
-}
-
-/**
- * Unregister a MAC address from a RX queue.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param mac_index
- * MAC address index.
- */
-static void
-rxq_mac_addr_del(struct rxq *rxq, unsigned int mac_index)
-{
- struct priv *priv = rxq->priv;
- unsigned int i;
- unsigned int vlans = 0;
-
- assert(mac_index < elemof(priv->mac));
- if (!BITFIELD_ISSET(rxq->mac_configured, mac_index))
- return;
- for (i = 0; (i != elemof(priv->vlan_filter)); ++i) {
- if (!priv->vlan_filter[i].enabled)
- continue;
- rxq_del_flow(rxq, mac_index, i);
- vlans++;
- }
- if (!vlans) {
- rxq_del_flow(rxq, mac_index, 0);
- }
- BITFIELD_RESET(rxq->mac_configured, mac_index);
-}
-
-/**
- * Unregister all MAC addresses from a RX queue.
- *
- * @param rxq
- * Pointer to RX queue structure.
- */
-static void
-rxq_mac_addrs_del(struct rxq *rxq)
-{
- struct priv *priv = rxq->priv;
- unsigned int i;
-
- for (i = 0; (i != elemof(priv->mac)); ++i)
- rxq_mac_addr_del(rxq, i);
-}
-
-static int rxq_promiscuous_enable(struct rxq *);
-static void rxq_promiscuous_disable(struct rxq *);
-
-/**
- * Add single flow steering rule.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param mac_index
- * MAC address index to register.
- * @param vlan_index
- * VLAN index. Use -1 for a flow without VLAN.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-rxq_add_flow(struct rxq *rxq, unsigned int mac_index, unsigned int vlan_index)
-{
- struct ibv_flow *flow;
- struct priv *priv = rxq->priv;
- const uint8_t (*mac)[ETHER_ADDR_LEN] =
- (const uint8_t (*)[ETHER_ADDR_LEN])
- priv->mac[mac_index].addr_bytes;
-
- /* Allocate flow specification on the stack. */
- struct __attribute__((packed)) {
- struct ibv_flow_attr attr;
- struct ibv_flow_spec_eth spec;
- } data;
- struct ibv_flow_attr *attr = &data.attr;
- struct ibv_flow_spec_eth *spec = &data.spec;
-
- assert(mac_index < elemof(priv->mac));
- assert((vlan_index < elemof(priv->vlan_filter)) || (vlan_index == -1u));
- /*
- * No padding must be inserted by the compiler between attr and spec.
- * This layout is expected by libibverbs.
- */
- assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
- *attr = (struct ibv_flow_attr){
- .type = IBV_FLOW_ATTR_NORMAL,
- .priority = 3,
- .num_of_specs = 1,
- .port = priv->port,
- .flags = 0
- };
- *spec = (struct ibv_flow_spec_eth){
- .type = IBV_FLOW_SPEC_ETH,
- .size = sizeof(*spec),
- .val = {
- .dst_mac = {
- (*mac)[0], (*mac)[1], (*mac)[2],
- (*mac)[3], (*mac)[4], (*mac)[5]
- },
- .vlan_tag = ((vlan_index != -1u) ?
- htons(priv->vlan_filter[vlan_index].id) :
- 0),
- },
- .mask = {
- .dst_mac = "\xff\xff\xff\xff\xff\xff",
- .vlan_tag = ((vlan_index != -1u) ? htons(0xfff) : 0),
- }
- };
- DEBUG("%p: adding MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
- " (VLAN %s %" PRIu16 ")",
- (void *)rxq,
- (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
- mac_index,
- ((vlan_index != -1u) ? "ID" : "index"),
- ((vlan_index != -1u) ? priv->vlan_filter[vlan_index].id : -1u));
- /* Create related flow. */
- errno = 0;
- flow = ibv_create_flow(rxq->qp, attr);
- if (flow == NULL) {
- /* It's not clear whether errno is always set in this case. */
- ERROR("%p: flow configuration failed, errno=%d: %s",
- (void *)rxq, errno,
- (errno ? strerror(errno) : "Unknown error"));
- if (errno)
- return errno;
- return EINVAL;
- }
- if (vlan_index == -1u)
- vlan_index = 0;
- assert(rxq->mac_flow[mac_index][vlan_index] == NULL);
- rxq->mac_flow[mac_index][vlan_index] = flow;
- return 0;
-}
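
The packed wrapper above matters because libibverbs expects the flow specifications to follow ibv_flow_attr immediately in memory. A small sketch checking that invariant at compile time (a hypothetical wrapper; C11 static_assert assumed available):

#include <assert.h>
#include <stddef.h>
#include <infiniband/verbs.h>

struct __attribute__((packed)) flow_wrapper {
	struct ibv_flow_attr attr;
	struct ibv_flow_spec_eth spec;
};

int
main(void)
{
	/* Same invariant the driver asserts before ibv_create_flow(). */
	static_assert(offsetof(struct flow_wrapper, spec) ==
		      sizeof(struct ibv_flow_attr),
		      "padding inserted between attr and spec");
	return 0;
}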
-
-/**
- * Register a MAC address in a RX queue.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param mac_index
- * MAC address index to register.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-rxq_mac_addr_add(struct rxq *rxq, unsigned int mac_index)
-{
- struct priv *priv = rxq->priv;
- unsigned int i;
- unsigned int vlans = 0;
- int ret;
-
- assert(mac_index < elemof(priv->mac));
- if (BITFIELD_ISSET(rxq->mac_configured, mac_index))
- rxq_mac_addr_del(rxq, mac_index);
- /* Fill VLAN specifications. */
- for (i = 0; (i != elemof(priv->vlan_filter)); ++i) {
- if (!priv->vlan_filter[i].enabled)
- continue;
- /* Create related flow. */
- ret = rxq_add_flow(rxq, mac_index, i);
- if (!ret) {
- vlans++;
- continue;
- }
- /* Failure, rollback. */
- while (i != 0)
- if (priv->vlan_filter[--i].enabled)
- rxq_del_flow(rxq, mac_index, i);
- assert(ret > 0);
- return ret;
- }
- /* In case there is no VLAN filter. */
- if (!vlans) {
- ret = rxq_add_flow(rxq, mac_index, -1);
- if (ret)
- return ret;
- }
- BITFIELD_SET(rxq->mac_configured, mac_index);
- return 0;
-}
-
-/**
- * Register all MAC addresses in a RX queue.
- *
- * @param rxq
- * Pointer to RX queue structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-rxq_mac_addrs_add(struct rxq *rxq)
-{
- struct priv *priv = rxq->priv;
- unsigned int i;
- int ret;
-
- for (i = 0; (i != elemof(priv->mac)); ++i) {
- if (!BITFIELD_ISSET(priv->mac_configured, i))
- continue;
- ret = rxq_mac_addr_add(rxq, i);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0)
- rxq_mac_addr_del(rxq, --i);
- assert(ret > 0);
- return ret;
- }
- return 0;
-}
-
-/**
- * Unregister a MAC address.
- *
- * In RSS mode, the MAC address is unregistered from the parent queue,
- * otherwise it is unregistered from each queue directly.
- *
- * @param priv
- * Pointer to private structure.
- * @param mac_index
- * MAC address index.
- */
-static void
-priv_mac_addr_del(struct priv *priv, unsigned int mac_index)
-{
- unsigned int i;
-
- assert(!priv->isolated);
- assert(mac_index < elemof(priv->mac));
- if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
- return;
- if (priv->rss) {
- rxq_mac_addr_del(LIST_FIRST(&priv->parents), mac_index);
- goto end;
- }
- for (i = 0; (i != priv->dev->data->nb_rx_queues); ++i)
- rxq_mac_addr_del((*priv->rxqs)[i], mac_index);
-end:
- BITFIELD_RESET(priv->mac_configured, mac_index);
-}
-
-/**
- * Register a MAC address.
- *
- * In RSS mode, the MAC address is registered in the parent queue,
- * otherwise it is registered in each queue directly.
- *
- * @param priv
- * Pointer to private structure.
- * @param mac_index
- * MAC address index to use.
- * @param mac
- * MAC address to register.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_mac_addr_add(struct priv *priv, unsigned int mac_index,
- const uint8_t (*mac)[ETHER_ADDR_LEN])
-{
- unsigned int i;
- int ret;
-
- assert(mac_index < elemof(priv->mac));
- /* First, make sure this address isn't already configured. */
- for (i = 0; (i != elemof(priv->mac)); ++i) {
- /* Skip this index, it's going to be reconfigured. */
- if (i == mac_index)
- continue;
- if (!BITFIELD_ISSET(priv->mac_configured, i))
- continue;
- if (memcmp(priv->mac[i].addr_bytes, *mac, sizeof(*mac)))
- continue;
- /* Address already configured elsewhere, return with error. */
- return EADDRINUSE;
- }
- if (BITFIELD_ISSET(priv->mac_configured, mac_index))
- priv_mac_addr_del(priv, mac_index);
- priv->mac[mac_index] = (struct ether_addr){
- {
- (*mac)[0], (*mac)[1], (*mac)[2],
- (*mac)[3], (*mac)[4], (*mac)[5]
- }
- };
- /* If device isn't started, this is all we need to do. */
- if (!priv->started) {
-#ifndef NDEBUG
- /* Verify that all queues have this index disabled. */
- for (i = 0; (i != priv->rxqs_n); ++i) {
- if ((*priv->rxqs)[i] == NULL)
- continue;
- assert(!BITFIELD_ISSET
- ((*priv->rxqs)[i]->mac_configured, mac_index));
- }
-#endif
- goto end;
- }
- if (priv->rss) {
- ret = rxq_mac_addr_add(LIST_FIRST(&priv->parents), mac_index);
- if (ret)
- return ret;
- goto end;
- }
- for (i = 0; (i != priv->rxqs_n); ++i) {
- if ((*priv->rxqs)[i] == NULL)
- continue;
- ret = rxq_mac_addr_add((*priv->rxqs)[i], mac_index);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0)
- if ((*priv->rxqs)[(--i)] != NULL)
- rxq_mac_addr_del((*priv->rxqs)[i], mac_index);
- return ret;
- }
-end:
- BITFIELD_SET(priv->mac_configured, mac_index);
- return 0;
-}
-
-/**
- * Enable allmulti mode in a RX queue.
- *
- * @param rxq
- * Pointer to RX queue structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-rxq_allmulticast_enable(struct rxq *rxq)
-{
- struct ibv_flow *flow;
- struct ibv_flow_attr attr = {
- .type = IBV_FLOW_ATTR_MC_DEFAULT,
- .num_of_specs = 0,
- .port = rxq->priv->port,
- .flags = 0
- };
-
- DEBUG("%p: enabling allmulticast mode", (void *)rxq);
- if (rxq->allmulti_flow != NULL)
- return EBUSY;
- errno = 0;
- flow = ibv_create_flow(rxq->qp, &attr);
- if (flow == NULL) {
- /* It's not clear whether errno is always set in this case. */
- ERROR("%p: flow configuration failed, errno=%d: %s",
- (void *)rxq, errno,
- (errno ? strerror(errno) : "Unknown error"));
- if (errno)
- return errno;
- return EINVAL;
- }
- rxq->allmulti_flow = flow;
- DEBUG("%p: allmulticast mode enabled", (void *)rxq);
- return 0;
-}
-
-/**
- * Disable allmulti mode in a RX queue.
- *
- * @param rxq
- * Pointer to RX queue structure.
- */
-static void
-rxq_allmulticast_disable(struct rxq *rxq)
-{
- DEBUG("%p: disabling allmulticast mode", (void *)rxq);
- if (rxq->allmulti_flow == NULL)
- return;
- claim_zero(ibv_destroy_flow(rxq->allmulti_flow));
- rxq->allmulti_flow = NULL;
- DEBUG("%p: allmulticast mode disabled", (void *)rxq);
-}
-
-/**
- * Enable promiscuous mode in a RX queue.
- *
- * @param rxq
- * Pointer to RX queue structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-rxq_promiscuous_enable(struct rxq *rxq)
-{
- struct ibv_flow *flow;
- struct ibv_flow_attr attr = {
- .type = IBV_FLOW_ATTR_ALL_DEFAULT,
- .num_of_specs = 0,
- .port = rxq->priv->port,
- .flags = 0
- };
-
- if (rxq->priv->vf)
- return 0;
- DEBUG("%p: enabling promiscuous mode", (void *)rxq);
- if (rxq->promisc_flow != NULL)
- return EBUSY;
- errno = 0;
- flow = ibv_create_flow(rxq->qp, &attr);
- if (flow == NULL) {
- /* It's not clear whether errno is always set in this case. */
- ERROR("%p: flow configuration failed, errno=%d: %s",
- (void *)rxq, errno,
- (errno ? strerror(errno) : "Unknown error"));
- if (errno)
- return errno;
- return EINVAL;
- }
- rxq->promisc_flow = flow;
- DEBUG("%p: promiscuous mode enabled", (void *)rxq);
- return 0;
-}
-
-/**
- * Disable promiscuous mode in a RX queue.
- *
- * @param rxq
- * Pointer to RX queue structure.
- */
-static void
-rxq_promiscuous_disable(struct rxq *rxq)
-{
- if (rxq->priv->vf)
- return;
- DEBUG("%p: disabling promiscuous mode", (void *)rxq);
- if (rxq->promisc_flow == NULL)
- return;
- claim_zero(ibv_destroy_flow(rxq->promisc_flow));
- rxq->promisc_flow = NULL;
- DEBUG("%p: promiscuous mode disabled", (void *)rxq);
-}
-
-/**
- * Clean up a RX queue.
- *
- * Destroy objects, free allocated memory and reset the structure for reuse.
- *
- * @param rxq
- * Pointer to RX queue structure.
- */
-static void
-rxq_cleanup(struct rxq *rxq)
-{
- struct ibv_exp_release_intf_params params;
-
- DEBUG("cleaning up %p", (void *)rxq);
- if (rxq->sp)
- rxq_free_elts_sp(rxq);
- else
- rxq_free_elts(rxq);
- if (rxq->if_qp != NULL) {
- assert(rxq->priv != NULL);
- assert(rxq->priv->ctx != NULL);
- assert(rxq->qp != NULL);
- params = (struct ibv_exp_release_intf_params){
- .comp_mask = 0,
- };
- claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
- rxq->if_qp,
- &params));
- }
- if (rxq->if_cq != NULL) {
- assert(rxq->priv != NULL);
- assert(rxq->priv->ctx != NULL);
- assert(rxq->cq != NULL);
- params = (struct ibv_exp_release_intf_params){
- .comp_mask = 0,
- };
- claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
- rxq->if_cq,
- &params));
- }
- if (rxq->qp != NULL && !rxq->priv->isolated) {
- rxq_promiscuous_disable(rxq);
- rxq_allmulticast_disable(rxq);
- rxq_mac_addrs_del(rxq);
- }
- if (rxq->qp != NULL)
- claim_zero(ibv_destroy_qp(rxq->qp));
- if (rxq->cq != NULL)
- claim_zero(ibv_destroy_cq(rxq->cq));
- if (rxq->channel != NULL)
- claim_zero(ibv_destroy_comp_channel(rxq->channel));
- if (rxq->rd != NULL) {
- struct ibv_exp_destroy_res_domain_attr attr = {
- .comp_mask = 0,
- };
-
- assert(rxq->priv != NULL);
- assert(rxq->priv->ctx != NULL);
- claim_zero(ibv_exp_destroy_res_domain(rxq->priv->ctx,
- rxq->rd,
- &attr));
- }
- if (rxq->mr != NULL)
- claim_zero(ibv_dereg_mr(rxq->mr));
- memset(rxq, 0, sizeof(*rxq));
-}
-
-/**
- * Translate RX completion flags to packet type.
- *
- * @param flags
- * RX completion flags returned by poll_length_flags().
- *
- * @note: fix mlx4_dev_supported_ptypes_get() if any change here.
- *
- * @return
- * Packet type for struct rte_mbuf.
- */
-static inline uint32_t
-rxq_cq_to_pkt_type(uint32_t flags)
-{
- uint32_t pkt_type;
-
- if (flags & IBV_EXP_CQ_RX_TUNNEL_PACKET)
- pkt_type =
- TRANSPOSE(flags,
- IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
- RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) |
- TRANSPOSE(flags,
- IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
- RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) |
- TRANSPOSE(flags,
- IBV_EXP_CQ_RX_IPV4_PACKET,
- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN) |
- TRANSPOSE(flags,
- IBV_EXP_CQ_RX_IPV6_PACKET,
- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN);
- else
- pkt_type =
- TRANSPOSE(flags,
- IBV_EXP_CQ_RX_IPV4_PACKET,
- RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) |
- TRANSPOSE(flags,
- IBV_EXP_CQ_RX_IPV6_PACKET,
- RTE_PTYPE_L3_IPV6_EXT_UNKNOWN);
- return pkt_type;
-}
-
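The TRANSPOSE() calls above move single completion-flag bits onto the corresponding RTE_PTYPE_* bits. A hedged sketch of one way such a transposition can be expressed (helper names are illustrative, not the PMD's actual macro):

#include <stdint.h>

/*
 * Move a single-bit flag from mask "from" to mask "to".
 * Both masks are assumed to contain exactly one set bit.
 */
static inline uint32_t
transpose_bit(uint32_t val, uint32_t from, uint32_t to)
{
	return (val & from) ? to : 0;
}

/* Example: combine several transposed bits into a packet type word. */
static inline uint32_t
flags_to_ptype(uint32_t flags, uint32_t ipv4_flag, uint32_t ipv6_flag,
	       uint32_t ptype_ipv4, uint32_t ptype_ipv6)
{
	return transpose_bit(flags, ipv4_flag, ptype_ipv4) |
	       transpose_bit(flags, ipv6_flag, ptype_ipv6);
}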
-/**
- * Translate RX completion flags to offload flags.
- *
- * @param[in] rxq
- * Pointer to RX queue structure.
- * @param flags
- * RX completion flags returned by poll_length_flags().
- *
- * @return
- * Offload flags (ol_flags) for struct rte_mbuf.
- */
-static inline uint32_t
-rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags)
-{
- uint32_t ol_flags = 0;
-
- if (rxq->csum)
- ol_flags |=
- TRANSPOSE(flags,
- IBV_EXP_CQ_RX_IP_CSUM_OK,
- PKT_RX_IP_CKSUM_GOOD) |
- TRANSPOSE(flags,
- IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK,
- PKT_RX_L4_CKSUM_GOOD);
- if ((flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
- ol_flags |=
- TRANSPOSE(flags,
- IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
- PKT_RX_IP_CKSUM_GOOD) |
- TRANSPOSE(flags,
- IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
- PKT_RX_L4_CKSUM_GOOD);
- return ol_flags;
-}
-
-static uint16_t
-mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
-
-/**
- * DPDK callback for RX with scattered packets support.
- *
- * @param dpdk_rxq
- * Generic pointer to RX queue structure.
- * @param[out] pkts
- * Array to store received packets.
- * @param pkts_n
- * Maximum number of packets in array.
- *
- * @return
- * Number of packets successfully received (<= pkts_n).
- */
-static uint16_t
-mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- struct rxq *rxq = (struct rxq *)dpdk_rxq;
- struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;
- const unsigned int elts_n = rxq->elts_n;
- unsigned int elts_head = rxq->elts_head;
- struct ibv_recv_wr head;
- struct ibv_recv_wr **next = &head.next;
- struct ibv_recv_wr *bad_wr;
- unsigned int i;
- unsigned int pkts_ret = 0;
- int ret;
-
- if (unlikely(!rxq->sp))
- return mlx4_rx_burst(dpdk_rxq, pkts, pkts_n);
- if (unlikely(elts == NULL)) /* See RTE_DEV_CMD_SET_MTU. */
- return 0;
- for (i = 0; (i != pkts_n); ++i) {
- struct rxq_elt_sp *elt = &(*elts)[elts_head];
- struct ibv_recv_wr *wr = &elt->wr;
- uint64_t wr_id = wr->wr_id;
- unsigned int len;
- unsigned int pkt_buf_len;
- struct rte_mbuf *pkt_buf = NULL; /* Buffer returned in pkts. */
- struct rte_mbuf **pkt_buf_next = &pkt_buf;
- unsigned int seg_headroom = RTE_PKTMBUF_HEADROOM;
- unsigned int j = 0;
- uint32_t flags;
-
- /* Sanity checks. */
-#ifdef NDEBUG
- (void)wr_id;
-#endif
- assert(wr_id < rxq->elts_n);
- assert(wr->sg_list == elt->sges);
- assert(wr->num_sge == elemof(elt->sges));
- assert(elts_head < rxq->elts_n);
- assert(rxq->elts_head < rxq->elts_n);
- ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
- &flags);
- if (unlikely(ret < 0)) {
- struct ibv_wc wc;
- int wcs_n;
-
- DEBUG("rxq=%p, poll_length() failed (ret=%d)",
- (void *)rxq, ret);
- /* ibv_poll_cq() must be used in case of failure. */
- wcs_n = ibv_poll_cq(rxq->cq, 1, &wc);
- if (unlikely(wcs_n == 0))
- break;
- if (unlikely(wcs_n < 0)) {
- DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)",
- (void *)rxq, wcs_n);
- break;
- }
- assert(wcs_n == 1);
- if (unlikely(wc.status != IBV_WC_SUCCESS)) {
- /* Whatever, just repost the offending WR. */
- DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work"
- " completion status (%d): %s",
- (void *)rxq, wc.wr_id, wc.status,
- ibv_wc_status_str(wc.status));
-#ifdef MLX4_PMD_SOFT_COUNTERS
- /* Increment dropped packets counter. */
- ++rxq->stats.idropped;
-#endif
- /* Link completed WRs together for repost. */
- *next = wr;
- next = &wr->next;
- goto repost;
- }
- ret = wc.byte_len;
- }
- if (ret == 0)
- break;
- len = ret;
- pkt_buf_len = len;
- /* Link completed WRs together for repost. */
- *next = wr;
- next = &wr->next;
- /*
- * Replace spent segments with new ones, concatenate and
- * return them as pkt_buf.
- */
- while (1) {
- struct ibv_sge *sge = &elt->sges[j];
- struct rte_mbuf *seg = elt->bufs[j];
- struct rte_mbuf *rep;
- unsigned int seg_tailroom;
-
- /*
- * Fetch initial bytes of packet descriptor into a
- * cacheline while allocating rep.
- */
- rte_prefetch0(seg);
- rep = rte_mbuf_raw_alloc(rxq->mp);
- if (unlikely(rep == NULL)) {
- /*
- * Unable to allocate a replacement mbuf,
- * repost WR.
- */
- DEBUG("rxq=%p, wr_id=%" PRIu64 ":"
- " can't allocate a new mbuf",
- (void *)rxq, wr_id);
- if (pkt_buf != NULL) {
- *pkt_buf_next = NULL;
- rte_pktmbuf_free(pkt_buf);
- }
- /* Increase out of memory counters. */
- ++rxq->stats.rx_nombuf;
- ++rxq->priv->dev->data->rx_mbuf_alloc_failed;
- goto repost;
- }
-#ifndef NDEBUG
- /* Poison user-modifiable fields in rep. */
- NEXT(rep) = (void *)((uintptr_t)-1);
- SET_DATA_OFF(rep, 0xdead);
- DATA_LEN(rep) = 0xd00d;
- PKT_LEN(rep) = 0xdeadd00d;
- NB_SEGS(rep) = 0x2a;
- PORT(rep) = 0x2a;
- rep->ol_flags = -1;
- /*
- * Clear special flags in mbuf to avoid
- * crashing while freeing.
- */
- rep->ol_flags &=
- ~(uint64_t)(IND_ATTACHED_MBUF |
- CTRL_MBUF_FLAG);
-#endif
- assert(rep->buf_len == seg->buf_len);
- /* Reconfigure sge to use rep instead of seg. */
- assert(sge->lkey == rxq->mr->lkey);
- sge->addr = ((uintptr_t)rep->buf_addr + seg_headroom);
- elt->bufs[j] = rep;
- ++j;
- /* Update pkt_buf if it's the first segment, or link
- * seg to the previous one and update pkt_buf_next. */
- *pkt_buf_next = seg;
- pkt_buf_next = &NEXT(seg);
- /* Update seg information. */
- seg_tailroom = (seg->buf_len - seg_headroom);
- assert(sge->length == seg_tailroom);
- SET_DATA_OFF(seg, seg_headroom);
- if (likely(len <= seg_tailroom)) {
- /* Last segment. */
- DATA_LEN(seg) = len;
- PKT_LEN(seg) = len;
- /* Sanity check. */
- assert(rte_pktmbuf_headroom(seg) ==
- seg_headroom);
- assert(rte_pktmbuf_tailroom(seg) ==
- (seg_tailroom - len));
- break;
- }
- DATA_LEN(seg) = seg_tailroom;
- PKT_LEN(seg) = seg_tailroom;
- /* Sanity check. */
- assert(rte_pktmbuf_headroom(seg) == seg_headroom);
- assert(rte_pktmbuf_tailroom(seg) == 0);
- /* Fix len and clear headroom for next segments. */
- len -= seg_tailroom;
- seg_headroom = 0;
- }
- /* Update head and tail segments. */
- *pkt_buf_next = NULL;
- assert(pkt_buf != NULL);
- assert(j != 0);
- NB_SEGS(pkt_buf) = j;
- PORT(pkt_buf) = rxq->port_id;
- PKT_LEN(pkt_buf) = pkt_buf_len;
- pkt_buf->packet_type = rxq_cq_to_pkt_type(flags);
- pkt_buf->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
-
- /* Return packet. */
- *(pkts++) = pkt_buf;
- ++pkts_ret;
-#ifdef MLX4_PMD_SOFT_COUNTERS
- /* Increase bytes counter. */
- rxq->stats.ibytes += pkt_buf_len;
-#endif
-repost:
- if (++elts_head >= elts_n)
- elts_head = 0;
- continue;
- }
- if (unlikely(i == 0))
- return 0;
- *next = NULL;
- /* Repost WRs. */
-#ifdef DEBUG_RECV
- DEBUG("%p: reposting %d WRs", (void *)rxq, i);
-#endif
- ret = ibv_post_recv(rxq->qp, head.next, &bad_wr);
- if (unlikely(ret)) {
- /* Inability to repost WRs is fatal. */
- DEBUG("%p: ibv_post_recv(): failed for WR %p: %s",
- (void *)rxq->priv,
- (void *)bad_wr,
- strerror(ret));
- abort();
- }
- rxq->elts_head = elts_head;
-#ifdef MLX4_PMD_SOFT_COUNTERS
- /* Increase packets counter. */
- rxq->stats.ipackets += pkts_ret;
-#endif
- return pkts_ret;
-}
-
-/**
- * DPDK callback for RX.
- *
- * The following function is the same as mlx4_rx_burst_sp(), except it doesn't
- * manage scattered packets. This improves performance when the MRU is lower
- * than the size of the first segment.
- *
- * @param dpdk_rxq
- * Generic pointer to RX queue structure.
- * @param[out] pkts
- * Array to store received packets.
- * @param pkts_n
- * Maximum number of packets in array.
- *
- * @return
- * Number of packets successfully received (<= pkts_n).
- */
-static uint16_t
-mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- struct rxq *rxq = (struct rxq *)dpdk_rxq;
- struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;
- const unsigned int elts_n = rxq->elts_n;
- unsigned int elts_head = rxq->elts_head;
- struct ibv_sge sges[pkts_n];
- unsigned int i;
- unsigned int pkts_ret = 0;
- int ret;
-
- if (unlikely(rxq->sp))
- return mlx4_rx_burst_sp(dpdk_rxq, pkts, pkts_n);
- for (i = 0; (i != pkts_n); ++i) {
- struct rxq_elt *elt = &(*elts)[elts_head];
- struct ibv_recv_wr *wr = &elt->wr;
- uint64_t wr_id = wr->wr_id;
- unsigned int len;
- struct rte_mbuf *seg = (void *)((uintptr_t)elt->sge.addr -
- WR_ID(wr_id).offset);
- struct rte_mbuf *rep;
- uint32_t flags;
-
- /* Sanity checks. */
- assert(WR_ID(wr_id).id < rxq->elts_n);
- assert(wr->sg_list == &elt->sge);
- assert(wr->num_sge == 1);
- assert(elts_head < rxq->elts_n);
- assert(rxq->elts_head < rxq->elts_n);
- /*
- * Fetch initial bytes of packet descriptor into a
- * cacheline while allocating rep.
- */
- rte_mbuf_prefetch_part1(seg);
- rte_mbuf_prefetch_part2(seg);
- ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
- &flags);
- if (unlikely(ret < 0)) {
- struct ibv_wc wc;
- int wcs_n;
-
- DEBUG("rxq=%p, poll_length() failed (ret=%d)",
- (void *)rxq, ret);
- /* ibv_poll_cq() must be used in case of failure. */
- wcs_n = ibv_poll_cq(rxq->cq, 1, &wc);
- if (unlikely(wcs_n == 0))
- break;
- if (unlikely(wcs_n < 0)) {
- DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)",
- (void *)rxq, wcs_n);
- break;
- }
- assert(wcs_n == 1);
- if (unlikely(wc.status != IBV_WC_SUCCESS)) {
- /* Whatever, just repost the offending WR. */
- DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work"
- " completion status (%d): %s",
- (void *)rxq, wc.wr_id, wc.status,
- ibv_wc_status_str(wc.status));
-#ifdef MLX4_PMD_SOFT_COUNTERS
- /* Increment dropped packets counter. */
- ++rxq->stats.idropped;
-#endif
- /* Add SGE to array for repost. */
- sges[i] = elt->sge;
- goto repost;
- }
- ret = wc.byte_len;
- }
- if (ret == 0)
- break;
- len = ret;
- rep = rte_mbuf_raw_alloc(rxq->mp);
- if (unlikely(rep == NULL)) {
- /*
- * Unable to allocate a replacement mbuf,
- * repost WR.
- */
- DEBUG("rxq=%p, wr_id=%" PRIu32 ":"
- " can't allocate a new mbuf",
- (void *)rxq, WR_ID(wr_id).id);
- /* Increase out of memory counters. */
- ++rxq->stats.rx_nombuf;
- ++rxq->priv->dev->data->rx_mbuf_alloc_failed;
- /* Add SGE to array for repost. */
- sges[i] = elt->sge;
- goto repost;
- }
-
- /* Reconfigure sge to use rep instead of seg. */
- elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
- assert(elt->sge.lkey == rxq->mr->lkey);
- WR_ID(wr->wr_id).offset =
- (((uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM) -
- (uintptr_t)rep);
- assert(WR_ID(wr->wr_id).id == WR_ID(wr_id).id);
-
- /* Add SGE to array for repost. */
- sges[i] = elt->sge;
-
- /* Update seg information. */
- SET_DATA_OFF(seg, RTE_PKTMBUF_HEADROOM);
- NB_SEGS(seg) = 1;
- PORT(seg) = rxq->port_id;
- NEXT(seg) = NULL;
- PKT_LEN(seg) = len;
- DATA_LEN(seg) = len;
- seg->packet_type = rxq_cq_to_pkt_type(flags);
- seg->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
-
- /* Return packet. */
- *(pkts++) = seg;
- ++pkts_ret;
-#ifdef MLX4_PMD_SOFT_COUNTERS
- /* Increase bytes counter. */
- rxq->stats.ibytes += len;
-#endif
-repost:
- if (++elts_head >= elts_n)
- elts_head = 0;
- continue;
- }
- if (unlikely(i == 0))
- return 0;
- /* Repost WRs. */
-#ifdef DEBUG_RECV
- DEBUG("%p: reposting %u WRs", (void *)rxq, i);
-#endif
- ret = rxq->if_qp->recv_burst(rxq->qp, sges, i);
- if (unlikely(ret)) {
- /* Inability to repost WRs is fatal. */
- DEBUG("%p: recv_burst(): failed (ret=%d)",
- (void *)rxq->priv,
- ret);
- abort();
- }
- rxq->elts_head = elts_head;
-#ifdef MLX4_PMD_SOFT_COUNTERS
- /* Increase packets counter. */
- rxq->stats.ipackets += pkts_ret;
-#endif
- return pkts_ret;
-}
-
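Both burst routines use the same replenish-then-deliver idea: allocate a replacement buffer first, repoint the descriptor at it, and only then hand the received buffer to the application, so the ring never loses entries. A simplified standalone sketch (malloc() stands in for the driver's mempool allocator):

#include <stdlib.h>

struct ring_slot {
	void *buf;	/* Buffer currently owned by the ring. */
};

/*
 * Swap the ring buffer with a freshly allocated one and return the filled
 * buffer, or NULL (leaving the slot untouched) when allocation fails.
 */
void *
slot_replenish(struct ring_slot *slot, size_t buf_size)
{
	void *rep = malloc(buf_size);
	void *filled;

	if (rep == NULL)
		return NULL;	/* Keep the old buffer, drop the packet. */
	filled = slot->buf;
	slot->buf = rep;	/* Descriptor now points at the new buffer. */
	return filled;		/* Caller delivers this one upward. */
}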
-/**
- * DPDK callback for RX in secondary processes.
- *
- * This function configures all queues from primary process information
- * if necessary before reverting to the normal RX burst callback.
- *
- * @param dpdk_rxq
- * Generic pointer to RX queue structure.
- * @param[out] pkts
- * Array to store received packets.
- * @param pkts_n
- * Maximum number of packets in array.
- *
- * @return
- * Number of packets successfully received (<= pkts_n).
- */
-static uint16_t
-mlx4_rx_burst_secondary_setup(void *dpdk_rxq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
-{
- struct rxq *rxq = dpdk_rxq;
- struct priv *priv = mlx4_secondary_data_setup(rxq->priv);
- struct priv *primary_priv;
- unsigned int index;
-
- if (priv == NULL)
- return 0;
- primary_priv =
- mlx4_secondary_data[priv->dev->data->port_id].primary_priv;
- /* Look for queue index in both private structures. */
- for (index = 0; index != priv->rxqs_n; ++index)
- if (((*primary_priv->rxqs)[index] == rxq) ||
- ((*priv->rxqs)[index] == rxq))
- break;
- if (index == priv->rxqs_n)
- return 0;
- rxq = (*priv->rxqs)[index];
- return priv->dev->rx_pkt_burst(rxq, pkts, pkts_n);
-}
-
-/**
- * Allocate a Queue Pair.
- * Optionally setup inline receive if supported.
- *
- * @param priv
- * Pointer to private structure.
- * @param cq
- * Completion queue to associate with QP.
- * @param desc
- * Number of descriptors in QP (hint only).
- *
- * @return
- * QP pointer or NULL in case of error.
- */
-static struct ibv_qp *
-rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
- struct ibv_exp_res_domain *rd)
-{
- struct ibv_exp_qp_init_attr attr = {
- /* CQ to be associated with the send queue. */
- .send_cq = cq,
- /* CQ to be associated with the receive queue. */
- .recv_cq = cq,
- .cap = {
- /* Max number of outstanding WRs. */
- .max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
- priv->device_attr.max_qp_wr :
- desc),
- /* Max number of scatter/gather elements in a WR. */
- .max_recv_sge = ((priv->device_attr.max_sge <
- MLX4_PMD_SGE_WR_N) ?
- priv->device_attr.max_sge :
- MLX4_PMD_SGE_WR_N),
- },
- .qp_type = IBV_QPT_RAW_PACKET,
- .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
- .pd = priv->pd,
- .res_domain = rd,
- };
-
-#ifdef INLINE_RECV
- attr.max_inl_recv = priv->inl_recv_size;
- attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
-#endif
- return ibv_exp_create_qp(priv->ctx, &attr);
-}
-
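Note how the QP capabilities are clamped to the device limits (max_qp_wr, max_sge); requesting more than the device reports would make QP creation fail. The same clamping expressed as a tiny helper:

#include <stdint.h>

/* Clamp a requested value to a device-reported capability,
 * e.g. max_recv_wr = clamp_to_cap(desc, device_attr.max_qp_wr). */
static inline uint32_t
clamp_to_cap(uint32_t requested, uint32_t device_cap)
{
	return requested < device_cap ? requested : device_cap;
}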
-#ifdef RSS_SUPPORT
-
-/**
- * Allocate a RSS Queue Pair.
- * Optionally setup inline receive if supported.
- *
- * @param priv
- * Pointer to private structure.
- * @param cq
- * Completion queue to associate with QP.
- * @param desc
- * Number of descriptors in QP (hint only).
- * @param children_n
- *   Number of children when creating a parent QP, zero for a child.
- * @param rxq_parent
- * Pointer for a parent in a child case, NULL otherwise.
- *
- * @return
- * QP pointer or NULL in case of error.
- */
-static struct ibv_qp *
-rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
- int children_n, struct ibv_exp_res_domain *rd,
- struct rxq *rxq_parent)
-{
- struct ibv_exp_qp_init_attr attr = {
- /* CQ to be associated with the send queue. */
- .send_cq = cq,
- /* CQ to be associated with the receive queue. */
- .recv_cq = cq,
- .cap = {
- /* Max number of outstanding WRs. */
- .max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
- priv->device_attr.max_qp_wr :
- desc),
- /* Max number of scatter/gather elements in a WR. */
- .max_recv_sge = ((priv->device_attr.max_sge <
- MLX4_PMD_SGE_WR_N) ?
- priv->device_attr.max_sge :
- MLX4_PMD_SGE_WR_N),
- },
- .qp_type = IBV_QPT_RAW_PACKET,
- .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_RES_DOMAIN |
- IBV_EXP_QP_INIT_ATTR_QPG),
- .pd = priv->pd,
- .res_domain = rd,
- };
-
-#ifdef INLINE_RECV
- attr.max_inl_recv = priv->inl_recv_size,
- attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV;
-#endif
- if (children_n > 0) {
- attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
- /* TSS isn't necessary. */
- attr.qpg.parent_attrib.tss_child_count = 0;
- attr.qpg.parent_attrib.rss_child_count =
- rte_align32pow2(children_n + 1) >> 1;
- DEBUG("initializing parent RSS queue");
- } else {
- attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
- attr.qpg.qpg_parent = rxq_parent->qp;
- DEBUG("initializing child RSS queue");
- }
- return ibv_exp_create_qp(priv->ctx, &attr);
-}
-
-#endif /* RSS_SUPPORT */
-
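The rss_child_count expression rounds the requested queue count down to a power of two, which hardware RSS requires; rte_align32pow2(n + 1) >> 1 yields the largest power of two not exceeding n. A small worked example, assuming DPDK's rte_common.h is available:

#include <stdint.h>
#include <stdio.h>
#include <rte_common.h>	/* rte_align32pow2() */

int
main(void)
{
	unsigned int n;

	for (n = 1; n <= 9; ++n)
		/* Prints 1 2 2 4 4 4 4 8 8: largest power of two <= n. */
		printf("%u -> %u\n", n, rte_align32pow2(n + 1) >> 1);
	return 0;
}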
-/**
- * Reconfigure a RX queue with new parameters.
- *
- * rxq_rehash() does not allocate mbufs, as doing so from the wrong thread
- * (such as a control thread) may corrupt the pool.
- * In case of failure, the queue is left untouched.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param rxq
- * RX queue pointer.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
-{
- struct priv *priv = rxq->priv;
- struct rxq tmpl = *rxq;
- unsigned int mbuf_n;
- unsigned int desc_n;
- struct rte_mbuf **pool;
- unsigned int i, k;
- struct ibv_exp_qp_attr mod;
- struct ibv_recv_wr *bad_wr;
- unsigned int mb_len;
- int err;
-
- mb_len = rte_pktmbuf_data_room_size(rxq->mp);
- DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
- /* Number of descriptors and mbufs currently allocated. */
- desc_n = (tmpl.elts_n * (tmpl.sp ? MLX4_PMD_SGE_WR_N : 1));
- mbuf_n = desc_n;
- /* Toggle RX checksum offload if hardware supports it. */
- if (priv->hw_csum) {
- tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
- rxq->csum = tmpl.csum;
- }
- if (priv->hw_csum_l2tun) {
- tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
- rxq->csum_l2tun = tmpl.csum_l2tun;
- }
- /* Enable scattered packets support for this queue if necessary. */
- assert(mb_len >= RTE_PKTMBUF_HEADROOM);
- if (dev->data->dev_conf.rxmode.enable_scatter &&
- (dev->data->dev_conf.rxmode.max_rx_pkt_len >
- (mb_len - RTE_PKTMBUF_HEADROOM))) {
- tmpl.sp = 1;
- desc_n /= MLX4_PMD_SGE_WR_N;
- } else
- tmpl.sp = 0;
- DEBUG("%p: %s scattered packets support (%u WRs)",
- (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc_n);
- /* If scatter mode is the same as before, nothing to do. */
- if (tmpl.sp == rxq->sp) {
- DEBUG("%p: nothing to do", (void *)dev);
- return 0;
- }
- /* Remove attached flows if RSS is disabled (no parent queue). */
- if (!priv->rss && !priv->isolated) {
- rxq_allmulticast_disable(&tmpl);
- rxq_promiscuous_disable(&tmpl);
- rxq_mac_addrs_del(&tmpl);
- /* Update original queue in case of failure. */
- rxq->allmulti_flow = tmpl.allmulti_flow;
- rxq->promisc_flow = tmpl.promisc_flow;
- memcpy(rxq->mac_configured, tmpl.mac_configured,
- sizeof(rxq->mac_configured));
- memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
- }
- /* From now on, any failure will render the queue unusable.
- * Reinitialize QP. */
- if (!tmpl.qp)
- goto skip_init;
- mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
- err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
- if (err) {
- ERROR("%p: cannot reset QP: %s", (void *)dev, strerror(err));
- assert(err > 0);
- return err;
- }
- mod = (struct ibv_exp_qp_attr){
- /* Move the QP to this state. */
- .qp_state = IBV_QPS_INIT,
- /* Primary port number. */
- .port_num = priv->port
- };
- err = ibv_exp_modify_qp(tmpl.qp, &mod,
- (IBV_EXP_QP_STATE |
- IBV_EXP_QP_PORT));
- if (err) {
- ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
- (void *)dev, strerror(err));
- assert(err > 0);
- return err;
- };
-skip_init:
- err = ibv_resize_cq(tmpl.cq, desc_n);
- if (err) {
- ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
- assert(err > 0);
- return err;
- }
- /* Reconfigure flows. Do not care for errors. */
- if (!priv->rss && !priv->isolated) {
- rxq_mac_addrs_add(&tmpl);
- if (priv->promisc)
- rxq_promiscuous_enable(&tmpl);
- if (priv->allmulti)
- rxq_allmulticast_enable(&tmpl);
- /* Update original queue in case of failure. */
- rxq->allmulti_flow = tmpl.allmulti_flow;
- rxq->promisc_flow = tmpl.promisc_flow;
- memcpy(rxq->mac_configured, tmpl.mac_configured,
- sizeof(rxq->mac_configured));
- memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
- }
- /* Allocate pool. */
- pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
- if (pool == NULL) {
- ERROR("%p: cannot allocate memory", (void *)dev);
- return ENOBUFS;
- }
- /* Snatch mbufs from original queue. */
- k = 0;
- if (rxq->sp) {
- struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;
-
- for (i = 0; (i != elemof(*elts)); ++i) {
- struct rxq_elt_sp *elt = &(*elts)[i];
- unsigned int j;
-
- for (j = 0; (j != elemof(elt->bufs)); ++j) {
- assert(elt->bufs[j] != NULL);
- pool[k++] = elt->bufs[j];
- }
- }
- } else {
- struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;
-
- for (i = 0; (i != elemof(*elts)); ++i) {
- struct rxq_elt *elt = &(*elts)[i];
- struct rte_mbuf *buf = (void *)
- ((uintptr_t)elt->sge.addr -
- WR_ID(elt->wr.wr_id).offset);
-
- assert(WR_ID(elt->wr.wr_id).id == i);
- pool[k++] = buf;
- }
- }
- assert(k == mbuf_n);
- tmpl.elts_n = 0;
- tmpl.elts.sp = NULL;
- assert((void *)&tmpl.elts.sp == (void *)&tmpl.elts.no_sp);
- err = ((tmpl.sp) ?
- rxq_alloc_elts_sp(&tmpl, desc_n, pool) :
- rxq_alloc_elts(&tmpl, desc_n, pool));
- if (err) {
- ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
- rte_free(pool);
- assert(err > 0);
- return err;
- }
- assert(tmpl.elts_n == desc_n);
- assert(tmpl.elts.sp != NULL);
- rte_free(pool);
- /* Clean up original data. */
- rxq->elts_n = 0;
- rte_free(rxq->elts.sp);
- rxq->elts.sp = NULL;
- if (!tmpl.qp)
- goto skip_rtr;
- /* Post WRs. */
- err = ibv_post_recv(tmpl.qp,
- (tmpl.sp ?
- &(*tmpl.elts.sp)[0].wr :
- &(*tmpl.elts.no_sp)[0].wr),
- &bad_wr);
- if (err) {
- ERROR("%p: ibv_post_recv() failed for WR %p: %s",
- (void *)dev,
- (void *)bad_wr,
- strerror(err));
- goto skip_rtr;
- }
- mod = (struct ibv_exp_qp_attr){
- .qp_state = IBV_QPS_RTR
- };
- err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
- if (err)
- ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
- (void *)dev, strerror(err));
-skip_rtr:
- *rxq = tmpl;
- assert(err >= 0);
- return err;
-}
-
-/**
- * Create verbs QP resources associated with a rxq.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param inactive
- * If true, the queue is disabled because its index is higher or
- * equal to the real number of queues, which must be a power of 2.
- * @param children_n
- * The number of children in a parent case, zero for a child.
- * @param rxq_parent
- * The pointer to a parent RX structure for a child in RSS case,
- * NULL for parent.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-rxq_create_qp(struct rxq *rxq,
- uint16_t desc,
- int inactive,
- int children_n,
- struct rxq *rxq_parent)
-{
- int ret;
- struct ibv_exp_qp_attr mod;
- struct ibv_exp_query_intf_params params;
- enum ibv_exp_query_intf_status status;
- struct ibv_recv_wr *bad_wr;
- int parent = (children_n > 0);
- struct priv *priv = rxq->priv;
-
-#ifdef RSS_SUPPORT
- if (priv->rss && !inactive && (rxq_parent || parent))
- rxq->qp = rxq_setup_qp_rss(priv, rxq->cq, desc,
- children_n, rxq->rd,
- rxq_parent);
- else
-#endif /* RSS_SUPPORT */
- rxq->qp = rxq_setup_qp(priv, rxq->cq, desc, rxq->rd);
- if (rxq->qp == NULL) {
- ret = (errno ? errno : EINVAL);
- ERROR("QP creation failure: %s",
- strerror(ret));
- return ret;
- }
- mod = (struct ibv_exp_qp_attr){
- /* Move the QP to this state. */
- .qp_state = IBV_QPS_INIT,
- /* Primary port number. */
- .port_num = priv->port
- };
- ret = ibv_exp_modify_qp(rxq->qp, &mod,
- (IBV_EXP_QP_STATE |
-#ifdef RSS_SUPPORT
- (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
-#endif /* RSS_SUPPORT */
- IBV_EXP_QP_PORT));
- if (ret) {
- ERROR("QP state to IBV_QPS_INIT failed: %s",
- strerror(ret));
- return ret;
- }
- if (!priv->isolated && (parent || !priv->rss)) {
- /* Configure MAC and broadcast addresses. */
- ret = rxq_mac_addrs_add(rxq);
- if (ret) {
- ERROR("QP flow attachment failed: %s",
- strerror(ret));
- return ret;
- }
- }
- if (!parent) {
- ret = ibv_post_recv(rxq->qp,
- (rxq->sp ?
- &(*rxq->elts.sp)[0].wr :
- &(*rxq->elts.no_sp)[0].wr),
- &bad_wr);
- if (ret) {
- ERROR("ibv_post_recv() failed for WR %p: %s",
- (void *)bad_wr,
- strerror(ret));
- return ret;
- }
- }
- mod = (struct ibv_exp_qp_attr){
- .qp_state = IBV_QPS_RTR
- };
- ret = ibv_exp_modify_qp(rxq->qp, &mod, IBV_EXP_QP_STATE);
- if (ret) {
- ERROR("QP state to IBV_QPS_RTR failed: %s",
- strerror(ret));
- return ret;
- }
- params = (struct ibv_exp_query_intf_params){
- .intf_scope = IBV_EXP_INTF_GLOBAL,
- .intf = IBV_EXP_INTF_QP_BURST,
- .obj = rxq->qp,
- };
- rxq->if_qp = ibv_exp_query_intf(priv->ctx, &params, &status);
- if (rxq->if_qp == NULL) {
- ERROR("QP interface family query failed with status %d",
- status);
- return errno;
- }
- return 0;
-}
-
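rxq_create_qp() walks the QP through its receive-side state machine in a fixed order: RESET, then INIT (bound to a port), then RTR (ready to receive), posting the initial WRs before the final transition. A generic illustration of such an ordered progression (plain C, not the verbs API):

#include <stdio.h>

enum qp_state { QP_RESET, QP_INIT, QP_RTR };

/* States must be entered strictly in order; skipping one is an error. */
static int
qp_advance(enum qp_state *cur, enum qp_state next)
{
	if (next != *cur + 1)
		return -1;	/* Invalid transition. */
	*cur = next;
	return 0;
}

int
main(void)
{
	enum qp_state s = QP_RESET;

	if (qp_advance(&s, QP_INIT) || qp_advance(&s, QP_RTR))
		return 1;
	puts("QP ready to receive");
	return 0;
}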
-/**
- * Configure a RX queue.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param rxq
- * Pointer to RX queue structure.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param socket
- * NUMA socket on which memory must be allocated.
- * @param inactive
- * If true, the queue is disabled because its index is higher or
- * equal to the real number of queues, which must be a power of 2.
- * @param[in] conf
- * Thresholds parameters.
- * @param mp
- * Memory pool for buffer allocations.
- * @param children_n
- * The number of children in a parent case, zero for a child.
- * @param rxq_parent
- *   Pointer to the parent RX structure in a child case,
- *   NULL for a parent.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
- unsigned int socket, int inactive,
- const struct rte_eth_rxconf *conf,
- struct rte_mempool *mp, int children_n,
- struct rxq *rxq_parent)
-{
- struct priv *priv = dev->data->dev_private;
- struct rxq tmpl = {
- .priv = priv,
- .mp = mp,
- .socket = socket
- };
- union {
- struct ibv_exp_query_intf_params params;
- struct ibv_exp_cq_init_attr cq;
- struct ibv_exp_res_domain_init_attr rd;
- } attr;
- enum ibv_exp_query_intf_status status;
- unsigned int mb_len;
- int ret = 0;
- int parent = (children_n > 0);
-
- (void)conf; /* Thresholds configuration (ignored). */
- /*
- * If this is a parent queue, hardware must support RSS and
- * RSS must be enabled.
- */
- assert((!parent) || ((priv->hw_rss) && (priv->rss)));
- if (parent) {
- /* Even if unused, ibv_create_cq() requires at least one
- * descriptor. */
- desc = 1;
- goto skip_mr;
- }
- mb_len = rte_pktmbuf_data_room_size(mp);
- if ((desc == 0) || (desc % MLX4_PMD_SGE_WR_N)) {
- ERROR("%p: invalid number of RX descriptors (must be a"
- " multiple of %d)", (void *)dev, MLX4_PMD_SGE_WR_N);
- return EINVAL;
- }
- /* Toggle RX checksum offload if hardware supports it. */
- if (priv->hw_csum)
- tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
- if (priv->hw_csum_l2tun)
- tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
- /* Enable scattered packets support for this queue if necessary. */
- assert(mb_len >= RTE_PKTMBUF_HEADROOM);
- if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
- (mb_len - RTE_PKTMBUF_HEADROOM)) {
- tmpl.sp = 0;
- } else if (dev->data->dev_conf.rxmode.enable_scatter) {
- tmpl.sp = 1;
- desc /= MLX4_PMD_SGE_WR_N;
- } else {
- WARN("%p: the requested maximum Rx packet size (%u) is"
- " larger than a single mbuf (%u) and scattered"
- " mode has not been requested",
- (void *)dev,
- dev->data->dev_conf.rxmode.max_rx_pkt_len,
- mb_len - RTE_PKTMBUF_HEADROOM);
- }
- DEBUG("%p: %s scattered packets support (%u WRs)",
- (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
- /* Use the entire RX mempool as the memory region. */
- tmpl.mr = mlx4_mp2mr(priv->pd, mp);
- if (tmpl.mr == NULL) {
- ret = EINVAL;
- ERROR("%p: MR creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
-skip_mr:
- attr.rd = (struct ibv_exp_res_domain_init_attr){
- .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
- IBV_EXP_RES_DOMAIN_MSG_MODEL),
- .thread_model = IBV_EXP_THREAD_SINGLE,
- .msg_model = IBV_EXP_MSG_HIGH_BW,
- };
- tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
- if (tmpl.rd == NULL) {
- ret = ENOMEM;
- ERROR("%p: RD creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- if (dev->data->dev_conf.intr_conf.rxq) {
- tmpl.channel = ibv_create_comp_channel(priv->ctx);
- if (tmpl.channel == NULL) {
- ret = ENOMEM;
- ERROR("%p: Rx interrupt completion channel creation"
- " failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- }
- attr.cq = (struct ibv_exp_cq_init_attr){
- .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
- .res_domain = tmpl.rd,
- };
- tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, tmpl.channel, 0,
- &attr.cq);
- if (tmpl.cq == NULL) {
- ret = ENOMEM;
- ERROR("%p: CQ creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- DEBUG("priv->device_attr.max_qp_wr is %d",
- priv->device_attr.max_qp_wr);
- DEBUG("priv->device_attr.max_sge is %d",
- priv->device_attr.max_sge);
- /* Allocate descriptors for RX queues, except for the RSS parent. */
- if (parent)
- goto skip_alloc;
- if (tmpl.sp)
- ret = rxq_alloc_elts_sp(&tmpl, desc, NULL);
- else
- ret = rxq_alloc_elts(&tmpl, desc, NULL);
+ /* Prepare internal flow rules. */
+ ret = mlx4_flow_sync(priv, &error);
if (ret) {
- ERROR("%p: RXQ allocation failed: %s",
- (void *)dev, strerror(ret));
- return ret;
+ ERROR("cannot set up internal flow rules (code %d, \"%s\"),"
+ " flow error type %d, cause %p, message: %s",
+ -ret, strerror(-ret), error.type, error.cause,
+ error.message ? error.message : "(unspecified)");
}
-skip_alloc:
- if (parent || rxq_parent || !priv->rss) {
- ret = rxq_create_qp(&tmpl, desc, inactive,
- children_n, rxq_parent);
- if (ret)
- goto error;
- }
- /* Save port ID. */
- tmpl.port_id = dev->data->port_id;
- DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
- attr.params = (struct ibv_exp_query_intf_params){
- .intf_scope = IBV_EXP_INTF_GLOBAL,
- .intf = IBV_EXP_INTF_CQ,
- .obj = tmpl.cq,
- };
- tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
- if (tmpl.if_cq == NULL) {
- ret = EINVAL;
- ERROR("%p: CQ interface family query failed with status %d",
- (void *)dev, status);
- goto error;
- }
- /* Clean up rxq in case we're reinitializing it. */
- DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
- rxq_cleanup(rxq);
- *rxq = tmpl;
- DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl);
- assert(ret == 0);
- return 0;
-error:
- rxq_cleanup(&tmpl);
- assert(ret > 0);
return ret;
}
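The added error path prints the rte_flow_error details filled in by mlx4_flow_sync(). For reference, a minimal sketch of how such an error structure is typically populated and reported, using only the public struct from rte_flow.h (the ENOTSUP cause is an arbitrary example):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <rte_flow.h>

int
main(void)
{
	/* What a failing flow callback typically hands back to the caller. */
	struct rte_flow_error error = {
		.type = RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
		.cause = NULL,
		.message = "operation not supported",
	};
	int ret = -ENOTSUP;	/* Callbacks return a negative errno value. */

	printf("code %d (%s), type %d, message: %s\n",
	       -ret, strerror(-ret), error.type,
	       error.message ? error.message : "(unspecified)");
	return 0;
}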
/**
- * DPDK callback to configure a RX queue.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param idx
- * RX queue index.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param socket
- * NUMA socket on which memory must be allocated.
- * @param[in] conf
- * Thresholds parameters.
- * @param mp
- * Memory pool for buffer allocations.
- *
- * @return
- * 0 on success, negative errno value on failure.
- */
-static int
-mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
- unsigned int socket, const struct rte_eth_rxconf *conf,
- struct rte_mempool *mp)
-{
- struct rxq *parent;
- struct priv *priv = dev->data->dev_private;
- struct rxq *rxq = (*priv->rxqs)[idx];
- int inactive = 0;
- int ret;
-
- if (mlx4_is_secondary())
- return -E_RTE_SECONDARY;
- priv_lock(priv);
- DEBUG("%p: configuring queue %u for %u descriptors",
- (void *)dev, idx, desc);
- if (idx >= priv->rxqs_n) {
- ERROR("%p: queue index out of range (%u >= %u)",
- (void *)dev, idx, priv->rxqs_n);
- priv_unlock(priv);
- return -EOVERFLOW;
- }
- if (rxq != NULL) {
- DEBUG("%p: reusing already allocated queue index %u (%p)",
- (void *)dev, idx, (void *)rxq);
- if (priv->started) {
- priv_unlock(priv);
- return -EEXIST;
- }
- (*priv->rxqs)[idx] = NULL;
- rxq_cleanup(rxq);
- } else {
- rxq = rte_calloc_socket("RXQ", 1, sizeof(*rxq), 0, socket);
- if (rxq == NULL) {
- ERROR("%p: unable to allocate queue index %u",
- (void *)dev, idx);
- priv_unlock(priv);
- return -ENOMEM;
- }
- }
- if (priv->rss && !priv->isolated) {
- /* The list consists of the single default one. */
- parent = LIST_FIRST(&priv->parents);
- if (idx >= rte_align32pow2(priv->rxqs_n + 1) >> 1)
- inactive = 1;
- } else {
- parent = NULL;
- }
- ret = rxq_setup(dev, rxq, desc, socket,
- inactive, conf, mp, 0, parent);
- if (ret)
- rte_free(rxq);
- else {
- rxq->stats.idx = idx;
- DEBUG("%p: adding RX queue %p to list",
- (void *)dev, (void *)rxq);
- (*priv->rxqs)[idx] = rxq;
- /* Update receive callback. */
- if (rxq->sp)
- dev->rx_pkt_burst = mlx4_rx_burst_sp;
- else
- dev->rx_pkt_burst = mlx4_rx_burst;
- }
- priv_unlock(priv);
- return -ret;
-}
-
-/**
- * DPDK callback to release a RX queue.
- *
- * @param dpdk_rxq
- * Generic RX queue pointer.
- */
-static void
-mlx4_rx_queue_release(void *dpdk_rxq)
-{
- struct rxq *rxq = (struct rxq *)dpdk_rxq;
- struct priv *priv;
- unsigned int i;
-
- if (mlx4_is_secondary())
- return;
- if (rxq == NULL)
- return;
- priv = rxq->priv;
- priv_lock(priv);
- for (i = 0; (i != priv->rxqs_n); ++i)
- if ((*priv->rxqs)[i] == rxq) {
- DEBUG("%p: removing RX queue %p from list",
- (void *)priv->dev, (void *)rxq);
- (*priv->rxqs)[i] = NULL;
- break;
- }
- rxq_cleanup(rxq);
- rte_free(rxq);
- priv_unlock(priv);
-}
-
-static int
-priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
-
-static int
-priv_dev_removal_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
-
-static int
-priv_dev_link_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
-
-/**
* DPDK callback to start the device.
*
- * Simulate device start by attaching all configured flows.
+ * Simulate device start by initializing common RSS resources and attaching
+ * all configured flows.
*
* @param dev
* Pointer to Ethernet device structure.
*
* @return
- * 0 on success, negative errno value on failure.
+ * 0 on success, negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_dev_start(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
- unsigned int i = 0;
- unsigned int r;
- struct rxq *rxq;
+ struct rte_flow_error error;
int ret;
- if (mlx4_is_secondary())
- return -E_RTE_SECONDARY;
- priv_lock(priv);
- if (priv->started) {
- priv_unlock(priv);
+ if (priv->started)
return 0;
- }
DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
priv->started = 1;
- if (priv->isolated) {
- rxq = NULL;
- r = 1;
- } else if (priv->rss) {
- rxq = LIST_FIRST(&priv->parents);
- r = 1;
- } else {
- rxq = (*priv->rxqs)[0];
- r = priv->rxqs_n;
- }
- /* Iterate only once when RSS is enabled. */
- do {
- /* Ignore nonexistent RX queues. */
- if (rxq == NULL)
- continue;
- ret = rxq_mac_addrs_add(rxq);
- if (!ret && priv->promisc)
- ret = rxq_promiscuous_enable(rxq);
- if (!ret && priv->allmulti)
- ret = rxq_allmulticast_enable(rxq);
- if (!ret)
- continue;
- WARN("%p: QP flow attachment failed: %s",
- (void *)dev, strerror(ret));
- goto err;
- } while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
- ret = priv_dev_link_interrupt_handler_install(priv, dev);
+ ret = mlx4_rss_init(priv);
if (ret) {
- ERROR("%p: LSC handler install failed",
- (void *)dev);
+ ERROR("%p: cannot initialize RSS resources: %s",
+ (void *)dev, strerror(-ret));
goto err;
}
- ret = priv_dev_removal_interrupt_handler_install(priv, dev);
+ ret = mlx4_intr_install(priv);
if (ret) {
- ERROR("%p: RMV handler install failed",
+ ERROR("%p: interrupt handler installation failed",
(void *)dev);
goto err;
}
- ret = priv_rx_intr_vec_enable(priv);
- if (ret) {
- ERROR("%p: Rx interrupt vector creation failed",
- (void *)dev);
- goto err;
- }
- ret = mlx4_priv_flow_start(priv);
+ ret = mlx4_flow_sync(priv, &error);
if (ret) {
- ERROR("%p: flow start failed: %s",
- (void *)dev, strerror(ret));
+ ERROR("%p: cannot attach flow rules (code %d, \"%s\"),"
+ " flow error type %d, cause %p, message: %s",
+ (void *)dev,
+ -ret, strerror(-ret), error.type, error.cause,
+ error.message ? error.message : "(unspecified)");
goto err;
}
- priv_unlock(priv);
+ rte_wmb();
+ dev->tx_pkt_burst = mlx4_tx_burst;
+ dev->rx_pkt_burst = mlx4_rx_burst;
return 0;
err:
/* Rollback. */
- while (i != 0) {
- rxq = (*priv->rxqs)[i--];
- if (rxq != NULL) {
- rxq_allmulticast_disable(rxq);
- rxq_promiscuous_disable(rxq);
- rxq_mac_addrs_del(rxq);
- }
- }
priv->started = 0;
- priv_unlock(priv);
- return -ret;
+ return ret;
}
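The new start/stop paths publish the datapath carefully: all state is initialized first, rte_wmb() makes those writes visible, and only then are the real burst callbacks installed (the reverse order on stop). A compact illustration of that pattern (handler names are placeholders):

#include <stdint.h>
#include <rte_atomic.h>	/* rte_wmb() */

typedef uint16_t (*burst_fn)(void *queue, void **pkts, uint16_t n);

/* Placeholder burst handlers standing in for the real/removed ones. */
static uint16_t real_burst(void *q, void **p, uint16_t n)
{ (void)q; (void)p; return n; }
static uint16_t noop_burst(void *q, void **p, uint16_t n)
{ (void)q; (void)p; (void)n; return 0; }

void
publish_datapath(burst_fn *slot, int started)
{
	if (started) {
		/* Make all prior queue setup globally visible first... */
		rte_wmb();
		*slot = real_burst;	/* ...then expose the fast path. */
	} else {
		*slot = noop_burst;	/* Hide the fast path first... */
		rte_wmb();
		/* ...then it is safe to tear the queues down. */
	}
}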
/**
@@ -4220,102 +178,19 @@ static void
mlx4_dev_stop(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
- unsigned int i = 0;
- unsigned int r;
- struct rxq *rxq;
- if (mlx4_is_secondary())
- return;
- priv_lock(priv);
- if (!priv->started) {
- priv_unlock(priv);
+ if (!priv->started)
return;
- }
DEBUG("%p: detaching flows from all RX queues", (void *)dev);
priv->started = 0;
- if (priv->isolated) {
- rxq = NULL;
- r = 1;
- } else if (priv->rss) {
- rxq = LIST_FIRST(&priv->parents);
- r = 1;
- } else {
- rxq = (*priv->rxqs)[0];
- r = priv->rxqs_n;
- }
- mlx4_priv_flow_stop(priv);
- /* Iterate only once when RSS is enabled. */
- do {
- /* Ignore nonexistent RX queues. */
- if (rxq == NULL)
- continue;
- rxq_allmulticast_disable(rxq);
- rxq_promiscuous_disable(rxq);
- rxq_mac_addrs_del(rxq);
- } while ((--r) && ((rxq = (*priv->rxqs)[++i]), i));
- priv_unlock(priv);
-}
-
-/**
- * Dummy DPDK callback for TX.
- *
- * This function is used to temporarily replace the real callback during
- * unsafe control operations on the queue, or in case of error.
- *
- * @param dpdk_txq
- * Generic pointer to TX queue structure.
- * @param[in] pkts
- * Packets to transmit.
- * @param pkts_n
- * Number of packets in array.
- *
- * @return
- * Number of packets successfully transmitted (<= pkts_n).
- */
-static uint16_t
-removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- (void)dpdk_txq;
- (void)pkts;
- (void)pkts_n;
- return 0;
-}
-
-/**
- * Dummy DPDK callback for RX.
- *
- * This function is used to temporarily replace the real callback during
- * unsafe control operations on the queue, or in case of error.
- *
- * @param dpdk_rxq
- * Generic pointer to RX queue structure.
- * @param[out] pkts
- * Array to store received packets.
- * @param pkts_n
- * Maximum number of packets in array.
- *
- * @return
- * Number of packets successfully received (<= pkts_n).
- */
-static uint16_t
-removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- (void)dpdk_rxq;
- (void)pkts;
- (void)pkts_n;
- return 0;
+ dev->tx_pkt_burst = mlx4_tx_burst_removed;
+ dev->rx_pkt_burst = mlx4_rx_burst_removed;
+ rte_wmb();
+ mlx4_flow_sync(priv, NULL);
+ mlx4_intr_uninstall(priv);
+ mlx4_rss_deinit(priv);
}
-static int
-priv_dev_interrupt_handler_uninstall(struct priv *, struct rte_eth_dev *);
-
-static int
-priv_dev_removal_interrupt_handler_uninstall(struct priv *,
- struct rte_eth_dev *);
-
-static int
-priv_dev_link_interrupt_handler_uninstall(struct priv *, struct rte_eth_dev *);
-
/**
* DPDK callback to close the device.
*
@@ -4327,1047 +202,58 @@ priv_dev_link_interrupt_handler_uninstall(struct priv *, struct rte_eth_dev *);
static void
mlx4_dev_close(struct rte_eth_dev *dev)
{
- struct priv *priv = mlx4_get_priv(dev);
- void *tmp;
+ struct priv *priv = dev->data->dev_private;
unsigned int i;
- if (priv == NULL)
- return;
- priv_lock(priv);
DEBUG("%p: closing device \"%s\"",
(void *)dev,
((priv->ctx != NULL) ? priv->ctx->device->name : ""));
- /* Prevent crashes when queues are still in use. This is unfortunately
- * still required for DPDK 1.3 because some programs (such as testpmd)
- * never release them before closing the device. */
- dev->rx_pkt_burst = removed_rx_burst;
- dev->tx_pkt_burst = removed_tx_burst;
- if (priv->rxqs != NULL) {
- /* XXX race condition if mlx4_rx_burst() is still running. */
- usleep(1000);
- for (i = 0; (i != priv->rxqs_n); ++i) {
- tmp = (*priv->rxqs)[i];
- if (tmp == NULL)
- continue;
- (*priv->rxqs)[i] = NULL;
- rxq_cleanup(tmp);
- rte_free(tmp);
- }
- priv->rxqs_n = 0;
- priv->rxqs = NULL;
- }
- if (priv->txqs != NULL) {
- /* XXX race condition if mlx4_tx_burst() is still running. */
- usleep(1000);
- for (i = 0; (i != priv->txqs_n); ++i) {
- tmp = (*priv->txqs)[i];
- if (tmp == NULL)
- continue;
- (*priv->txqs)[i] = NULL;
- txq_cleanup(tmp);
- rte_free(tmp);
- }
- priv->txqs_n = 0;
- priv->txqs = NULL;
- }
- if (priv->rss)
- priv_parent_list_cleanup(priv);
+ dev->rx_pkt_burst = mlx4_rx_burst_removed;
+ dev->tx_pkt_burst = mlx4_tx_burst_removed;
+ rte_wmb();
+ mlx4_flow_clean(priv);
+ for (i = 0; i != dev->data->nb_rx_queues; ++i)
+ mlx4_rx_queue_release(dev->data->rx_queues[i]);
+ for (i = 0; i != dev->data->nb_tx_queues; ++i)
+ mlx4_tx_queue_release(dev->data->tx_queues[i]);
if (priv->pd != NULL) {
assert(priv->ctx != NULL);
claim_zero(ibv_dealloc_pd(priv->pd));
claim_zero(ibv_close_device(priv->ctx));
} else
assert(priv->ctx == NULL);
- priv_dev_removal_interrupt_handler_uninstall(priv, dev);
- priv_dev_link_interrupt_handler_uninstall(priv, dev);
- priv_rx_intr_vec_disable(priv);
- priv_unlock(priv);
+ mlx4_intr_uninstall(priv);
memset(priv, 0, sizeof(*priv));
}
-/**
- * Change the link state (UP / DOWN).
- *
- * @param priv
- * Pointer to Ethernet device private data.
- * @param up
- * Nonzero for link up, otherwise link down.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_set_link(struct priv *priv, int up)
-{
- struct rte_eth_dev *dev = priv->dev;
- int err;
- unsigned int i;
-
- if (up) {
- err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
- if (err)
- return err;
- for (i = 0; i < priv->rxqs_n; i++)
- if ((*priv->rxqs)[i]->sp)
- break;
- /* Check if an sp queue exists.
- * Note: Some old frames might be received.
- */
- if (i == priv->rxqs_n)
- dev->rx_pkt_burst = mlx4_rx_burst;
- else
- dev->rx_pkt_burst = mlx4_rx_burst_sp;
- dev->tx_pkt_burst = mlx4_tx_burst;
- } else {
- err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP);
- if (err)
- return err;
- dev->rx_pkt_burst = removed_rx_burst;
- dev->tx_pkt_burst = removed_tx_burst;
- }
- return 0;
-}
-
-/**
- * DPDK callback to bring the link DOWN.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-mlx4_set_link_down(struct rte_eth_dev *dev)
-{
- struct priv *priv = dev->data->dev_private;
- int err;
-
- priv_lock(priv);
- err = priv_set_link(priv, 0);
- priv_unlock(priv);
- return err;
-}
-
-/**
- * DPDK callback to bring the link UP.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-mlx4_set_link_up(struct rte_eth_dev *dev)
-{
- struct priv *priv = dev->data->dev_private;
- int err;
-
- priv_lock(priv);
- err = priv_set_link(priv, 1);
- priv_unlock(priv);
- return err;
-}
-/**
- * DPDK callback to get information about the device.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param[out] info
- * Info structure output buffer.
- */
-static void
-mlx4_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
-{
- struct priv *priv = mlx4_get_priv(dev);
- unsigned int max;
- char ifname[IF_NAMESIZE];
-
- info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);
-
- if (priv == NULL)
- return;
- priv_lock(priv);
- /* FIXME: we should ask the device for these values. */
- info->min_rx_bufsize = 32;
- info->max_rx_pktlen = 65536;
- /*
- * Since we need one CQ per QP, the limit is the minimum number
- * between the two values.
- */
- max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ?
- priv->device_attr.max_qp : priv->device_attr.max_cq);
-	/* max_rx_queues is uint16_t, so clamp the value to 65535. */
- if (max >= 65535)
- max = 65535;
- info->max_rx_queues = max;
- info->max_tx_queues = max;
- /* Last array entry is reserved for broadcast. */
- info->max_mac_addrs = (elemof(priv->mac) - 1);
- info->rx_offload_capa =
- (priv->hw_csum ?
- (DEV_RX_OFFLOAD_IPV4_CKSUM |
- DEV_RX_OFFLOAD_UDP_CKSUM |
- DEV_RX_OFFLOAD_TCP_CKSUM) :
- 0);
- info->tx_offload_capa =
- (priv->hw_csum ?
- (DEV_TX_OFFLOAD_IPV4_CKSUM |
- DEV_TX_OFFLOAD_UDP_CKSUM |
- DEV_TX_OFFLOAD_TCP_CKSUM) :
- 0);
- if (priv_get_ifname(priv, &ifname) == 0)
- info->if_index = if_nametoindex(ifname);
- info->speed_capa =
- ETH_LINK_SPEED_1G |
- ETH_LINK_SPEED_10G |
- ETH_LINK_SPEED_20G |
- ETH_LINK_SPEED_40G |
- ETH_LINK_SPEED_56G;
- priv_unlock(priv);
-}
-
-static const uint32_t *
-mlx4_dev_supported_ptypes_get(struct rte_eth_dev *dev)
-{
- static const uint32_t ptypes[] = {
- /* refers to rxq_cq_to_pkt_type() */
- RTE_PTYPE_L3_IPV4,
- RTE_PTYPE_L3_IPV6,
- RTE_PTYPE_INNER_L3_IPV4,
- RTE_PTYPE_INNER_L3_IPV6,
- RTE_PTYPE_UNKNOWN
- };
-
- if (dev->rx_pkt_burst == mlx4_rx_burst ||
- dev->rx_pkt_burst == mlx4_rx_burst_sp)
- return ptypes;
- return NULL;
-}
-
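The ptypes table above follows the ethdev convention of ending the list with RTE_PTYPE_UNKNOWN, which callers use as a terminator. A minimal consumer-side sketch of walking such a list:

#include <stdint.h>
#include <stdio.h>
#include <rte_mbuf_ptype.h>	/* RTE_PTYPE_* definitions */

/* Count entries in a RTE_PTYPE_UNKNOWN-terminated packet type list. */
static unsigned int
count_ptypes(const uint32_t *ptypes)
{
	unsigned int n = 0;

	while (ptypes[n] != RTE_PTYPE_UNKNOWN)
		++n;
	return n;
}

int
main(void)
{
	static const uint32_t ptypes[] = {
		RTE_PTYPE_L3_IPV4,
		RTE_PTYPE_L3_IPV6,
		RTE_PTYPE_UNKNOWN,
	};

	printf("%u packet types advertised\n", count_ptypes(ptypes));
	return 0;
}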
-/**
- * DPDK callback to get device statistics.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param[out] stats
- * Stats structure output buffer.
- */
-static void
-mlx4_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
-{
- struct priv *priv = mlx4_get_priv(dev);
- struct rte_eth_stats tmp = {0};
- unsigned int i;
- unsigned int idx;
-
- if (priv == NULL)
- return;
- priv_lock(priv);
- /* Add software counters. */
- for (i = 0; (i != priv->rxqs_n); ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
-
- if (rxq == NULL)
- continue;
- idx = rxq->stats.idx;
- if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
-#ifdef MLX4_PMD_SOFT_COUNTERS
- tmp.q_ipackets[idx] += rxq->stats.ipackets;
- tmp.q_ibytes[idx] += rxq->stats.ibytes;
-#endif
- tmp.q_errors[idx] += (rxq->stats.idropped +
- rxq->stats.rx_nombuf);
- }
-#ifdef MLX4_PMD_SOFT_COUNTERS
- tmp.ipackets += rxq->stats.ipackets;
- tmp.ibytes += rxq->stats.ibytes;
-#endif
- tmp.ierrors += rxq->stats.idropped;
- tmp.rx_nombuf += rxq->stats.rx_nombuf;
- }
- for (i = 0; (i != priv->txqs_n); ++i) {
- struct txq *txq = (*priv->txqs)[i];
-
- if (txq == NULL)
- continue;
- idx = txq->stats.idx;
- if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
-#ifdef MLX4_PMD_SOFT_COUNTERS
- tmp.q_opackets[idx] += txq->stats.opackets;
- tmp.q_obytes[idx] += txq->stats.obytes;
-#endif
- tmp.q_errors[idx] += txq->stats.odropped;
- }
-#ifdef MLX4_PMD_SOFT_COUNTERS
- tmp.opackets += txq->stats.opackets;
- tmp.obytes += txq->stats.obytes;
-#endif
- tmp.oerrors += txq->stats.odropped;
- }
-#ifndef MLX4_PMD_SOFT_COUNTERS
- /* FIXME: retrieve and add hardware counters. */
-#endif
- *stats = tmp;
- priv_unlock(priv);
-}
-
-/**
- * DPDK callback to clear device statistics.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- */
-static void
-mlx4_stats_reset(struct rte_eth_dev *dev)
-{
- struct priv *priv = mlx4_get_priv(dev);
- unsigned int i;
- unsigned int idx;
-
- if (priv == NULL)
- return;
- priv_lock(priv);
- for (i = 0; (i != priv->rxqs_n); ++i) {
- if ((*priv->rxqs)[i] == NULL)
- continue;
- idx = (*priv->rxqs)[i]->stats.idx;
- (*priv->rxqs)[i]->stats =
- (struct mlx4_rxq_stats){ .idx = idx };
- }
- for (i = 0; (i != priv->txqs_n); ++i) {
- if ((*priv->txqs)[i] == NULL)
- continue;
- idx = (*priv->txqs)[i]->stats.idx;
- (*priv->txqs)[i]->stats =
- (struct mlx4_txq_stats){ .idx = idx };
- }
-#ifndef MLX4_PMD_SOFT_COUNTERS
- /* FIXME: reset hardware counters. */
-#endif
- priv_unlock(priv);
-}
-
-/**
- * DPDK callback to remove a MAC address.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param index
- * MAC address index.
- */
-static void
-mlx4_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
-{
- struct priv *priv = dev->data->dev_private;
-
- if (mlx4_is_secondary())
- return;
- priv_lock(priv);
- if (priv->isolated)
- goto end;
- DEBUG("%p: removing MAC address from index %" PRIu32,
- (void *)dev, index);
- /* Last array entry is reserved for broadcast. */
- if (index >= (elemof(priv->mac) - 1))
- goto end;
- priv_mac_addr_del(priv, index);
-end:
- priv_unlock(priv);
-}
-
-/**
- * DPDK callback to add a MAC address.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param mac_addr
- * MAC address to register.
- * @param index
- * MAC address index.
- * @param vmdq
- * VMDq pool index to associate address with (ignored).
- */
-static int
-mlx4_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
- uint32_t index, uint32_t vmdq)
-{
- struct priv *priv = dev->data->dev_private;
- int re;
-
- if (mlx4_is_secondary())
- return -ENOTSUP;
- (void)vmdq;
- priv_lock(priv);
- if (priv->isolated) {
- DEBUG("%p: cannot add MAC address, "
- "device is in isolated mode", (void *)dev);
- re = EPERM;
- goto end;
- }
- DEBUG("%p: adding MAC address at index %" PRIu32,
- (void *)dev, index);
- /* Last array entry is reserved for broadcast. */
- if (index >= (elemof(priv->mac) - 1)) {
- re = EINVAL;
- goto end;
- }
- re = priv_mac_addr_add(priv, index,
- (const uint8_t (*)[ETHER_ADDR_LEN])
- mac_addr->addr_bytes);
-end:
- priv_unlock(priv);
- return -re;
-}
-
-/**
- * DPDK callback to set the primary MAC address.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param mac_addr
- * MAC address to register.
- */
-static void
-mlx4_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
-{
- DEBUG("%p: setting primary MAC address", (void *)dev);
- mlx4_mac_addr_remove(dev, 0);
- mlx4_mac_addr_add(dev, mac_addr, 0, 0);
-}
-
-/**
- * DPDK callback to enable promiscuous mode.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- */
-static void
-mlx4_promiscuous_enable(struct rte_eth_dev *dev)
-{
- struct priv *priv = dev->data->dev_private;
- unsigned int i;
- int ret;
-
- if (mlx4_is_secondary())
- return;
- priv_lock(priv);
- if (priv->isolated) {
- DEBUG("%p: cannot enable promiscuous, "
- "device is in isolated mode", (void *)dev);
- priv_unlock(priv);
- return;
- }
- if (priv->promisc) {
- priv_unlock(priv);
- return;
- }
- /* If device isn't started, this is all we need to do. */
- if (!priv->started)
- goto end;
- if (priv->rss) {
- ret = rxq_promiscuous_enable(LIST_FIRST(&priv->parents));
- if (ret) {
- priv_unlock(priv);
- return;
- }
- goto end;
- }
- for (i = 0; (i != priv->rxqs_n); ++i) {
- if ((*priv->rxqs)[i] == NULL)
- continue;
- ret = rxq_promiscuous_enable((*priv->rxqs)[i]);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0)
- if ((*priv->rxqs)[--i] != NULL)
- rxq_promiscuous_disable((*priv->rxqs)[i]);
- priv_unlock(priv);
- return;
- }
-end:
- priv->promisc = 1;
- priv_unlock(priv);
-}
-
-/**
- * DPDK callback to disable promiscuous mode.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- */
-static void
-mlx4_promiscuous_disable(struct rte_eth_dev *dev)
-{
- struct priv *priv = dev->data->dev_private;
- unsigned int i;
-
- if (mlx4_is_secondary())
- return;
- priv_lock(priv);
- if (!priv->promisc || priv->isolated) {
- priv_unlock(priv);
- return;
- }
- if (priv->rss) {
- rxq_promiscuous_disable(LIST_FIRST(&priv->parents));
- goto end;
- }
- for (i = 0; (i != priv->rxqs_n); ++i)
- if ((*priv->rxqs)[i] != NULL)
- rxq_promiscuous_disable((*priv->rxqs)[i]);
-end:
- priv->promisc = 0;
- priv_unlock(priv);
-}
-
-/**
- * DPDK callback to enable allmulti mode.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- */
-static void
-mlx4_allmulticast_enable(struct rte_eth_dev *dev)
-{
- struct priv *priv = dev->data->dev_private;
- unsigned int i;
- int ret;
-
- if (mlx4_is_secondary())
- return;
- priv_lock(priv);
- if (priv->isolated) {
- DEBUG("%p: cannot enable allmulticast, "
- "device is in isolated mode", (void *)dev);
- priv_unlock(priv);
- return;
- }
- if (priv->allmulti) {
- priv_unlock(priv);
- return;
- }
- /* If device isn't started, this is all we need to do. */
- if (!priv->started)
- goto end;
- if (priv->rss) {
- ret = rxq_allmulticast_enable(LIST_FIRST(&priv->parents));
- if (ret) {
- priv_unlock(priv);
- return;
- }
- goto end;
- }
- for (i = 0; (i != priv->rxqs_n); ++i) {
- if ((*priv->rxqs)[i] == NULL)
- continue;
- ret = rxq_allmulticast_enable((*priv->rxqs)[i]);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0)
- if ((*priv->rxqs)[--i] != NULL)
- rxq_allmulticast_disable((*priv->rxqs)[i]);
- priv_unlock(priv);
- return;
- }
-end:
- priv->allmulti = 1;
- priv_unlock(priv);
-}
-
-/**
- * DPDK callback to disable allmulti mode.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- */
-static void
-mlx4_allmulticast_disable(struct rte_eth_dev *dev)
-{
- struct priv *priv = dev->data->dev_private;
- unsigned int i;
-
- if (mlx4_is_secondary())
- return;
- priv_lock(priv);
- if (!priv->allmulti || priv->isolated) {
- priv_unlock(priv);
- return;
- }
- if (priv->rss) {
- rxq_allmulticast_disable(LIST_FIRST(&priv->parents));
- goto end;
- }
- for (i = 0; (i != priv->rxqs_n); ++i)
- if ((*priv->rxqs)[i] != NULL)
- rxq_allmulticast_disable((*priv->rxqs)[i]);
-end:
- priv->allmulti = 0;
- priv_unlock(priv);
-}
-
-/**
- * DPDK callback to retrieve physical link information.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param wait_to_complete
- * Wait for request completion (ignored).
- */
-static int
-mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete)
-{
- const struct priv *priv = mlx4_get_priv(dev);
- struct ethtool_cmd edata = {
- .cmd = ETHTOOL_GSET
- };
- struct ifreq ifr;
- struct rte_eth_link dev_link;
- int link_speed = 0;
-
- /* priv_lock() is not taken to allow concurrent calls. */
-
- if (priv == NULL)
- return -EINVAL;
- (void)wait_to_complete;
- if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
- WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
- return -1;
- }
- memset(&dev_link, 0, sizeof(dev_link));
- dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
- (ifr.ifr_flags & IFF_RUNNING));
- ifr.ifr_data = (void *)&edata;
- if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
- WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
- strerror(errno));
- return -1;
- }
- link_speed = ethtool_cmd_speed(&edata);
- if (link_speed == -1)
- dev_link.link_speed = 0;
- else
- dev_link.link_speed = link_speed;
- dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
- ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
- dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
- ETH_LINK_SPEED_FIXED);
- if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
- /* Link status changed. */
- dev->data->dev_link = dev_link;
- return 0;
- }
- /* Link status is still the same. */
- return -1;
-}
-
-static int
-mlx4_ibv_device_to_pci_addr(const struct ibv_device *device,
- struct rte_pci_addr *pci_addr);
-
-/**
- * DPDK callback to change the MTU.
- *
- * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be
- * received). Use this as a hint to enable/disable scattered packets support
- * and improve performance when not needed.
- * Since failure is not an option, reconfiguring queues on the fly is not
- * recommended.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param in_mtu
- * New MTU.
- *
- * @return
- * 0 on success, negative errno value on failure.
- */
-static int
-mlx4_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
-{
- struct priv *priv = dev->data->dev_private;
- int ret = 0;
- unsigned int i;
- uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
- mlx4_rx_burst;
-
- if (mlx4_is_secondary())
- return -E_RTE_SECONDARY;
- priv_lock(priv);
- /* Set kernel interface MTU first. */
- if (priv_set_mtu(priv, mtu)) {
- ret = errno;
- WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
- strerror(ret));
- goto out;
- } else
- DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
- priv->mtu = mtu;
- /* Temporarily replace RX handler with a fake one, assuming it has not
- * been copied elsewhere. */
- dev->rx_pkt_burst = removed_rx_burst;
- /* Make sure everyone has left mlx4_rx_burst() and uses
- * removed_rx_burst() instead. */
- rte_wmb();
- usleep(1000);
- /* Reconfigure each RX queue. */
- for (i = 0; (i != priv->rxqs_n); ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
- unsigned int max_frame_len;
-
- if (rxq == NULL)
- continue;
- /* Calculate new maximum frame length according to MTU. */
- max_frame_len = (priv->mtu + ETHER_HDR_LEN +
- (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
- /* Provide new values to rxq_setup(). */
- dev->data->dev_conf.rxmode.jumbo_frame =
- (max_frame_len > ETHER_MAX_LEN);
- dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
- ret = rxq_rehash(dev, rxq);
- if (ret) {
- /* Force SP RX if that queue requires it and abort. */
- if (rxq->sp)
- rx_func = mlx4_rx_burst_sp;
- break;
- }
- /* Reenable non-RSS queue attributes. No need to check
- * for errors at this stage. */
- if (!priv->rss && !priv->isolated) {
- rxq_mac_addrs_add(rxq);
- if (priv->promisc)
- rxq_promiscuous_enable(rxq);
- if (priv->allmulti)
- rxq_allmulticast_enable(rxq);
- }
- /* Scattered burst function takes priority. */
- if (rxq->sp)
- rx_func = mlx4_rx_burst_sp;
- }
- /* Burst functions can now be called again. */
- rte_wmb();
- dev->rx_pkt_burst = rx_func;
-out:
- priv_unlock(priv);
- assert(ret >= 0);
- return -ret;
-}
-
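For reference, the frame length computed above amounts to the MTU plus the Ethernet header and one VLAN tag (14 + 4 bytes). A tiny worked example, with the rte_ether.h constants restated locally so it builds on its own:

#include <stdint.h>
#include <stdio.h>

/* Values from rte_ether.h, restated here for a standalone build. */
#define ETHER_HDR_LEN 14
#define ETHER_MAX_LEN 1518
#define ETHER_MAX_VLAN_FRAME_LEN 1522

int
main(void)
{
	uint16_t mtu = 1500;
	unsigned int max_frame_len =
		mtu + ETHER_HDR_LEN + (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN);

	/* 1500 + 14 + 4 = 1518; jumbo handling only kicks in above that. */
	printf("MTU %u -> max frame length %u (jumbo: %s)\n", mtu,
	       max_frame_len, max_frame_len > ETHER_MAX_LEN ? "yes" : "no");
	return 0;
}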
-/**
- * DPDK callback to get flow control status.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param[out] fc_conf
- * Flow control output buffer.
- *
- * @return
- * 0 on success, negative errno value on failure.
- */
-static int
-mlx4_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
-{
- struct priv *priv = dev->data->dev_private;
- struct ifreq ifr;
- struct ethtool_pauseparam ethpause = {
- .cmd = ETHTOOL_GPAUSEPARAM
- };
- int ret;
-
- if (mlx4_is_secondary())
- return -E_RTE_SECONDARY;
- ifr.ifr_data = (void *)&ethpause;
- priv_lock(priv);
- if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
- ret = errno;
- WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)"
- " failed: %s",
- strerror(ret));
- goto out;
- }
-
- fc_conf->autoneg = ethpause.autoneg;
- if (ethpause.rx_pause && ethpause.tx_pause)
- fc_conf->mode = RTE_FC_FULL;
- else if (ethpause.rx_pause)
- fc_conf->mode = RTE_FC_RX_PAUSE;
- else if (ethpause.tx_pause)
- fc_conf->mode = RTE_FC_TX_PAUSE;
- else
- fc_conf->mode = RTE_FC_NONE;
- ret = 0;
-
-out:
- priv_unlock(priv);
- assert(ret >= 0);
- return -ret;
-}
-
-/**
- * DPDK callback to modify flow control parameters.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param[in] fc_conf
- * Flow control parameters.
- *
- * @return
- * 0 on success, negative errno value on failure.
- */
-static int
-mlx4_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
-{
- struct priv *priv = dev->data->dev_private;
- struct ifreq ifr;
- struct ethtool_pauseparam ethpause = {
- .cmd = ETHTOOL_SPAUSEPARAM
- };
- int ret;
-
- if (mlx4_is_secondary())
- return -E_RTE_SECONDARY;
- ifr.ifr_data = (void *)&ethpause;
- ethpause.autoneg = fc_conf->autoneg;
- if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
- (fc_conf->mode & RTE_FC_RX_PAUSE))
- ethpause.rx_pause = 1;
- else
- ethpause.rx_pause = 0;
-
- if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
- (fc_conf->mode & RTE_FC_TX_PAUSE))
- ethpause.tx_pause = 1;
- else
- ethpause.tx_pause = 0;
-
- priv_lock(priv);
- if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
- ret = errno;
- WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
- " failed: %s",
- strerror(ret));
- goto out;
- }
- ret = 0;
-
-out:
- priv_unlock(priv);
- assert(ret >= 0);
- return -ret;
-}
-
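On the application side, the two callbacks above are reached through rte_eth_dev_flow_ctrl_get()/rte_eth_dev_flow_ctrl_set(). A short usage sketch; the helper name is made up and the port is assumed to be configured already:

#include <stdint.h>
#include <string.h>

#include <rte_ethdev.h>

/* Read the current pause settings, then request full flow control. */
static int
enable_full_flow_control(uint16_t port_id)
{
	struct rte_eth_fc_conf fc_conf;
	int ret;

	memset(&fc_conf, 0, sizeof(fc_conf));
	ret = rte_eth_dev_flow_ctrl_get(port_id, &fc_conf);
	if (ret < 0)
		return ret; /* e.g. -ENOTSUP when the PMD lacks the callback. */
	fc_conf.mode = RTE_FC_FULL; /* Both Rx and Tx pause frames. */
	return rte_eth_dev_flow_ctrl_set(port_id, &fc_conf);
}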
-/**
- * Configure a VLAN filter.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param vlan_id
- * VLAN ID to filter.
- * @param on
- * Toggle filter.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
-{
- struct priv *priv = dev->data->dev_private;
- unsigned int i;
- unsigned int j = -1;
-
- DEBUG("%p: %s VLAN filter ID %" PRIu16,
- (void *)dev, (on ? "enable" : "disable"), vlan_id);
- for (i = 0; (i != elemof(priv->vlan_filter)); ++i) {
- if (!priv->vlan_filter[i].enabled) {
- /* Unused index, remember it. */
- j = i;
- continue;
- }
- if (priv->vlan_filter[i].id != vlan_id)
- continue;
- /* This VLAN ID is already known, use its index. */
- j = i;
- break;
- }
- /* Check if there's room for another VLAN filter. */
- if (j == (unsigned int)-1)
- return ENOMEM;
- /*
- * VLAN filters apply to all configured MAC addresses; flow
- * specifications must be reconfigured accordingly.
- */
- priv->vlan_filter[j].id = vlan_id;
- if ((on) && (!priv->vlan_filter[j].enabled)) {
- /*
- * Filter is disabled, enable it.
- * Rehashing flows in all RX queues is necessary.
- */
- if (priv->rss)
- rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
- else
- for (i = 0; (i != priv->rxqs_n); ++i)
- if ((*priv->rxqs)[i] != NULL)
- rxq_mac_addrs_del((*priv->rxqs)[i]);
- priv->vlan_filter[j].enabled = 1;
- if (priv->started) {
- if (priv->rss)
- rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
- else
- for (i = 0; (i != priv->rxqs_n); ++i) {
- if ((*priv->rxqs)[i] == NULL)
- continue;
- rxq_mac_addrs_add((*priv->rxqs)[i]);
- }
- }
- } else if ((!on) && (priv->vlan_filter[j].enabled)) {
- /*
- * Filter is enabled, disable it.
- * Rehashing flows in all RX queues is necessary.
- */
- if (priv->rss)
- rxq_mac_addrs_del(LIST_FIRST(&priv->parents));
- else
- for (i = 0; (i != priv->rxqs_n); ++i)
- if ((*priv->rxqs)[i] != NULL)
- rxq_mac_addrs_del((*priv->rxqs)[i]);
- priv->vlan_filter[j].enabled = 0;
- if (priv->started) {
- if (priv->rss)
- rxq_mac_addrs_add(LIST_FIRST(&priv->parents));
- else
- for (i = 0; (i != priv->rxqs_n); ++i) {
- if ((*priv->rxqs)[i] == NULL)
- continue;
- rxq_mac_addrs_add((*priv->rxqs)[i]);
- }
- }
- }
- return 0;
-}
-
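The helper above is what ultimately services rte_eth_dev_vlan_filter() calls; a minimal application-side sketch (port assumed started with VLAN filtering enabled in the Rx configuration, helper name illustrative):

#include <stdint.h>

#include <rte_ethdev.h>

/* Accept VLAN 100 on the given port; pass 0 instead of 1 to remove it. */
static int
allow_vlan_100(uint16_t port_id)
{
	return rte_eth_dev_vlan_filter(port_id, 100, 1);
}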
-/**
- * DPDK callback to configure a VLAN filter.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param vlan_id
- * VLAN ID to filter.
- * @param on
- * Toggle filter.
- *
- * @return
- * 0 on success, negative errno value on failure.
- */
-static int
-mlx4_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
-{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
- if (mlx4_is_secondary())
- return -E_RTE_SECONDARY;
- priv_lock(priv);
- if (priv->isolated) {
- DEBUG("%p: cannot set vlan filter, "
- "device is in isolated mode", (void *)dev);
- priv_unlock(priv);
- return -EINVAL;
- }
- ret = vlan_filter_set(dev, vlan_id, on);
- priv_unlock(priv);
- assert(ret >= 0);
- return -ret;
-}
-
-const struct rte_flow_ops mlx4_flow_ops = {
- .validate = mlx4_flow_validate,
- .create = mlx4_flow_create,
- .destroy = mlx4_flow_destroy,
- .flush = mlx4_flow_flush,
- .query = NULL,
- .isolate = mlx4_flow_isolate,
-};
-
-/**
- * Manage filter operations.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param filter_type
- * Filter type.
- * @param filter_op
- * Operation to perform.
- * @param arg
- * Pointer to operation-specific structure.
- *
- * @return
- * 0 on success, negative errno value on failure.
- */
-static int
-mlx4_dev_filter_ctrl(struct rte_eth_dev *dev,
- enum rte_filter_type filter_type,
- enum rte_filter_op filter_op,
- void *arg)
-{
- int ret = EINVAL;
-
- switch (filter_type) {
- case RTE_ETH_FILTER_GENERIC:
- if (filter_op != RTE_ETH_FILTER_GET)
- return -EINVAL;
- *(const void **)arg = &mlx4_flow_ops;
- return 0;
- case RTE_ETH_FILTER_FDIR:
- DEBUG("%p: filter type FDIR is not supported by this PMD",
- (void *)dev);
- break;
- default:
- ERROR("%p: filter type (%d) not supported",
- (void *)dev, filter_type);
- break;
- }
- return -ret;
-}
-
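The RTE_ETH_FILTER_GENERIC/RTE_ETH_FILTER_GET exchange above is only used by the rte_flow layer to discover mlx4_flow_ops; applications go through rte_flow_validate()/rte_flow_create() instead. A hedged sketch of a rule this path could be asked to handle, dropping traffic from one source MAC (helper name illustrative, 17.11-era rte_flow structures assumed):

#include <stdint.h>
#include <string.h>

#include <rte_ether.h>
#include <rte_flow.h>

/* Install an ingress rule dropping all packets from a given source MAC.
 * Returns the flow handle, or NULL with details left in rte_flow_error. */
static struct rte_flow *
drop_from_mac(uint16_t port_id, const struct ether_addr *src)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_eth eth_spec;
	struct rte_flow_item_eth eth_mask;
	struct rte_flow_item pattern[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = &eth_spec,
			.mask = &eth_mask,
		},
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	memset(&eth_spec, 0, sizeof(eth_spec));
	memset(&eth_mask, 0, sizeof(eth_mask));
	eth_spec.src = *src;
	memset(&eth_mask.src, 0xff, sizeof(eth_mask.src)); /* Full MAC match. */
	if (rte_flow_validate(port_id, &attr, pattern, actions, &error))
		return NULL;
	return rte_flow_create(port_id, &attr, pattern, actions, &error);
}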
static const struct eth_dev_ops mlx4_dev_ops = {
.dev_configure = mlx4_dev_configure,
.dev_start = mlx4_dev_start,
.dev_stop = mlx4_dev_stop,
- .dev_set_link_down = mlx4_set_link_down,
- .dev_set_link_up = mlx4_set_link_up,
+ .dev_set_link_down = mlx4_dev_set_link_down,
+ .dev_set_link_up = mlx4_dev_set_link_up,
.dev_close = mlx4_dev_close,
+ .link_update = mlx4_link_update,
.promiscuous_enable = mlx4_promiscuous_enable,
.promiscuous_disable = mlx4_promiscuous_disable,
.allmulticast_enable = mlx4_allmulticast_enable,
.allmulticast_disable = mlx4_allmulticast_disable,
- .link_update = mlx4_link_update,
+ .mac_addr_remove = mlx4_mac_addr_remove,
+ .mac_addr_add = mlx4_mac_addr_add,
+ .mac_addr_set = mlx4_mac_addr_set,
.stats_get = mlx4_stats_get,
.stats_reset = mlx4_stats_reset,
- .queue_stats_mapping_set = NULL,
.dev_infos_get = mlx4_dev_infos_get,
.dev_supported_ptypes_get = mlx4_dev_supported_ptypes_get,
.vlan_filter_set = mlx4_vlan_filter_set,
- .vlan_tpid_set = NULL,
- .vlan_strip_queue_set = NULL,
- .vlan_offload_set = NULL,
.rx_queue_setup = mlx4_rx_queue_setup,
.tx_queue_setup = mlx4_tx_queue_setup,
.rx_queue_release = mlx4_rx_queue_release,
.tx_queue_release = mlx4_tx_queue_release,
- .dev_led_on = NULL,
- .dev_led_off = NULL,
- .flow_ctrl_get = mlx4_dev_get_flow_ctrl,
- .flow_ctrl_set = mlx4_dev_set_flow_ctrl,
- .priority_flow_ctrl_set = NULL,
- .mac_addr_remove = mlx4_mac_addr_remove,
- .mac_addr_add = mlx4_mac_addr_add,
- .mac_addr_set = mlx4_mac_addr_set,
- .mtu_set = mlx4_dev_set_mtu,
- .filter_ctrl = mlx4_dev_filter_ctrl,
+ .flow_ctrl_get = mlx4_flow_ctrl_get,
+ .flow_ctrl_set = mlx4_flow_ctrl_set,
+ .mtu_set = mlx4_mtu_set,
+ .filter_ctrl = mlx4_filter_ctrl,
.rx_queue_intr_enable = mlx4_rx_intr_enable,
.rx_queue_intr_disable = mlx4_rx_intr_disable,
};
@@ -5381,7 +267,7 @@ static const struct eth_dev_ops mlx4_dev_ops = {
* PCI bus address output buffer.
*
* @return
- * 0 on success, -1 on failure and errno is set.
+ * 0 on success, negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_ibv_device_to_pci_addr(const struct ibv_device *device,
@@ -5392,8 +278,10 @@ mlx4_ibv_device_to_pci_addr(const struct ibv_device *device,
MKSTR(path, "%s/device/uevent", device->ibdev_path);
file = fopen(path, "rb");
- if (file == NULL)
- return -1;
+ if (file == NULL) {
+ rte_errno = errno;
+ return -rte_errno;
+ }
while (fgets(line, sizeof(line), file) == line) {
size_t len = strlen(line);
int ret;
@@ -5423,572 +311,48 @@ mlx4_ibv_device_to_pci_addr(const struct ibv_device *device,
}
/**
- * Get MAC address by querying netdevice.
- *
- * @param[in] priv
- * struct priv for the requested device.
- * @param[out] mac
- * MAC address output buffer.
- *
- * @return
- * 0 on success, -1 on failure and errno is set.
- */
-static int
-priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
-{
- struct ifreq request;
-
- if (priv_ifreq(priv, SIOCGIFHWADDR, &request))
- return -1;
- memcpy(mac, request.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
- return 0;
-}
-
-/* Support up to 32 adapters. */
-static struct {
- struct rte_pci_addr pci_addr; /* associated PCI address */
- uint32_t ports; /* physical ports bitfield. */
-} mlx4_dev[32];
-
-/**
- * Get device index in mlx4_dev[] from PCI bus address.
- *
- * @param[in] pci_addr
- * PCI bus address to look for.
- *
- * @return
- * mlx4_dev[] index on success, -1 on failure.
- */
-static int
-mlx4_dev_idx(struct rte_pci_addr *pci_addr)
-{
- unsigned int i;
- int ret = -1;
-
- assert(pci_addr != NULL);
- for (i = 0; (i != elemof(mlx4_dev)); ++i) {
- if ((mlx4_dev[i].pci_addr.domain == pci_addr->domain) &&
- (mlx4_dev[i].pci_addr.bus == pci_addr->bus) &&
- (mlx4_dev[i].pci_addr.devid == pci_addr->devid) &&
- (mlx4_dev[i].pci_addr.function == pci_addr->function))
- return i;
- if ((mlx4_dev[i].ports == 0) && (ret == -1))
- ret = i;
- }
- return ret;
-}
-
-/**
- * Retrieve integer value from environment variable.
- *
- * @param[in] name
- * Environment variable name.
- *
- * @return
- * Integer value, 0 if the variable is not set.
- */
-static int
-mlx4_getenv_int(const char *name)
-{
- const char *val = getenv(name);
-
- if (val == NULL)
- return 0;
- return atoi(val);
-}
-
-static void
-mlx4_dev_link_status_handler(void *);
-static void
-mlx4_dev_interrupt_handler(void *);
-
-/**
- * Link/device status handler.
- *
- * @param priv
- * Pointer to private structure.
- * @param dev
- * Pointer to the rte_eth_dev structure.
- * @param events
- * Pointer to event flags holder.
- *
- * @return
- * Number of events.
- */
-static int
-priv_dev_status_handler(struct priv *priv, struct rte_eth_dev *dev,
- uint32_t *events)
-{
- struct ibv_async_event event;
- int port_change = 0;
- struct rte_eth_link *link = &dev->data->dev_link;
- int ret = 0;
-
- *events = 0;
- /* Read all message and acknowledge them. */
- for (;;) {
- if (ibv_get_async_event(priv->ctx, &event))
- break;
- if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
- event.event_type == IBV_EVENT_PORT_ERR) &&
- (priv->intr_conf.lsc == 1)) {
- port_change = 1;
- ret++;
- } else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
- priv->intr_conf.rmv == 1) {
- *events |= (1 << RTE_ETH_EVENT_INTR_RMV);
- ret++;
- } else
- DEBUG("event type %d on port %d not handled",
- event.event_type, event.element.port_num);
- ibv_ack_async_event(&event);
- }
- if (!port_change)
- return ret;
- mlx4_link_update(dev, 0);
- if (((link->link_speed == 0) && link->link_status) ||
- ((link->link_speed != 0) && !link->link_status)) {
- if (!priv->pending_alarm) {
- /* Inconsistent status, check again later. */
- priv->pending_alarm = 1;
- rte_eal_alarm_set(MLX4_ALARM_TIMEOUT_US,
- mlx4_dev_link_status_handler,
- dev);
- }
- } else {
- *events |= (1 << RTE_ETH_EVENT_INTR_LSC);
- }
- return ret;
-}
-
-/**
- * Handle delayed link status event.
- *
- * @param arg
- * Registered argument.
- */
-static void
-mlx4_dev_link_status_handler(void *arg)
-{
- struct rte_eth_dev *dev = arg;
- struct priv *priv = dev->data->dev_private;
- uint32_t events;
- int ret;
-
- priv_lock(priv);
- assert(priv->pending_alarm == 1);
- priv->pending_alarm = 0;
- ret = priv_dev_status_handler(priv, dev, &events);
- priv_unlock(priv);
- if (ret > 0 && events & (1 << RTE_ETH_EVENT_INTR_LSC))
- _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
- NULL);
-}
-
-/**
- * Handle interrupts from the NIC.
- *
- * @param[in] intr_handle
- * Interrupt handler.
- * @param cb_arg
- * Callback argument.
- */
-static void
-mlx4_dev_interrupt_handler(void *cb_arg)
-{
- struct rte_eth_dev *dev = cb_arg;
- struct priv *priv = dev->data->dev_private;
- int ret;
- uint32_t ev;
- int i;
-
- priv_lock(priv);
- ret = priv_dev_status_handler(priv, dev, &ev);
- priv_unlock(priv);
- if (ret > 0) {
- for (i = RTE_ETH_EVENT_UNKNOWN;
- i < RTE_ETH_EVENT_MAX;
- i++) {
- if (ev & (1 << i)) {
- ev &= ~(1 << i);
- _rte_eth_dev_callback_process(dev, i, NULL,
- NULL);
- ret--;
- }
- }
- if (ret)
- WARN("%d event%s not processed", ret,
- (ret > 1 ? "s were" : " was"));
- }
-}
-
-/**
- * Uninstall interrupt handler.
- *
- * @param priv
- * Pointer to private structure.
- * @param dev
- * Pointer to the rte_eth_dev structure.
- * @return
- * 0 on success, negative errno value on failure.
- */
-static int
-priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
-{
- int ret;
-
- if (priv->intr_conf.lsc ||
- priv->intr_conf.rmv)
- return 0;
- ret = rte_intr_callback_unregister(&priv->intr_handle,
- mlx4_dev_interrupt_handler,
- dev);
- if (ret < 0) {
- ERROR("rte_intr_callback_unregister failed with %d"
- "%s%s%s", ret,
- (errno ? " (errno: " : ""),
- (errno ? strerror(errno) : ""),
- (errno ? ")" : ""));
- }
- priv->intr_handle.fd = 0;
- priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
- return ret;
-}
-
-/**
- * Install interrupt handler.
- *
- * @param priv
- * Pointer to private structure.
- * @param dev
- * Pointer to the rte_eth_dev structure.
- * @return
- * 0 on success, negative errno value on failure.
- */
-static int
-priv_dev_interrupt_handler_install(struct priv *priv,
- struct rte_eth_dev *dev)
-{
- int flags;
- int rc;
-
- /* Check whether the interrupt handler has already been installed
- * for either type of interrupt.
- */
- if (priv->intr_conf.lsc &&
- priv->intr_conf.rmv &&
- priv->intr_handle.fd)
- return 0;
- assert(priv->ctx->async_fd > 0);
- flags = fcntl(priv->ctx->async_fd, F_GETFL);
- rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
- if (rc < 0) {
- INFO("failed to make async event queue file descriptor non-blocking");
- dev->data->dev_conf.intr_conf.lsc = 0;
- dev->data->dev_conf.intr_conf.rmv = 0;
- return -errno;
- } else {
- priv->intr_handle.fd = priv->ctx->async_fd;
- priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
- rc = rte_intr_callback_register(&priv->intr_handle,
- mlx4_dev_interrupt_handler,
- dev);
- if (rc) {
- ERROR("rte_intr_callback_register failed "
- " (errno: %s)", strerror(errno));
- return rc;
- }
- }
- return 0;
-}
-
-/**
- * Uninstall interrupt handler.
- *
- * @param priv
- * Pointer to private structure.
- * @param dev
- * Pointer to the rte_eth_dev structure.
- * @return
- * 0 on success, negative value on error.
- */
-static int
-priv_dev_removal_interrupt_handler_uninstall(struct priv *priv,
- struct rte_eth_dev *dev)
-{
- if (dev->data->dev_conf.intr_conf.rmv) {
- priv->intr_conf.rmv = 0;
- return priv_dev_interrupt_handler_uninstall(priv, dev);
- }
- return 0;
-}
-
-/**
- * Uninstall interrupt handler.
- *
- * @param priv
- * Pointer to private structure.
- * @param dev
- * Pointer to the rte_eth_dev structure.
- * @return
- * 0 on success, negative value on error.
- */
-static int
-priv_dev_link_interrupt_handler_uninstall(struct priv *priv,
- struct rte_eth_dev *dev)
-{
- int ret = 0;
-
- if (dev->data->dev_conf.intr_conf.lsc) {
- priv->intr_conf.lsc = 0;
- ret = priv_dev_interrupt_handler_uninstall(priv, dev);
- if (ret)
- return ret;
- }
- if (priv->pending_alarm)
- if (rte_eal_alarm_cancel(mlx4_dev_link_status_handler,
- dev)) {
- ERROR("rte_eal_alarm_cancel failed "
- " (errno: %s)", strerror(rte_errno));
- return -rte_errno;
- }
- priv->pending_alarm = 0;
- return 0;
-}
-
-/**
- * Install link interrupt handler.
- *
- * @param priv
- * Pointer to private structure.
- * @param dev
- * Pointer to the rte_eth_dev structure.
- * @return
- * 0 on success, negative value on error.
- */
-static int
-priv_dev_link_interrupt_handler_install(struct priv *priv,
- struct rte_eth_dev *dev)
-{
- int ret;
-
- if (dev->data->dev_conf.intr_conf.lsc) {
- ret = priv_dev_interrupt_handler_install(priv, dev);
- if (ret)
- return ret;
- priv->intr_conf.lsc = 1;
- }
- return 0;
-}
-
-/**
- * Install removal interrupt handler.
- *
- * @param priv
- * Pointer to private structure.
- * @param dev
- * Pointer to the rte_eth_dev structure.
- * @return
- * 0 on success, negative value on error.
- */
-static int
-priv_dev_removal_interrupt_handler_install(struct priv *priv,
- struct rte_eth_dev *dev)
-{
- int ret;
-
- if (dev->data->dev_conf.intr_conf.rmv) {
- ret = priv_dev_interrupt_handler_install(priv, dev);
- if (ret)
- return ret;
- priv->intr_conf.rmv = 1;
- }
- return 0;
-}
-
-/**
- * Allocate queue vector and fill epoll fd list for Rx interrupts.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 0 on success, negative on failure.
- */
-static int
-priv_rx_intr_vec_enable(struct priv *priv)
-{
- unsigned int i;
- unsigned int rxqs_n = priv->rxqs_n;
- unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
- unsigned int count = 0;
- struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
-
- if (!priv->dev->data->dev_conf.intr_conf.rxq)
- return 0;
- priv_rx_intr_vec_disable(priv);
- intr_handle->intr_vec = malloc(sizeof(intr_handle->intr_vec[rxqs_n]));
- if (intr_handle->intr_vec == NULL) {
- ERROR("failed to allocate memory for interrupt vector,"
- " Rx interrupts will not be supported");
- return -ENOMEM;
- }
- intr_handle->type = RTE_INTR_HANDLE_EXT;
- for (i = 0; i != n; ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
- int fd;
- int flags;
- int rc;
-
- /* Skip queues that cannot request interrupts. */
- if (!rxq || !rxq->channel) {
- /* Use invalid intr_vec[] index to disable entry. */
- intr_handle->intr_vec[i] =
- RTE_INTR_VEC_RXTX_OFFSET +
- RTE_MAX_RXTX_INTR_VEC_ID;
- continue;
- }
- if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
- ERROR("too many Rx queues for interrupt vector size"
- " (%d), Rx interrupts cannot be enabled",
- RTE_MAX_RXTX_INTR_VEC_ID);
- priv_rx_intr_vec_disable(priv);
- return -1;
- }
- fd = rxq->channel->fd;
- flags = fcntl(fd, F_GETFL);
- rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
- if (rc < 0) {
- ERROR("failed to make Rx interrupt file descriptor"
- " %d non-blocking for queue index %d", fd, i);
- priv_rx_intr_vec_disable(priv);
- return rc;
- }
- intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
- intr_handle->efds[count] = fd;
- count++;
- }
- if (!count)
- priv_rx_intr_vec_disable(priv);
- else
- intr_handle->nb_efd = count;
- return 0;
-}
-
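The intr_vec[]/efds[] entries filled in above are consumed by the generic rte_intr_rx_ctl() machinery; from an application the flow is roughly the following sketch (queue 0 of one port assumed, and the queue fd assumed to have been added to the per-thread epoll set beforehand with rte_eth_dev_rx_intr_ctl_q()):

#include <stdint.h>

#include <rte_ethdev.h>
#include <rte_interrupts.h>

/* Sleep until traffic shows up on Rx queue 0, then resume busy polling. */
static void
wait_for_rx(uint16_t port_id)
{
	struct rte_epoll_event event;

	/* Arm the queue interrupt (ends up in mlx4_rx_intr_enable()). */
	rte_eth_dev_rx_intr_enable(port_id, 0);
	/* Block until the interrupt fires or 100 ms elapse. */
	rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, 100);
	/* Disarm before returning to the poll loop. */
	rte_eth_dev_rx_intr_disable(port_id, 0);
}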
-/**
- * Clean up Rx interrupts handler.
- *
- * @param priv
- * Pointer to private structure.
- */
-static void
-priv_rx_intr_vec_disable(struct priv *priv)
-{
- struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
-
- rte_intr_free_epoll_fd(intr_handle);
- free(intr_handle->intr_vec);
- intr_handle->nb_efd = 0;
- intr_handle->intr_vec = NULL;
-}
-
-/**
- * DPDK callback for Rx queue interrupt enable.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param idx
- * Rx queue index.
- *
- * @return
- * 0 on success, negative on failure.
- */
-static int
-mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx)
-{
- struct priv *priv = dev->data->dev_private;
- struct rxq *rxq = (*priv->rxqs)[idx];
- int ret;
-
- if (!rxq || !rxq->channel)
- ret = EINVAL;
- else
- ret = ibv_req_notify_cq(rxq->cq, 0);
- if (ret)
- WARN("unable to arm interrupt on rx queue %d", idx);
- return -ret;
-}
-
-/**
- * DPDK callback for Rx queue interrupt disable.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param idx
- * Rx queue index.
- *
- * @return
- * 0 on success, negative on failure.
- */
-static int
-mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx)
-{
- struct priv *priv = dev->data->dev_private;
- struct rxq *rxq = (*priv->rxqs)[idx];
- struct ibv_cq *ev_cq;
- void *ev_ctx;
- int ret;
-
- if (!rxq || !rxq->channel) {
- ret = EINVAL;
- } else {
- ret = ibv_get_cq_event(rxq->cq->channel, &ev_cq, &ev_ctx);
- if (ret || ev_cq != rxq->cq)
- ret = EINVAL;
- }
- if (ret)
- WARN("unable to disable interrupt on rx queue %d",
- idx);
- else
- ibv_ack_cq_events(rxq->cq, 1);
- return -ret;
-}
-
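The two callbacks above each implement half of the usual Verbs completion-channel sequence (arm, wait, ack). For context, a bare libibverbs sketch of the whole loop, assuming cq and its channel already exist:

#include <stdio.h>

#include <infiniband/verbs.h>

/* One iteration of the classic arm -> wait -> ack -> poll CQ event loop. */
static int
wait_one_cq_event(struct ibv_comp_channel *channel, struct ibv_cq *cq)
{
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	struct ibv_wc wc;

	/* Arm: request a notification for the next completion. */
	if (ibv_req_notify_cq(cq, 0))
		return -1;
	/* Wait: blocks on the channel fd until the notification arrives. */
	if (ibv_get_cq_event(channel, &ev_cq, &ev_ctx) || ev_cq != cq)
		return -1;
	/* Ack: every received event must eventually be acknowledged. */
	ibv_ack_cq_events(cq, 1);
	/* Drain completions; a real consumer would process each one. */
	while (ibv_poll_cq(cq, 1, &wc) > 0)
		printf("completion status %d\n", (int)wc.status);
	return 0;
}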
-/**
* Verify and store value for device argument.
*
* @param[in] key
* Key argument to verify.
* @param[in] val
* Value associated with key.
- * @param out
- * User data.
+ * @param[in, out] conf
+ * Shared configuration data.
*
* @return
- * 0 on success, negative errno value on failure.
+ * 0 on success, negative errno value otherwise and rte_errno is set.
*/
static int
-mlx4_arg_parse(const char *key, const char *val, void *out)
+mlx4_arg_parse(const char *key, const char *val, struct mlx4_conf *conf)
{
- struct mlx4_conf *conf = out;
unsigned long tmp;
errno = 0;
tmp = strtoul(val, NULL, 0);
if (errno) {
+ rte_errno = errno;
WARN("%s: \"%s\" is not a valid integer", key, val);
- return -errno;
+ return -rte_errno;
}
if (strcmp(MLX4_PMD_PORT_KVARG, key) == 0) {
- if (tmp >= MLX4_PMD_MAX_PHYS_PORTS) {
- ERROR("invalid port index %lu (max: %u)",
- tmp, MLX4_PMD_MAX_PHYS_PORTS - 1);
+ uint32_t ports = rte_log2_u32(conf->ports.present);
+
+ if (tmp >= ports) {
+ ERROR("port index %lu outside range [0,%" PRIu32 ")",
+ tmp, ports);
return -EINVAL;
}
- conf->active_ports |= 1 << tmp;
+ if (!(conf->ports.present & (1 << tmp))) {
+ rte_errno = EINVAL;
+ ERROR("invalid port index %lu", tmp);
+ return -rte_errno;
+ }
+ conf->ports.enabled |= 1 << tmp;
} else {
+ rte_errno = EINVAL;
WARN("%s: unknown parameter", key);
- return -EINVAL;
+ return -rte_errno;
}
return 0;
}
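The parser above is driven by librte_kvargs; the same pattern in isolation looks like the sketch below, with a plain bitmask as the opaque argument (such a string typically reaches the PMD from the EAL command line, e.g. -w <PCI address>,port=0). Keeping the handler's last parameter as void * is what lets rte_kvargs_process() accept it directly, without the function-pointer cast used in the hunk above.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <rte_kvargs.h>

/* Collect "port=N" occurrences from a devargs string into a bitmask. */
static int
parse_port(const char *key, const char *val, void *opaque)
{
	uint32_t *mask = opaque;
	unsigned long tmp = strtoul(val, NULL, 0);

	(void)key;
	if (tmp >= 32)
		return -1;
	*mask |= UINT32_C(1) << tmp;
	return 0;
}

int
main(void)
{
	static const char *const keys[] = { "port", NULL };
	struct rte_kvargs *kvlist = rte_kvargs_parse("port=0,port=1", keys);
	uint32_t mask = 0;

	if (kvlist == NULL)
		return 1;
	if (rte_kvargs_process(kvlist, "port", parse_port, &mask) == 0)
		printf("enabled ports mask: 0x%" PRIx32 "\n", mask);
	rte_kvargs_free(kvlist);
	return 0;
}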
@@ -6000,7 +364,7 @@ mlx4_arg_parse(const char *key, const char *val, void *out)
* Device arguments structure.
*
* @return
- * 0 on success, negative errno value on failure.
+ * 0 on success, negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_args(struct rte_devargs *devargs, struct mlx4_conf *conf)
@@ -6014,15 +378,21 @@ mlx4_args(struct rte_devargs *devargs, struct mlx4_conf *conf)
return 0;
kvlist = rte_kvargs_parse(devargs->args, pmd_mlx4_init_params);
if (kvlist == NULL) {
+ rte_errno = EINVAL;
ERROR("failed to parse kvargs");
- return -EINVAL;
+ return -rte_errno;
}
/* Process parameters. */
for (i = 0; pmd_mlx4_init_params[i]; ++i) {
arg_count = rte_kvargs_count(kvlist, MLX4_PMD_PORT_KVARG);
while (arg_count-- > 0) {
- ret = rte_kvargs_process(kvlist, MLX4_PMD_PORT_KVARG,
- mlx4_arg_parse, conf);
+ ret = rte_kvargs_process(kvlist,
+ MLX4_PMD_PORT_KVARG,
+ (int (*)(const char *,
+ const char *,
+ void *))
+ mlx4_arg_parse,
+ conf);
if (ret != 0)
goto free_kvlist;
}
@@ -6046,7 +416,7 @@ static struct rte_pci_driver mlx4_driver;
* PCI device information.
*
* @return
- * 0 on success, negative errno value on failure.
+ * 0 on success, negative errno value otherwise and rte_errno is set.
*/
static int
mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
@@ -6057,30 +427,20 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
struct ibv_context *attr_ctx = NULL;
struct ibv_device_attr device_attr;
struct mlx4_conf conf = {
- .active_ports = 0,
+ .ports.present = 0,
};
unsigned int vf;
- int idx;
int i;
(void)pci_drv;
assert(pci_drv == &mlx4_driver);
- /* Get mlx4_dev[] index. */
- idx = mlx4_dev_idx(&pci_dev->addr);
- if (idx == -1) {
- ERROR("this driver cannot support any more adapters");
- return -ENOMEM;
- }
- DEBUG("using driver device index %d", idx);
-
- /* Save PCI address. */
- mlx4_dev[idx].pci_addr = pci_dev->addr;
list = ibv_get_device_list(&i);
if (list == NULL) {
- assert(errno);
- if (errno == ENOSYS)
+ rte_errno = errno;
+ assert(rte_errno);
+ if (rte_errno == ENOSYS)
ERROR("cannot list devices, is ib_uverbs loaded?");
- return -errno;
+ return -rte_errno;
}
assert(i >= 0);
/*
@@ -6111,190 +471,112 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
ibv_free_device_list(list);
switch (err) {
case 0:
+ rte_errno = ENODEV;
ERROR("cannot access device, is mlx4_ib loaded?");
- return -ENODEV;
+ return -rte_errno;
case EINVAL:
+ rte_errno = EINVAL;
ERROR("cannot use device, are drivers up to date?");
- return -EINVAL;
+ return -rte_errno;
}
assert(err > 0);
- return -err;
+ rte_errno = err;
+ return -rte_errno;
}
ibv_dev = list[i];
-
DEBUG("device opened");
if (ibv_query_device(attr_ctx, &device_attr)) {
- err = ENODEV;
+ rte_errno = ENODEV;
goto error;
}
INFO("%u port(s) detected", device_attr.phys_port_cnt);
-
+ conf.ports.present |= (UINT64_C(1) << device_attr.phys_port_cnt) - 1;
if (mlx4_args(pci_dev->device.devargs, &conf)) {
ERROR("failed to process device arguments");
- err = EINVAL;
+ rte_errno = EINVAL;
goto error;
}
/* Use all ports when none are defined */
- if (conf.active_ports == 0) {
- for (i = 0; i < MLX4_PMD_MAX_PHYS_PORTS; i++)
- conf.active_ports |= 1 << i;
- }
+ if (!conf.ports.enabled)
+ conf.ports.enabled = conf.ports.present;
for (i = 0; i < device_attr.phys_port_cnt; i++) {
uint32_t port = i + 1; /* ports are indexed from one */
- uint32_t test = (1 << i);
struct ibv_context *ctx = NULL;
struct ibv_port_attr port_attr;
struct ibv_pd *pd = NULL;
struct priv *priv = NULL;
struct rte_eth_dev *eth_dev = NULL;
-#ifdef HAVE_EXP_QUERY_DEVICE
- struct ibv_exp_device_attr exp_device_attr;
-#endif /* HAVE_EXP_QUERY_DEVICE */
struct ether_addr mac;
- /* If port is not active, skip. */
- if (!(conf.active_ports & (1 << i)))
+ /* If port is not enabled, skip. */
+ if (!(conf.ports.enabled & (1 << i)))
continue;
-#ifdef HAVE_EXP_QUERY_DEVICE
- exp_device_attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS;
-#ifdef RSS_SUPPORT
- exp_device_attr.comp_mask |= IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ;
-#endif /* RSS_SUPPORT */
-#endif /* HAVE_EXP_QUERY_DEVICE */
-
- DEBUG("using port %u (%08" PRIx32 ")", port, test);
-
+ DEBUG("using port %u", port);
ctx = ibv_open_device(ibv_dev);
if (ctx == NULL) {
- err = ENODEV;
+ rte_errno = ENODEV;
goto port_error;
}
-
/* Check port status. */
err = ibv_query_port(ctx, port, &port_attr);
if (err) {
- ERROR("port query failed: %s", strerror(err));
- err = ENODEV;
+ rte_errno = err;
+ ERROR("port query failed: %s", strerror(rte_errno));
goto port_error;
}
-
if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
+ rte_errno = ENOTSUP;
ERROR("port %d is not configured in Ethernet mode",
port);
- err = EINVAL;
goto port_error;
}
-
if (port_attr.state != IBV_PORT_ACTIVE)
DEBUG("port %d is not active: \"%s\" (%d)",
port, ibv_port_state_str(port_attr.state),
port_attr.state);
-
+ /* Make asynchronous FD non-blocking to handle interrupts. */
+ if (mlx4_fd_set_non_blocking(ctx->async_fd) < 0) {
+ ERROR("cannot make asynchronous FD non-blocking: %s",
+ strerror(rte_errno));
+ goto port_error;
+ }
/* Allocate protection domain. */
pd = ibv_alloc_pd(ctx);
if (pd == NULL) {
+ rte_errno = ENOMEM;
ERROR("PD allocation failure");
- err = ENOMEM;
goto port_error;
}
-
- mlx4_dev[idx].ports |= test;
-
/* from rte_ethdev.c */
priv = rte_zmalloc("ethdev private structure",
sizeof(*priv),
RTE_CACHE_LINE_SIZE);
if (priv == NULL) {
+ rte_errno = ENOMEM;
ERROR("priv allocation failure");
- err = ENOMEM;
goto port_error;
}
-
priv->ctx = ctx;
priv->device_attr = device_attr;
priv->port = port;
priv->pd = pd;
priv->mtu = ETHER_MTU;
-#ifdef HAVE_EXP_QUERY_DEVICE
- if (ibv_exp_query_device(ctx, &exp_device_attr)) {
- ERROR("ibv_exp_query_device() failed");
- err = ENODEV;
- goto port_error;
- }
-#ifdef RSS_SUPPORT
- if ((exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_QPG) &&
- (exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_UD_RSS) &&
- (exp_device_attr.comp_mask &
- IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ) &&
- (exp_device_attr.max_rss_tbl_sz > 0)) {
- priv->hw_qpg = 1;
- priv->hw_rss = 1;
- priv->max_rss_tbl_sz = exp_device_attr.max_rss_tbl_sz;
- } else {
- priv->hw_qpg = 0;
- priv->hw_rss = 0;
- priv->max_rss_tbl_sz = 0;
- }
- priv->hw_tss = !!(exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_UD_TSS);
- DEBUG("device flags: %s%s%s",
- (priv->hw_qpg ? "IBV_DEVICE_QPG " : ""),
- (priv->hw_tss ? "IBV_DEVICE_TSS " : ""),
- (priv->hw_rss ? "IBV_DEVICE_RSS " : ""));
- if (priv->hw_rss)
- DEBUG("maximum RSS indirection table size: %u",
- exp_device_attr.max_rss_tbl_sz);
-#endif /* RSS_SUPPORT */
-
- priv->hw_csum =
- ((exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) &&
- (exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_RX_CSUM_IP_PKT));
+ priv->vf = vf;
+ priv->hw_csum = !!(device_attr.device_cap_flags &
+ IBV_DEVICE_RAW_IP_CSUM);
DEBUG("checksum offloading is %ssupported",
(priv->hw_csum ? "" : "not "));
-
- priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_VXLAN_SUPPORT);
+ /* Only ConnectX-3 Pro supports tunneling. */
+ priv->hw_csum_l2tun =
+ priv->hw_csum &&
+ (device_attr.vendor_part_id ==
+ PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO);
DEBUG("L2 tunnel checksum offloads are %ssupported",
(priv->hw_csum_l2tun ? "" : "not "));
-
-#ifdef INLINE_RECV
- priv->inl_recv_size = mlx4_getenv_int("MLX4_INLINE_RECV_SIZE");
-
- if (priv->inl_recv_size) {
- exp_device_attr.comp_mask =
- IBV_EXP_DEVICE_ATTR_INLINE_RECV_SZ;
- if (ibv_exp_query_device(ctx, &exp_device_attr)) {
- INFO("Couldn't query device for inline-receive"
- " capabilities.");
- priv->inl_recv_size = 0;
- } else {
- if ((unsigned)exp_device_attr.inline_recv_sz <
- priv->inl_recv_size) {
- INFO("Max inline-receive (%d) <"
- " requested inline-receive (%u)",
- exp_device_attr.inline_recv_sz,
- priv->inl_recv_size);
- priv->inl_recv_size =
- exp_device_attr.inline_recv_sz;
- }
- }
- INFO("Set inline receive size to %u",
- priv->inl_recv_size);
- }
-#endif /* INLINE_RECV */
-#endif /* HAVE_EXP_QUERY_DEVICE */
-
- (void)mlx4_getenv_int;
- priv->vf = vf;
/* Configure the first MAC address by default. */
- if (priv_get_mac(priv, &mac.addr_bytes)) {
+ if (mlx4_get_mac(priv, &mac.addr_bytes)) {
ERROR("cannot get MAC address, is mlx4_en loaded?"
- " (errno: %s)", strerror(errno));
- err = ENODEV;
+ " (rte_errno: %s)", strerror(rte_errno));
goto port_error;
}
INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
@@ -6302,18 +584,13 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
mac.addr_bytes[0], mac.addr_bytes[1],
mac.addr_bytes[2], mac.addr_bytes[3],
mac.addr_bytes[4], mac.addr_bytes[5]);
- /* Register MAC and broadcast addresses. */
- claim_zero(priv_mac_addr_add(priv, 0,
- (const uint8_t (*)[ETHER_ADDR_LEN])
- mac.addr_bytes));
- claim_zero(priv_mac_addr_add(priv, (elemof(priv->mac) - 1),
- &(const uint8_t [ETHER_ADDR_LEN])
- { "\xff\xff\xff\xff\xff\xff" }));
+ /* Register MAC address. */
+ priv->mac[0] = mac;
#ifndef NDEBUG
{
char ifname[IF_NAMESIZE];
- if (priv_get_ifname(priv, &ifname) == 0)
+ if (mlx4_get_ifname(priv, &ifname) == 0)
DEBUG("port %u ifname is \"%s\"",
priv->port, ifname);
else
@@ -6321,9 +598,8 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
}
#endif
/* Get actual MTU if possible. */
- priv_get_mtu(priv, &priv->mtu);
+ mlx4_mtu_get(priv, &priv->mtu);
DEBUG("port %u MTU is %u", priv->port, priv->mtu);
-
/* from rte_ethdev.c */
{
char name[RTE_ETH_NAME_MAX_LEN];
@@ -6334,67 +610,41 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
}
if (eth_dev == NULL) {
ERROR("can not allocate rte ethdev");
- err = ENOMEM;
+ rte_errno = ENOMEM;
goto port_error;
}
-
- /* Secondary processes have to use local storage for their
- * private data as well as a copy of eth_dev->data, but this
- * pointer must not be modified before burst functions are
- * actually called. */
- if (mlx4_is_secondary()) {
- struct mlx4_secondary_data *sd =
- &mlx4_secondary_data[eth_dev->data->port_id];
-
- sd->primary_priv = eth_dev->data->dev_private;
- if (sd->primary_priv == NULL) {
- ERROR("no private data for port %u",
- eth_dev->data->port_id);
- err = EINVAL;
- goto port_error;
- }
- sd->shared_dev_data = eth_dev->data;
- rte_spinlock_init(&sd->lock);
- memcpy(sd->data.name, sd->shared_dev_data->name,
- sizeof(sd->data.name));
- sd->data.dev_private = priv;
- sd->data.rx_mbuf_alloc_failed = 0;
- sd->data.mtu = ETHER_MTU;
- sd->data.port_id = sd->shared_dev_data->port_id;
- sd->data.mac_addrs = priv->mac;
- eth_dev->tx_pkt_burst = mlx4_tx_burst_secondary_setup;
- eth_dev->rx_pkt_burst = mlx4_rx_burst_secondary_setup;
- } else {
- eth_dev->data->dev_private = priv;
- eth_dev->data->mac_addrs = priv->mac;
- }
+ eth_dev->data->dev_private = priv;
+ eth_dev->data->mac_addrs = priv->mac;
eth_dev->device = &pci_dev->device;
-
rte_eth_copy_pci_info(eth_dev, pci_dev);
-
eth_dev->device->driver = &mlx4_driver.driver;
-
+ /* Initialize local interrupt handle for current port. */
+ priv->intr_handle = (struct rte_intr_handle){
+ .fd = -1,
+ .type = RTE_INTR_HANDLE_EXT,
+ };
/*
- * Copy and override interrupt handle to prevent it from
- * being shared between all ethdev instances of a given PCI
- * device. This is required to properly handle Rx interrupts
- * on all ports.
+ * Override ethdev interrupt handle pointer with private
+ * handle instead of that of the parent PCI device used by
+ * default. This prevents it from being shared between all
+ * ports of the same PCI device since each of them is
+ * associated with its own Verbs context.
+ *
+ * Rx interrupts in particular require this as the PMD has
+ * no control over the registration of queue interrupts
+ * besides setting up eth_dev->intr_handle; the rest is
+ * handled by rte_intr_rx_ctl().
*/
- priv->intr_handle_dev = *eth_dev->intr_handle;
- eth_dev->intr_handle = &priv->intr_handle_dev;
-
+ eth_dev->intr_handle = &priv->intr_handle;
priv->dev = eth_dev;
eth_dev->dev_ops = &mlx4_dev_ops;
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
-
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
- priv_set_flags(priv, ~IFF_UP, IFF_UP);
+ mlx4_dev_set_link_up(priv->dev);
/* Update link status once if waiting for LSC. */
if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
mlx4_link_update(eth_dev, 0);
continue;
-
port_error:
rte_free(priv);
if (pd)
@@ -6405,27 +655,21 @@ port_error:
rte_eth_dev_release_port(eth_dev);
break;
}
-
+ if (i == device_attr.phys_port_cnt)
+ return 0;
/*
* XXX if something went wrong in the loop above, there is a resource
	 * leak (ctx, pd, priv, DPDK ethdev) but we can do nothing about it as
	 * long as DPDK does not provide a way to deallocate an ethdev and a
* way to enumerate the registered ethdevs to free the previous ones.
*/
-
- /* no port found, complain */
- if (!mlx4_dev[idx].ports) {
- err = ENODEV;
- goto error;
- }
-
error:
if (attr_ctx)
claim_zero(ibv_close_device(attr_ctx));
if (list)
ibv_free_device_list(list);
- assert(err >= 0);
- return -err;
+ assert(rte_errno >= 0);
+ return -rte_errno;
}
static const struct rte_pci_id mlx4_pci_id_map[] = {
@@ -6463,7 +707,6 @@ RTE_INIT(rte_mlx4_pmd_init);
static void
rte_mlx4_pmd_init(void)
{
- RTE_BUILD_BUG_ON(sizeof(wr_id_t) != sizeof(uint64_t));
/*
* RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
* huge pages. Calling ibv_fork_init() during init allows
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index c0ade4f1..3aeef87e 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
- * Copyright 2012-2017 6WIND S.A.
- * Copyright 2012-2017 Mellanox.
+ * Copyright 2012 6WIND S.A.
+ * Copyright 2012 Mellanox
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -34,29 +34,11 @@
#ifndef RTE_PMD_MLX4_H_
#define RTE_PMD_MLX4_H_
-#include <stddef.h>
+#include <net/if.h>
#include <stdint.h>
-#include <limits.h>
+#include <sys/queue.h>
-/*
- * Runtime logging through RTE_LOG() is enabled when not in debugging mode.
- * Intermediate LOG_*() macros add the required end-of-line characters.
- */
-#ifndef NDEBUG
-#define INFO(...) DEBUG(__VA_ARGS__)
-#define WARN(...) DEBUG(__VA_ARGS__)
-#define ERROR(...) DEBUG(__VA_ARGS__)
-#else
-#define LOG__(level, m, ...) \
- RTE_LOG(level, PMD, MLX4_DRIVER_NAME ": " m "%c", __VA_ARGS__)
-#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
-#define INFO(...) LOG_(INFO, __VA_ARGS__)
-#define WARN(...) LOG_(WARNING, __VA_ARGS__)
-#define ERROR(...) LOG_(ERR, __VA_ARGS__)
-#endif
-
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
@@ -65,36 +47,25 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/*
- * Maximum number of simultaneous MAC addresses supported.
- *
- * According to ConnectX's Programmer Reference Manual:
- * The L2 Address Match is implemented by comparing a MAC/VLAN combination
- * of 128 MAC addresses and 127 VLAN values, comprising 128x127 possible
- * L2 addresses.
- */
-#define MLX4_MAX_MAC_ADDRESSES 128
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_interrupts.h>
+#include <rte_mempool.h>
+#include <rte_spinlock.h>
-/* Maximum number of simultaneous VLAN filters supported. See above. */
-#define MLX4_MAX_VLAN_IDS 127
+/** Maximum number of simultaneous MAC addresses. This value is arbitrary. */
+#define MLX4_MAX_MAC_ADDRESSES 128
-/* Request send completion once in every 64 sends, might be less. */
+/** Request send completion once in every 64 sends, might be less. */
#define MLX4_PMD_TX_PER_COMP_REQ 64
-/* Maximum number of physical ports. */
-#define MLX4_PMD_MAX_PHYS_PORTS 2
-
-/* Maximum number of Scatter/Gather Elements per Work Request. */
-#ifndef MLX4_PMD_SGE_WR_N
-#define MLX4_PMD_SGE_WR_N 4
-#endif
-
-/* Maximum size for inline data. */
-#ifndef MLX4_PMD_MAX_INLINE
+/** Maximum size for inline data. */
#define MLX4_PMD_MAX_INLINE 0
-#endif
-/*
+/** Fixed RSS hash key size in bytes. Cannot be modified. */
+#define MLX4_RSS_HASH_KEY_SIZE 40
+
+/**
* Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP
* from which buffers are to be transmitted will have to be mapped by this
* driver to their own Memory Region (MR). This is a slow operation.
@@ -105,18 +76,10 @@
#define MLX4_PMD_TX_MP_CACHE 8
#endif
-/*
- * If defined, only use software counters. The PMD will never ask the hardware
- * for these, and many of them won't be available.
- */
-#ifndef MLX4_PMD_SOFT_COUNTERS
-#define MLX4_PMD_SOFT_COUNTERS 1
-#endif
-
-/* Alarm timeout. */
-#define MLX4_ALARM_TIMEOUT_US 100000
+/** Interrupt alarm timeout value in microseconds. */
+#define MLX4_INTR_ALARM_TIMEOUT 100000
-/* Port parameter. */
+/** Port parameter. */
#define MLX4_PMD_PORT_KVARG "port"
enum {
@@ -129,258 +92,92 @@ enum {
PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO = 0x1007,
};
+/** Driver name reported to lower layers and used in log output. */
#define MLX4_DRIVER_NAME "net_mlx4"
-/* Bit-field manipulation. */
-#define BITFIELD_DECLARE(bf, type, size) \
- type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \
- !!((size_t)(size) % (sizeof(type) * CHAR_BIT)))]
-#define BITFIELD_DEFINE(bf, type, size) \
- BITFIELD_DECLARE((bf), type, (size)) = { 0 }
-#define BITFIELD_SET(bf, b) \
- (assert((size_t)(b) < (sizeof(bf) * CHAR_BIT)), \
- (void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] |= \
- ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT)))))
-#define BITFIELD_RESET(bf, b) \
- (assert((size_t)(b) < (sizeof(bf) * CHAR_BIT)), \
- (void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] &= \
- ~((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT)))))
-#define BITFIELD_ISSET(bf, b) \
- (assert((size_t)(b) < (sizeof(bf) * CHAR_BIT)), \
- !!(((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] & \
- ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))))
-
-/* Number of elements in array. */
-#define elemof(a) (sizeof(a) / sizeof((a)[0]))
-
-/* Cast pointer p to structure member m to its parent structure of type t. */
-#define containerof(p, t, m) ((t *)((uint8_t *)(p) - offsetof(t, m)))
-
-/* Branch prediction helpers. */
-#ifndef likely
-#define likely(c) __builtin_expect(!!(c), 1)
-#endif
-#ifndef unlikely
-#define unlikely(c) __builtin_expect(!!(c), 0)
-#endif
-
-/* Debugging */
-#ifndef NDEBUG
-#include <stdio.h>
-#define DEBUG__(m, ...) \
- (fprintf(stderr, "%s:%d: %s(): " m "%c", \
- __FILE__, __LINE__, __func__, __VA_ARGS__), \
- fflush(stderr), \
- (void)0)
-/*
- * Save/restore errno around DEBUG__().
- * XXX somewhat undefined behavior, but works.
- */
-#define DEBUG_(...) \
- (errno = ((int []){ \
- *(volatile int *)&errno, \
- (DEBUG__(__VA_ARGS__), 0) \
- })[0])
-#define DEBUG(...) DEBUG_(__VA_ARGS__, '\n')
-#ifndef MLX4_PMD_DEBUG_BROKEN_VERBS
-#define claim_zero(...) assert((__VA_ARGS__) == 0)
-#else /* MLX4_PMD_DEBUG_BROKEN_VERBS */
-#define claim_zero(...) \
- (void)(((__VA_ARGS__) == 0) || \
- DEBUG("Assertion `(" # __VA_ARGS__ ") == 0' failed (IGNORED)."))
-#endif /* MLX4_PMD_DEBUG_BROKEN_VERBS */
-#define claim_nonzero(...) assert((__VA_ARGS__) != 0)
-#define claim_positive(...) assert((__VA_ARGS__) >= 0)
-#else /* NDEBUG */
-/* No-ops. */
-#define DEBUG(...) (void)0
-#define claim_zero(...) (__VA_ARGS__)
-#define claim_nonzero(...) (__VA_ARGS__)
-#define claim_positive(...) (__VA_ARGS__)
-#endif /* NDEBUG */
-
-struct mlx4_rxq_stats {
- unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
- uint64_t ipackets; /**< Total of successfully received packets. */
- uint64_t ibytes; /**< Total of successfully received bytes. */
-#endif
- uint64_t idropped; /**< Total of packets dropped when RX ring full. */
- uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
-};
-
-/* RX element (scattered packets). */
-struct rxq_elt_sp {
- struct ibv_recv_wr wr; /* Work Request. */
- struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
- struct rte_mbuf *bufs[MLX4_PMD_SGE_WR_N]; /* SGEs buffers. */
-};
-
-/* RX element. */
-struct rxq_elt {
- struct ibv_recv_wr wr; /* Work Request. */
- struct ibv_sge sge; /* Scatter/Gather Element. */
- /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
-};
-
-/* RX queue descriptor. */
-struct rxq {
- LIST_ENTRY(rxq) next; /* Used by parent queue only */
- struct priv *priv; /* Back pointer to private data. */
- struct rte_mempool *mp; /* Memory Pool for allocations. */
- struct ibv_mr *mr; /* Memory Region (for mp). */
- struct ibv_cq *cq; /* Completion Queue. */
- struct ibv_qp *qp; /* Queue Pair. */
- struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
- struct ibv_exp_cq_family *if_cq; /* CQ interface. */
- struct ibv_comp_channel *channel;
- /*
- * Each VLAN ID requires a separate flow steering rule.
- */
- BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
- struct ibv_flow *mac_flow[MLX4_MAX_MAC_ADDRESSES][MLX4_MAX_VLAN_IDS];
- struct ibv_flow *promisc_flow; /* Promiscuous flow. */
- struct ibv_flow *allmulti_flow; /* Multicast flow. */
- unsigned int port_id; /* Port ID for incoming packets. */
- unsigned int elts_n; /* (*elts)[] length. */
- unsigned int elts_head; /* Current index in (*elts)[]. */
- union {
- struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
- struct rxq_elt (*no_sp)[]; /* RX elements. */
- } elts;
- unsigned int sp:1; /* Use scattered RX elements. */
- unsigned int csum:1; /* Enable checksum offloading. */
- unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
- struct mlx4_rxq_stats stats; /* RX queue counters. */
- unsigned int socket; /* CPU socket ID for allocations. */
- struct ibv_exp_res_domain *rd; /* Resource Domain. */
- struct {
- uint16_t queues_n;
- uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
- } rss;
-};
-
-/* TX element. */
-struct txq_elt {
- struct rte_mbuf *buf;
-};
-
-struct mlx4_txq_stats {
- unsigned int idx; /**< Mapping index. */
-#ifdef MLX4_PMD_SOFT_COUNTERS
- uint64_t opackets; /**< Total of successfully sent packets. */
- uint64_t obytes; /**< Total of successfully sent bytes. */
-#endif
- uint64_t odropped; /**< Total of packets not sent when TX ring full. */
-};
-
-/*
- * Linear buffer type. It is used when transmitting buffers with too many
- * segments that do not fit the hardware queue (see max_send_sge).
- * Extra segments are copied (linearized) in such buffers, replacing the
- * last SGE during TX.
- * The size is arbitrary but large enough to hold a jumbo frame with
- * 8 segments considering mbuf.buf_len is about 2048 bytes.
- */
-typedef uint8_t linear_t[16384];
+struct mlx4_drop;
+struct mlx4_rss;
+struct rxq;
+struct txq;
+struct rte_flow;
-/* TX queue descriptor. */
-struct txq {
- struct priv *priv; /* Back pointer to private data. */
- struct {
- const struct rte_mempool *mp; /* Cached Memory Pool. */
- struct ibv_mr *mr; /* Memory Region (for mp). */
- uint32_t lkey; /* mr->lkey */
- } mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
- struct ibv_cq *cq; /* Completion Queue. */
- struct ibv_qp *qp; /* Queue Pair. */
- struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
- struct ibv_exp_cq_family *if_cq; /* CQ interface. */
-#if MLX4_PMD_MAX_INLINE > 0
- uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
-#endif
- unsigned int elts_n; /* (*elts)[] length. */
- struct txq_elt (*elts)[]; /* TX elements. */
- unsigned int elts_head; /* Current index in (*elts)[]. */
- unsigned int elts_tail; /* First element awaiting completion. */
- unsigned int elts_comp; /* Number of completion requests. */
- unsigned int elts_comp_cd; /* Countdown for next completion request. */
- unsigned int elts_comp_cd_init; /* Initial value for countdown. */
- struct mlx4_txq_stats stats; /* TX queue counters. */
- linear_t (*elts_linear)[]; /* Linearized buffers. */
- struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */
- unsigned int socket; /* CPU socket ID for allocations. */
- struct ibv_exp_res_domain *rd; /* Resource Domain. */
+/** Memory region descriptor. */
+struct mlx4_mr {
+ LIST_ENTRY(mlx4_mr) next; /**< Next entry in list. */
+ uintptr_t start; /**< Base address for memory region. */
+ uintptr_t end; /**< End address for memory region. */
+ uint32_t lkey; /**< L_Key extracted from @p mr. */
+ uint32_t refcnt; /**< Reference count for this object. */
+ struct priv *priv; /**< Back pointer to private data. */
+ struct ibv_mr *mr; /**< Memory region associated with @p mp. */
+ struct rte_mempool *mp; /**< Target memory pool (mempool). */
};
-struct rte_flow;
-
+/** Private data structure. */
struct priv {
- struct rte_eth_dev *dev; /* Ethernet device. */
- struct ibv_context *ctx; /* Verbs context. */
- struct ibv_device_attr device_attr; /* Device properties. */
- struct ibv_pd *pd; /* Protection Domain. */
- /*
- * MAC addresses array and configuration bit-field.
- * An extra entry that cannot be modified by the DPDK is reserved
- * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
- */
- struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
- BITFIELD_DECLARE(mac_configured, uint32_t, MLX4_MAX_MAC_ADDRESSES);
- /* VLAN filters. */
- struct {
- unsigned int enabled:1; /* If enabled. */
- unsigned int id:12; /* VLAN ID (0-4095). */
- } vlan_filter[MLX4_MAX_VLAN_IDS]; /* VLAN filters table. */
+ struct rte_eth_dev *dev; /**< Ethernet device. */
+ struct ibv_context *ctx; /**< Verbs context. */
+ struct ibv_device_attr device_attr; /**< Device properties. */
+ struct ibv_pd *pd; /**< Protection Domain. */
/* Device properties. */
- uint16_t mtu; /* Configured MTU. */
- uint8_t port; /* Physical port number. */
- unsigned int started:1; /* Device started, flows enabled. */
- unsigned int promisc:1; /* Device in promiscuous mode. */
- unsigned int allmulti:1; /* Device receives all multicast packets. */
- unsigned int hw_qpg:1; /* QP groups are supported. */
- unsigned int hw_tss:1; /* TSS is supported. */
- unsigned int hw_rss:1; /* RSS is supported. */
- unsigned int hw_csum:1; /* Checksum offload is supported. */
- unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
- unsigned int rss:1; /* RSS is enabled. */
- unsigned int vf:1; /* This is a VF device. */
- unsigned int pending_alarm:1; /* An alarm is pending. */
- unsigned int isolated:1; /* Toggle isolated mode. */
-#ifdef INLINE_RECV
- unsigned int inl_recv_size; /* Inline recv size */
-#endif
- unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
- /* RX/TX queues. */
- unsigned int rxqs_n; /* RX queues array size. */
- unsigned int txqs_n; /* TX queues array size. */
- struct rxq *(*rxqs)[]; /* RX queues. */
- struct txq *(*txqs)[]; /* TX queues. */
- struct rte_intr_handle intr_handle_dev; /* Device interrupt handler. */
- struct rte_intr_handle intr_handle; /* Interrupt handler. */
- struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
- LIST_HEAD(mlx4_flows, rte_flow) flows;
- struct rte_intr_conf intr_conf; /* Active interrupt configuration. */
- LIST_HEAD(mlx4_parents, rxq) parents;
- rte_spinlock_t lock; /* Lock for control functions. */
+ uint16_t mtu; /**< Configured MTU. */
+ uint8_t port; /**< Physical port number. */
+ uint32_t started:1; /**< Device started, flows enabled. */
+ uint32_t vf:1; /**< This is a VF device. */
+ uint32_t intr_alarm:1; /**< An interrupt alarm is scheduled. */
+ uint32_t isolated:1; /**< Toggle isolated mode. */
+ uint32_t hw_csum:1; /* Checksum offload is supported. */
+ uint32_t hw_csum_l2tun:1; /* Checksum support for L2 tunnels. */
+ struct rte_intr_handle intr_handle; /**< Port interrupt handle. */
+ struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */
+ LIST_HEAD(, mlx4_rss) rss; /**< Shared targets for Rx flow rules. */
+ LIST_HEAD(, rte_flow) flows; /**< Configured flow rule handles. */
+ LIST_HEAD(, mlx4_mr) mr; /**< Registered memory regions. */
+ rte_spinlock_t mr_lock; /**< Lock for @p mr access. */
+ struct ether_addr mac[MLX4_MAX_MAC_ADDRESSES];
+ /**< Configured MAC addresses. Unused entries are zeroed. */
};
-void priv_lock(struct priv *priv);
-void priv_unlock(struct priv *priv);
-
-int
-rxq_create_qp(struct rxq *rxq,
- uint16_t desc,
- int inactive,
- int children_n,
- struct rxq *rxq_parent);
-
-void
-rxq_parent_cleanup(struct rxq *parent);
-
-struct rxq *
-priv_parent_create(struct priv *priv,
- uint16_t queues[],
- uint16_t children_n);
+/* mlx4_ethdev.c */
+
+int mlx4_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE]);
+int mlx4_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN]);
+int mlx4_mtu_get(struct priv *priv, uint16_t *mtu);
+int mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
+int mlx4_dev_set_link_down(struct rte_eth_dev *dev);
+int mlx4_dev_set_link_up(struct rte_eth_dev *dev);
+void mlx4_promiscuous_enable(struct rte_eth_dev *dev);
+void mlx4_promiscuous_disable(struct rte_eth_dev *dev);
+void mlx4_allmulticast_enable(struct rte_eth_dev *dev);
+void mlx4_allmulticast_disable(struct rte_eth_dev *dev);
+void mlx4_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
+int mlx4_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+ uint32_t index, uint32_t vmdq);
+void mlx4_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr);
+int mlx4_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on);
+int mlx4_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
+void mlx4_stats_reset(struct rte_eth_dev *dev);
+void mlx4_dev_infos_get(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *info);
+int mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete);
+int mlx4_flow_ctrl_get(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf);
+int mlx4_flow_ctrl_set(struct rte_eth_dev *dev,
+ struct rte_eth_fc_conf *fc_conf);
+const uint32_t *mlx4_dev_supported_ptypes_get(struct rte_eth_dev *dev);
+
+/* mlx4_intr.c */
+
+int mlx4_intr_uninstall(struct priv *priv);
+int mlx4_intr_install(struct priv *priv);
+int mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx);
+int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx);
+
+/* mlx4_mr.c */
+
+struct mlx4_mr *mlx4_mr_get(struct priv *priv, struct rte_mempool *mp);
+void mlx4_mr_put(struct mlx4_mr *mr);
+uint32_t mlx4_txq_add_mr(struct txq *txq, struct rte_mempool *mp,
+ uint32_t i);
#endif /* RTE_PMD_MLX4_H_ */
diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c
new file mode 100644
index 00000000..c2ea4db1
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_ethdev.c
@@ -0,0 +1,1047 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Miscellaneous control operations for mlx4 driver.
+ */
+
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_bus_pci.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_flow.h>
+#include <rte_pci.h>
+
+#include "mlx4.h"
+#include "mlx4_flow.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+/**
+ * Get interface name from private structure.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ * @param[out] ifname
+ * Interface name output buffer.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
+{
+ DIR *dir;
+ struct dirent *dent;
+ unsigned int dev_type = 0;
+ unsigned int dev_port_prev = ~0u;
+ char match[IF_NAMESIZE] = "";
+
+ {
+ MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);
+
+ dir = opendir(path);
+ if (dir == NULL) {
+ rte_errno = errno;
+ return -rte_errno;
+ }
+ }
+ while ((dent = readdir(dir)) != NULL) {
+ char *name = dent->d_name;
+ FILE *file;
+ unsigned int dev_port;
+ int r;
+
+ if ((name[0] == '.') &&
+ ((name[1] == '\0') ||
+ ((name[1] == '.') && (name[2] == '\0'))))
+ continue;
+
+ MKSTR(path, "%s/device/net/%s/%s",
+ priv->ctx->device->ibdev_path, name,
+ (dev_type ? "dev_id" : "dev_port"));
+
+ file = fopen(path, "rb");
+ if (file == NULL) {
+ if (errno != ENOENT)
+ continue;
+ /*
+ * Switch to dev_id when dev_port does not exist as
+ * is the case with Linux kernel versions < 3.15.
+ */
+try_dev_id:
+ match[0] = '\0';
+ if (dev_type)
+ break;
+ dev_type = 1;
+ dev_port_prev = ~0u;
+ rewinddir(dir);
+ continue;
+ }
+ r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
+ fclose(file);
+ if (r != 1)
+ continue;
+ /*
+ * Switch to dev_id when dev_port returns the same value for
+ * all ports. May happen when using a MOFED release older than
+ * 3.0 with a Linux kernel >= 3.15.
+ */
+ if (dev_port == dev_port_prev)
+ goto try_dev_id;
+ dev_port_prev = dev_port;
+ if (dev_port == (priv->port - 1u))
+ snprintf(match, sizeof(match), "%s", name);
+ }
+ closedir(dir);
+ if (match[0] == '\0') {
+ rte_errno = ENODEV;
+ return -rte_errno;
+ }
+ strncpy(*ifname, match, sizeof(*ifname));
+ return 0;
+}
+
+/**
+ * Read from sysfs entry.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ * @param[in] entry
+ * Entry name relative to sysfs path.
+ * @param[out] buf
+ * Data output buffer.
+ * @param size
+ * Buffer size.
+ *
+ * @return
+ * Number of bytes read on success, negative errno value otherwise and
+ * rte_errno is set.
+ */
+static int
+mlx4_sysfs_read(const struct priv *priv, const char *entry,
+ char *buf, size_t size)
+{
+ char ifname[IF_NAMESIZE];
+ FILE *file;
+ int ret;
+
+ ret = mlx4_get_ifname(priv, &ifname);
+ if (ret)
+ return ret;
+
+ MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
+ ifname, entry);
+
+ file = fopen(path, "rb");
+ if (file == NULL) {
+ rte_errno = errno;
+ return -rte_errno;
+ }
+ ret = fread(buf, 1, size, file);
+ if ((size_t)ret < size && ferror(file)) {
+ rte_errno = EIO;
+ ret = -rte_errno;
+ } else {
+ ret = size;
+ }
+ fclose(file);
+ return ret;
+}
+
+/**
+ * Write to sysfs entry.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ * @param[in] entry
+ * Entry name relative to sysfs path.
+ * @param[in] buf
+ * Data buffer.
+ * @param size
+ * Buffer size.
+ *
+ * @return
+ * Number of bytes written on success, negative errno value otherwise and
+ * rte_errno is set.
+ */
+static int
+mlx4_sysfs_write(const struct priv *priv, const char *entry,
+ char *buf, size_t size)
+{
+ char ifname[IF_NAMESIZE];
+ FILE *file;
+ int ret;
+
+ ret = mlx4_get_ifname(priv, &ifname);
+ if (ret)
+ return ret;
+
+ MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
+ ifname, entry);
+
+ file = fopen(path, "wb");
+ if (file == NULL) {
+ rte_errno = errno;
+ return -rte_errno;
+ }
+ ret = fwrite(buf, 1, size, file);
+ if ((size_t)ret < size || ferror(file)) {
+ rte_errno = EIO;
+ ret = -rte_errno;
+ } else {
+ ret = size;
+ }
+ fclose(file);
+ return ret;
+}
+
+/**
+ * Get unsigned long sysfs property.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] name
+ * Entry name relative to sysfs path.
+ * @param[out] value
+ * Value output buffer.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
+{
+ int ret;
+ unsigned long value_ret;
+ char value_str[32];
+
+ ret = mlx4_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
+ if (ret < 0) {
+ DEBUG("cannot read %s value from sysfs: %s",
+ name, strerror(rte_errno));
+ return ret;
+ }
+ value_str[ret] = '\0';
+ errno = 0;
+ value_ret = strtoul(value_str, NULL, 0);
+ if (errno) {
+ rte_errno = errno;
+ DEBUG("invalid %s value `%s': %s", name, value_str,
+ strerror(rte_errno));
+ return -rte_errno;
+ }
+ *value = value_ret;
+ return 0;
+}
+
+/**
+ * Set unsigned long sysfs property.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] name
+ * Entry name relative to sysfs path.
+ * @param value
+ * Value to set.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
+{
+ int ret;
+ MKSTR(value_str, "%lu", value);
+
+ ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
+ if (ret < 0) {
+ DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
+ name, value_str, value, strerror(rte_errno));
+ return ret;
+ }
+ return 0;
+}
+
+/**
+ * Perform ifreq ioctl() on associated Ethernet device.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ * @param req
+ * Request number to pass to ioctl().
+ * @param[out] ifr
+ * Interface request structure output buffer.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_ifreq(const struct priv *priv, int req, struct ifreq *ifr)
+{
+ int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
+ int ret;
+
+ if (sock == -1) {
+ rte_errno = errno;
+ return -rte_errno;
+ }
+ ret = mlx4_get_ifname(priv, &ifr->ifr_name);
+ if (!ret && ioctl(sock, req, ifr) == -1) {
+ rte_errno = errno;
+ ret = -rte_errno;
+ }
+ close(sock);
+ return ret;
+}
+
+/**
+ * Get MAC address by querying netdevice.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ * @param[out] mac
+ * MAC address output buffer.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
+{
+ struct ifreq request;
+ int ret = mlx4_ifreq(priv, SIOCGIFHWADDR, &request);
+
+ if (ret)
+ return ret;
+ memcpy(mac, request.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
+ return 0;
+}
+
+/**
+ * Get device MTU.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[out] mtu
+ * MTU value output buffer.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_mtu_get(struct priv *priv, uint16_t *mtu)
+{
+ unsigned long ulong_mtu = 0;
+ int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu);
+
+ if (ret)
+ return ret;
+ *mtu = ulong_mtu;
+ return 0;
+}
+
+/**
+ * DPDK callback to change the MTU.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param mtu
+ * MTU value to set.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+ struct priv *priv = dev->data->dev_private;
+ uint16_t new_mtu;
+ int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu);
+
+ if (ret)
+ return ret;
+ ret = mlx4_mtu_get(priv, &new_mtu);
+ if (ret)
+ return ret;
+ if (new_mtu == mtu) {
+ priv->mtu = mtu;
+ return 0;
+ }
+ rte_errno = EINVAL;
+ return -rte_errno;
+}
+
+/**
+ * Set device flags.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param keep
+ * Bitmask for flags that must remain untouched.
+ * @param flags
+ * Bitmask for flags to modify.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
+{
+ unsigned long tmp = 0;
+ int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp);
+
+ if (ret)
+ return ret;
+ tmp &= keep;
+ tmp |= (flags & (~keep));
+ return mlx4_set_sysfs_ulong(priv, "flags", tmp);
+}
+
+/**
+ * Change the link state (UP / DOWN).
+ *
+ * @param priv
+ * Pointer to Ethernet device private data.
+ * @param up
+ * Nonzero for link up, otherwise link down.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_dev_set_link(struct priv *priv, int up)
+{
+ int err;
+
+ if (up) {
+ err = mlx4_set_flags(priv, ~IFF_UP, IFF_UP);
+ if (err)
+ return err;
+ } else {
+ err = mlx4_set_flags(priv, ~IFF_UP, ~IFF_UP);
+ if (err)
+ return err;
+ }
+ return 0;
+}
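
The keep/flags arithmetic above (mlx4_set_flags() combined with the ~IFF_UP arguments in mlx4_dev_set_link()) is compact but easy to misread. A minimal standalone sketch, using a hypothetical apply_flags() helper and a stand-in DEMO_IFF_UP constant rather than the real <net/if.h> flag, shows how the same expression sets or clears a single bit while preserving the rest:

/* Illustrative sketch only, not part of the patch. */
#include <assert.h>

#define DEMO_IFF_UP 0x1u /* stand-in for IFF_UP from <net/if.h> */

/* Same arithmetic as mlx4_set_flags(): bits selected by "keep" are
 * preserved, the remaining bits are taken from "flags". */
static unsigned long
apply_flags(unsigned long current, unsigned int keep, unsigned int flags)
{
	current &= keep;
	current |= (flags & ~keep);
	return current;
}

int
main(void)
{
	unsigned long ifr_flags = 0x1003; /* arbitrary flags, IFF_UP set */

	/* Link up: mlx4_set_flags(priv, ~IFF_UP, IFF_UP) sets the bit. */
	assert(apply_flags(ifr_flags, ~DEMO_IFF_UP, DEMO_IFF_UP) & DEMO_IFF_UP);
	/* Link down: mlx4_set_flags(priv, ~IFF_UP, ~IFF_UP) clears it. */
	assert(!(apply_flags(ifr_flags, ~DEMO_IFF_UP, ~DEMO_IFF_UP) &
		 DEMO_IFF_UP));
	return 0;
}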
+
+/**
+ * DPDK callback to bring the link DOWN.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_dev_set_link_down(struct rte_eth_dev *dev)
+{
+ struct priv *priv = dev->data->dev_private;
+
+ return mlx4_dev_set_link(priv, 0);
+}
+
+/**
+ * DPDK callback to bring the link UP.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_dev_set_link_up(struct rte_eth_dev *dev)
+{
+ struct priv *priv = dev->data->dev_private;
+
+ return mlx4_dev_set_link(priv, 1);
+}
+
+/**
+ * Supported Rx mode toggles.
+ *
+ * Even and odd values respectively stand for off and on.
+ */
+enum rxmode_toggle {
+ RXMODE_TOGGLE_PROMISC_OFF,
+ RXMODE_TOGGLE_PROMISC_ON,
+ RXMODE_TOGGLE_ALLMULTI_OFF,
+ RXMODE_TOGGLE_ALLMULTI_ON,
+};
+
+/**
+ * Helper function to toggle promiscuous and all multicast modes.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param toggle
+ * Toggle to set.
+ */
+static void
+mlx4_rxmode_toggle(struct rte_eth_dev *dev, enum rxmode_toggle toggle)
+{
+ struct priv *priv = dev->data->dev_private;
+ const char *mode;
+ struct rte_flow_error error;
+
+ switch (toggle) {
+ case RXMODE_TOGGLE_PROMISC_OFF:
+ case RXMODE_TOGGLE_PROMISC_ON:
+ mode = "promiscuous";
+ dev->data->promiscuous = toggle & 1;
+ break;
+ case RXMODE_TOGGLE_ALLMULTI_OFF:
+ case RXMODE_TOGGLE_ALLMULTI_ON:
+ mode = "all multicast";
+ dev->data->all_multicast = toggle & 1;
+ break;
+ }
+ if (!mlx4_flow_sync(priv, &error))
+ return;
+ ERROR("cannot toggle %s mode (code %d, \"%s\"),"
+ " flow error type %d, cause %p, message: %s",
+ mode, rte_errno, strerror(rte_errno), error.type, error.cause,
+ error.message ? error.message : "(unspecified)");
+}
+
+/**
+ * DPDK callback to enable promiscuous mode.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+void
+mlx4_promiscuous_enable(struct rte_eth_dev *dev)
+{
+ mlx4_rxmode_toggle(dev, RXMODE_TOGGLE_PROMISC_ON);
+}
+
+/**
+ * DPDK callback to disable promiscuous mode.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+void
+mlx4_promiscuous_disable(struct rte_eth_dev *dev)
+{
+ mlx4_rxmode_toggle(dev, RXMODE_TOGGLE_PROMISC_OFF);
+}
+
+/**
+ * DPDK callback to enable all multicast mode.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+void
+mlx4_allmulticast_enable(struct rte_eth_dev *dev)
+{
+ mlx4_rxmode_toggle(dev, RXMODE_TOGGLE_ALLMULTI_ON);
+}
+
+/**
+ * DPDK callback to disable all multicast mode.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+void
+mlx4_allmulticast_disable(struct rte_eth_dev *dev)
+{
+ mlx4_rxmode_toggle(dev, RXMODE_TOGGLE_ALLMULTI_OFF);
+}
+
+/**
+ * DPDK callback to remove a MAC address.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param index
+ * MAC address index.
+ */
+void
+mlx4_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct rte_flow_error error;
+
+ if (index >= RTE_DIM(priv->mac)) {
+ rte_errno = EINVAL;
+ return;
+ }
+ memset(&priv->mac[index], 0, sizeof(priv->mac[index]));
+ if (!mlx4_flow_sync(priv, &error))
+ return;
+ ERROR("failed to synchronize flow rules after removing MAC address"
+ " at index %d (code %d, \"%s\"),"
+ " flow error type %d, cause %p, message: %s",
+ index, rte_errno, strerror(rte_errno), error.type, error.cause,
+ error.message ? error.message : "(unspecified)");
+}
+
+/**
+ * DPDK callback to add a MAC address.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param mac_addr
+ * MAC address to register.
+ * @param index
+ * MAC address index.
+ * @param vmdq
+ * VMDq pool index to associate address with (ignored).
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+ uint32_t index, uint32_t vmdq)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct rte_flow_error error;
+ int ret;
+
+ (void)vmdq;
+ if (index >= RTE_DIM(priv->mac)) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ memcpy(&priv->mac[index], mac_addr, sizeof(priv->mac[index]));
+ ret = mlx4_flow_sync(priv, &error);
+ if (!ret)
+ return 0;
+ ERROR("failed to synchronize flow rules after adding MAC address"
+ " at index %d (code %d, \"%s\"),"
+ " flow error type %d, cause %p, message: %s",
+ index, rte_errno, strerror(rte_errno), error.type, error.cause,
+ error.message ? error.message : "(unspecified)");
+ return ret;
+}
+
+/**
+ * DPDK callback to configure a VLAN filter.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param vlan_id
+ * VLAN ID to filter.
+ * @param on
+ * Toggle filter.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct rte_flow_error error;
+ unsigned int vidx = vlan_id / 64;
+ unsigned int vbit = vlan_id % 64;
+ uint64_t *v;
+ int ret;
+
+ if (vidx >= RTE_DIM(dev->data->vlan_filter_conf.ids)) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ v = &dev->data->vlan_filter_conf.ids[vidx];
+ *v &= ~(UINT64_C(1) << vbit);
+ *v |= (uint64_t)!!on << vbit;
+ ret = mlx4_flow_sync(priv, &error);
+ if (!ret)
+ return 0;
+ ERROR("failed to synchronize flow rules after %s VLAN filter on ID %u"
+ " (code %d, \"%s\"), "
+ " flow error type %d, cause %p, message: %s",
+ on ? "enabling" : "disabling", vlan_id,
+ rte_errno, strerror(rte_errno), error.type, error.cause,
+ error.message ? error.message : "(unspecified)");
+ return ret;
+}
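
mlx4_vlan_filter_set() records the requested VLAN ID directly in the 64-bit words of dev->data->vlan_filter_conf.ids[] before resynchronizing flow rules. A minimal standalone sketch of that bitmap arithmetic, using hypothetical names and plain C types instead of the DPDK structures:

/* Illustrative sketch only, not part of the patch. */
#include <assert.h>
#include <stdint.h>

#define DEMO_VLAN_WORDS 64 /* 64 words * 64 bits = 4096 VLAN IDs */

static void
vlan_bitmap_set(uint64_t ids[DEMO_VLAN_WORDS], uint16_t vlan_id, int on)
{
	unsigned int vidx = vlan_id / 64; /* which 64-bit word */
	unsigned int vbit = vlan_id % 64; /* which bit in that word */

	ids[vidx] &= ~(UINT64_C(1) << vbit); /* clear the bit first */
	ids[vidx] |= (uint64_t)!!on << vbit; /* then set it if requested */
}

int
main(void)
{
	uint64_t ids[DEMO_VLAN_WORDS] = { 0 };

	vlan_bitmap_set(ids, 100, 1);
	assert(ids[1] & (UINT64_C(1) << 36)); /* 100 = 1 * 64 + 36 */
	vlan_bitmap_set(ids, 100, 0);
	assert(ids[1] == 0);
	return 0;
}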
+
+/**
+ * DPDK callback to set the primary MAC address.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param mac_addr
+ * MAC address to register.
+ */
+void
+mlx4_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+ mlx4_mac_addr_add(dev, mac_addr, 0, 0);
+}
+
+/**
+ * DPDK callback to get information about the device.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param[out] info
+ * Info structure output buffer.
+ */
+void
+mlx4_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
+{
+ struct priv *priv = dev->data->dev_private;
+ unsigned int max;
+ char ifname[IF_NAMESIZE];
+
+ info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+ /* FIXME: we should ask the device for these values. */
+ info->min_rx_bufsize = 32;
+ info->max_rx_pktlen = 65536;
+ /*
+ * Since we need one CQ per QP, the limit is the minimum of the
+ * two values.
+ */
+ max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ?
+ priv->device_attr.max_qp : priv->device_attr.max_cq);
+ /* max_rx_queues is a uint16_t, so clamp the limit to 65535. */
+ if (max >= 65535)
+ max = 65535;
+ info->max_rx_queues = max;
+ info->max_tx_queues = max;
+ info->max_mac_addrs = RTE_DIM(priv->mac);
+ info->rx_offload_capa = 0;
+ info->tx_offload_capa = 0;
+ if (priv->hw_csum) {
+ info->tx_offload_capa |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
+ DEV_TX_OFFLOAD_UDP_CKSUM |
+ DEV_TX_OFFLOAD_TCP_CKSUM);
+ info->rx_offload_capa |= (DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM);
+ }
+ if (priv->hw_csum_l2tun)
+ info->tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
+ if (mlx4_get_ifname(priv, &ifname) == 0)
+ info->if_index = if_nametoindex(ifname);
+ info->hash_key_size = MLX4_RSS_HASH_KEY_SIZE;
+ info->speed_capa =
+ ETH_LINK_SPEED_1G |
+ ETH_LINK_SPEED_10G |
+ ETH_LINK_SPEED_20G |
+ ETH_LINK_SPEED_40G |
+ ETH_LINK_SPEED_56G;
+}
+
+/**
+ * DPDK callback to get device statistics.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param[out] stats
+ * Stats structure output buffer.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+mlx4_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+ struct rte_eth_stats tmp;
+ unsigned int i;
+ unsigned int idx;
+
+ memset(&tmp, 0, sizeof(tmp));
+ /* Add software counters. */
+ for (i = 0; i != dev->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = dev->data->rx_queues[i];
+
+ if (rxq == NULL)
+ continue;
+ idx = rxq->stats.idx;
+ if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+ tmp.q_ipackets[idx] += rxq->stats.ipackets;
+ tmp.q_ibytes[idx] += rxq->stats.ibytes;
+ tmp.q_errors[idx] += (rxq->stats.idropped +
+ rxq->stats.rx_nombuf);
+ }
+ tmp.ipackets += rxq->stats.ipackets;
+ tmp.ibytes += rxq->stats.ibytes;
+ tmp.ierrors += rxq->stats.idropped;
+ tmp.rx_nombuf += rxq->stats.rx_nombuf;
+ }
+ for (i = 0; i != dev->data->nb_tx_queues; ++i) {
+ struct txq *txq = dev->data->tx_queues[i];
+
+ if (txq == NULL)
+ continue;
+ idx = txq->stats.idx;
+ if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+ tmp.q_opackets[idx] += txq->stats.opackets;
+ tmp.q_obytes[idx] += txq->stats.obytes;
+ tmp.q_errors[idx] += txq->stats.odropped;
+ }
+ tmp.opackets += txq->stats.opackets;
+ tmp.obytes += txq->stats.obytes;
+ tmp.oerrors += txq->stats.odropped;
+ }
+ *stats = tmp;
+ return 0;
+}
+
+/**
+ * DPDK callback to clear device statistics.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+void
+mlx4_stats_reset(struct rte_eth_dev *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i != dev->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = dev->data->rx_queues[i];
+
+ if (rxq)
+ rxq->stats = (struct mlx4_rxq_stats){
+ .idx = rxq->stats.idx,
+ };
+ }
+ for (i = 0; i != dev->data->nb_tx_queues; ++i) {
+ struct txq *txq = dev->data->tx_queues[i];
+
+ if (txq)
+ txq->stats = (struct mlx4_txq_stats){
+ .idx = txq->stats.idx,
+ };
+ }
+}
+
+/**
+ * DPDK callback to retrieve physical link information.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param wait_to_complete
+ * Wait for request completion (ignored).
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete)
+{
+ const struct priv *priv = dev->data->dev_private;
+ struct ethtool_cmd edata = {
+ .cmd = ETHTOOL_GSET,
+ };
+ struct ifreq ifr;
+ struct rte_eth_link dev_link;
+ int link_speed = 0;
+
+ if (priv == NULL) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ (void)wait_to_complete;
+ if (mlx4_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
+ WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(rte_errno));
+ return -rte_errno;
+ }
+ memset(&dev_link, 0, sizeof(dev_link));
+ dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
+ (ifr.ifr_flags & IFF_RUNNING));
+ ifr.ifr_data = (void *)&edata;
+ if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
+ WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
+ strerror(rte_errno));
+ return -rte_errno;
+ }
+ link_speed = ethtool_cmd_speed(&edata);
+ if (link_speed == -1)
+ dev_link.link_speed = 0;
+ else
+ dev_link.link_speed = link_speed;
+ dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
+ ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
+ dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+ ETH_LINK_SPEED_FIXED);
+ dev->data->dev_link = dev_link;
+ return 0;
+}
+
+/**
+ * DPDK callback to get flow control status.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param[out] fc_conf
+ * Flow control output buffer.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct ifreq ifr;
+ struct ethtool_pauseparam ethpause = {
+ .cmd = ETHTOOL_GPAUSEPARAM,
+ };
+ int ret;
+
+ ifr.ifr_data = (void *)&ethpause;
+ if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
+ ret = rte_errno;
+ WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)"
+ " failed: %s",
+ strerror(rte_errno));
+ goto out;
+ }
+ fc_conf->autoneg = ethpause.autoneg;
+ if (ethpause.rx_pause && ethpause.tx_pause)
+ fc_conf->mode = RTE_FC_FULL;
+ else if (ethpause.rx_pause)
+ fc_conf->mode = RTE_FC_RX_PAUSE;
+ else if (ethpause.tx_pause)
+ fc_conf->mode = RTE_FC_TX_PAUSE;
+ else
+ fc_conf->mode = RTE_FC_NONE;
+ ret = 0;
+out:
+ assert(ret >= 0);
+ return -ret;
+}
+
+/**
+ * DPDK callback to modify flow control parameters.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param[in] fc_conf
+ * Flow control parameters.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct ifreq ifr;
+ struct ethtool_pauseparam ethpause = {
+ .cmd = ETHTOOL_SPAUSEPARAM,
+ };
+ int ret;
+
+ ifr.ifr_data = (void *)&ethpause;
+ ethpause.autoneg = fc_conf->autoneg;
+ if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
+ (fc_conf->mode & RTE_FC_RX_PAUSE))
+ ethpause.rx_pause = 1;
+ else
+ ethpause.rx_pause = 0;
+ if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
+ (fc_conf->mode & RTE_FC_TX_PAUSE))
+ ethpause.tx_pause = 1;
+ else
+ ethpause.tx_pause = 0;
+ if (mlx4_ifreq(priv, SIOCETHTOOL, &ifr)) {
+ ret = rte_errno;
+ WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
+ " failed: %s",
+ strerror(rte_errno));
+ goto out;
+ }
+ ret = 0;
+out:
+ assert(ret >= 0);
+ return -ret;
+}
+
+/**
+ * DPDK callback to retrieve the received packet types that are recognized
+ * by the device.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * Pointer to an array of recognized packet types if in Rx burst mode,
+ * NULL otherwise.
+ */
+const uint32_t *
+mlx4_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+ static const uint32_t ptypes[] = {
+ /* refers to rxq_cq_to_pkt_type() */
+ RTE_PTYPE_L2_ETHER,
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
+ RTE_PTYPE_L4_FRAG,
+ RTE_PTYPE_L4_TCP,
+ RTE_PTYPE_L4_UDP,
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
+ RTE_PTYPE_UNKNOWN
+ };
+
+ if (dev->rx_pkt_burst == mlx4_rx_burst)
+ return ptypes;
+ return NULL;
+}
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 925c89c5..8b87b298 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -2,7 +2,7 @@
* BSD LICENSE
*
* Copyright 2017 6WIND S.A.
- * Copyright 2017 Mellanox.
+ * Copyright 2017 Mellanox
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,197 +31,328 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+/**
+ * @file
+ * Flow API operations for mlx4 driver.
+ */
+
+#include <arpa/inet.h>
#include <assert.h>
+#include <errno.h>
+#include <stdalign.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/queue.h>
+
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+#include <rte_eth_ctrl.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
-/* Generated configuration header. */
-#include "mlx4_autoconf.h"
-
/* PMD headers. */
#include "mlx4.h"
#include "mlx4_flow.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
-/** Static initializer for items. */
-#define ITEMS(...) \
+/** Static initializer for a list of subsequent item types. */
+#define NEXT_ITEM(...) \
(const enum rte_flow_item_type []){ \
__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
}
-/** Structure to generate a simple graph of layers supported by the NIC. */
-struct mlx4_flow_items {
- /** List of possible actions for these items. */
- const enum rte_flow_action_type *const actions;
- /** Bit-masks corresponding to the possibilities for the item. */
- const void *mask;
- /**
- * Default bit-masks to use when item->mask is not provided. When
- * \default_mask is also NULL, the full supported bit-mask (\mask) is
- * used instead.
- */
- const void *default_mask;
- /** Bit-masks size in bytes. */
+/** Processor structure associated with a flow item. */
+struct mlx4_flow_proc_item {
+ /** Bit-mask for fields supported by this PMD. */
+ const void *mask_support;
+ /** Bit-mask to use when @p item->mask is not provided. */
+ const void *mask_default;
+ /** Size in bytes for @p mask_support and @p mask_default. */
const unsigned int mask_sz;
- /**
- * Check support for a given item.
- *
- * @param item[in]
- * Item specification.
- * @param mask[in]
- * Bit-masks covering supported fields to compare with spec,
- * last and mask in
- * \item.
- * @param size
- * Bit-Mask size in bytes.
- *
- * @return
- * 0 on success, negative value otherwise.
- */
- int (*validate)(const struct rte_flow_item *item,
- const uint8_t *mask, unsigned int size);
- /**
- * Conversion function from rte_flow to NIC specific flow.
- *
- * @param item
- * rte_flow item to convert.
- * @param default_mask
- * Default bit-masks to use when item->mask is not provided.
- * @param data
- * Internal structure to store the conversion.
- *
- * @return
- * 0 on success, negative value otherwise.
- */
- int (*convert)(const struct rte_flow_item *item,
- const void *default_mask,
- void *data);
+ /** Merge a pattern item into a flow rule handle. */
+ int (*merge)(struct rte_flow *flow,
+ const struct rte_flow_item *item,
+ const struct mlx4_flow_proc_item *proc,
+ struct rte_flow_error *error);
/** Size in bytes of the destination structure. */
const unsigned int dst_sz;
- /** List of possible following items. */
- const enum rte_flow_item_type *const items;
+ /** List of possible subsequent items. */
+ const enum rte_flow_item_type *const next_item;
};
-struct rte_flow_drop {
- struct ibv_qp *qp; /**< Verbs queue pair. */
- struct ibv_cq *cq; /**< Verbs completion queue. */
+/** Shared resources for drop flow rules. */
+struct mlx4_drop {
+ struct ibv_qp *qp; /**< QP target. */
+ struct ibv_cq *cq; /**< CQ associated with above QP. */
+ struct priv *priv; /**< Back pointer to private data. */
+ uint32_t refcnt; /**< Reference count. */
};
-/** Valid action for this PMD. */
-static const enum rte_flow_action_type valid_actions[] = {
- RTE_FLOW_ACTION_TYPE_DROP,
- RTE_FLOW_ACTION_TYPE_QUEUE,
- RTE_FLOW_ACTION_TYPE_RSS,
- RTE_FLOW_ACTION_TYPE_END,
-};
+/**
+ * Convert DPDK RSS hash fields to their Verbs equivalent.
+ *
+ * @param rss_hf
+ * Hash fields in DPDK format (see struct rte_eth_rss_conf).
+ *
+ * @return
+ * A valid Verbs RSS hash fields mask for mlx4 on success, (uint64_t)-1
+ * otherwise and rte_errno is set.
+ */
+static uint64_t
+mlx4_conv_rss_hf(uint64_t rss_hf)
+{
+ enum { IPV4, IPV6, TCP, UDP, };
+ const uint64_t in[] = {
+ [IPV4] = (ETH_RSS_IPV4 |
+ ETH_RSS_FRAG_IPV4 |
+ ETH_RSS_NONFRAG_IPV4_TCP |
+ ETH_RSS_NONFRAG_IPV4_UDP |
+ ETH_RSS_NONFRAG_IPV4_OTHER),
+ [IPV6] = (ETH_RSS_IPV6 |
+ ETH_RSS_FRAG_IPV6 |
+ ETH_RSS_NONFRAG_IPV6_TCP |
+ ETH_RSS_NONFRAG_IPV6_UDP |
+ ETH_RSS_NONFRAG_IPV6_OTHER |
+ ETH_RSS_IPV6_EX |
+ ETH_RSS_IPV6_TCP_EX |
+ ETH_RSS_IPV6_UDP_EX),
+ [TCP] = (ETH_RSS_NONFRAG_IPV4_TCP |
+ ETH_RSS_NONFRAG_IPV6_TCP |
+ ETH_RSS_IPV6_TCP_EX),
+ /*
+ * UDP support is temporarily disabled due to an
+ * implementation issue in the kernel.
+ */
+ [UDP] = 0,
+ };
+ const uint64_t out[RTE_DIM(in)] = {
+ [IPV4] = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
+ [IPV6] = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6,
+ [TCP] = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP,
+ [UDP] = IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP,
+ };
+ uint64_t seen = 0;
+ uint64_t conv = 0;
+ unsigned int i;
+
+ for (i = 0; i != RTE_DIM(in); ++i)
+ if (rss_hf & in[i]) {
+ seen |= rss_hf & in[i];
+ conv |= out[i];
+ }
+ if (!(rss_hf & ~seen))
+ return conv;
+ rte_errno = ENOTSUP;
+ return (uint64_t)-1;
+}
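
mlx4_conv_rss_hf() walks a small table that groups DPDK hash-field bits and emits the corresponding Verbs bits, then rejects the request if any bit was not covered by a group. A minimal standalone sketch of that conversion pattern, with illustrative flag values that are not the real ETH_RSS_* or IBV_RX_HASH_* constants:

/* Illustrative sketch only, not part of the patch. */
#include <assert.h>
#include <stdint.h>

#define REQ_A1 UINT64_C(0x01)
#define REQ_A2 UINT64_C(0x02)
#define REQ_B1 UINT64_C(0x04)
#define OUT_A  UINT64_C(0x10)
#define OUT_B  UINT64_C(0x20)

static uint64_t
conv_flags(uint64_t req)
{
	enum { GROUP_A, GROUP_B };
	const uint64_t in[] = {
		[GROUP_A] = REQ_A1 | REQ_A2,
		[GROUP_B] = REQ_B1,
	};
	const uint64_t out[] = {
		[GROUP_A] = OUT_A,
		[GROUP_B] = OUT_B,
	};
	uint64_t seen = 0;
	uint64_t conv = 0;
	unsigned int i;

	for (i = 0; i != sizeof(in) / sizeof(in[0]); ++i)
		if (req & in[i]) {
			seen |= req & in[i];
			conv |= out[i];
		}
	if (!(req & ~seen))
		return conv;
	return (uint64_t)-1; /* request contained unknown bits */
}

int
main(void)
{
	assert(conv_flags(REQ_A1 | REQ_B1) == (OUT_A | OUT_B));
	assert(conv_flags(UINT64_C(0x80)) == (uint64_t)-1);
	return 0;
}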
/**
- * Convert Ethernet item to Verbs specification.
+ * Merge Ethernet pattern item into flow rule handle.
*
- * @param item[in]
- * Item specification.
- * @param default_mask[in]
- * Default bit-masks to use when item->mask is not provided.
- * @param data[in, out]
- * User structure.
+ * Additional mlx4-specific constraints on supported fields:
+ *
+ * - No support for partial masks, except in the specific case of matching
+ * all multicast traffic (@p spec->dst and @p mask->dst equal to
+ * 01:00:00:00:00:00).
+ * - Not providing @p item->spec or providing an empty @p mask->dst is
+ * *only* supported if the rule doesn't specify additional matching
+ * criteria (i.e. rule is promiscuous-like).
+ *
+ * @param[in, out] flow
+ * Flow rule handle to update.
+ * @param[in] item
+ * Pattern item to merge.
+ * @param[in] proc
+ * Associated item-processing object.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx4_flow_create_eth(const struct rte_flow_item *item,
- const void *default_mask,
- void *data)
+mlx4_flow_merge_eth(struct rte_flow *flow,
+ const struct rte_flow_item *item,
+ const struct mlx4_flow_proc_item *proc,
+ struct rte_flow_error *error)
{
const struct rte_flow_item_eth *spec = item->spec;
- const struct rte_flow_item_eth *mask = item->mask;
- struct mlx4_flow *flow = (struct mlx4_flow *)data;
+ const struct rte_flow_item_eth *mask =
+ spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
struct ibv_flow_spec_eth *eth;
- const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
+ const char *msg;
unsigned int i;
+ if (!mask) {
+ flow->promisc = 1;
+ } else {
+ uint32_t sum_dst = 0;
+ uint32_t sum_src = 0;
+
+ for (i = 0; i != sizeof(mask->dst.addr_bytes); ++i) {
+ sum_dst += mask->dst.addr_bytes[i];
+ sum_src += mask->src.addr_bytes[i];
+ }
+ if (sum_src) {
+ msg = "mlx4 does not support source MAC matching";
+ goto error;
+ } else if (!sum_dst) {
+ flow->promisc = 1;
+ } else if (sum_dst == 1 && mask->dst.addr_bytes[0] == 1) {
+ if (!(spec->dst.addr_bytes[0] & 1)) {
+ msg = "mlx4 does not support the explicit"
+ " exclusion of all multicast traffic";
+ goto error;
+ }
+ flow->allmulti = 1;
+ } else if (sum_dst != (UINT8_C(0xff) * ETHER_ADDR_LEN)) {
+ msg = "mlx4 does not support matching partial"
+ " Ethernet fields";
+ goto error;
+ }
+ }
+ if (!flow->ibv_attr)
+ return 0;
+ if (flow->promisc) {
+ flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
+ return 0;
+ }
+ if (flow->allmulti) {
+ flow->ibv_attr->type = IBV_FLOW_ATTR_MC_DEFAULT;
+ return 0;
+ }
++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 2;
- eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+ eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
*eth = (struct ibv_flow_spec_eth) {
.type = IBV_FLOW_SPEC_ETH,
- .size = eth_size,
+ .size = sizeof(*eth),
};
- if (!spec) {
- flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
- return 0;
- }
- if (!mask)
- mask = default_mask;
memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
- memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
- memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
/* Remove unwanted bits from values. */
for (i = 0; i < ETHER_ADDR_LEN; ++i) {
eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
- eth->val.src_mac[i] &= eth->mask.src_mac[i];
}
return 0;
+error:
+ return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+ item, msg);
}
/**
- * Convert VLAN item to Verbs specification.
+ * Merge VLAN pattern item into flow rule handle.
*
- * @param item[in]
- * Item specification.
- * @param default_mask[in]
- * Default bit-masks to use when item->mask is not provided.
- * @param data[in, out]
- * User structure.
+ * Additional mlx4-specific constraints on supported fields:
+ *
+ * - Matching *all* VLAN traffic by omitting @p item->spec or providing an
+ * empty @p item->mask would also include non-VLAN traffic. Doing so is
+ * therefore unsupported.
+ * - No support for partial masks.
+ *
+ * @param[in, out] flow
+ * Flow rule handle to update.
+ * @param[in] item
+ * Pattern item to merge.
+ * @param[in] proc
+ * Associated item-processing object.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx4_flow_create_vlan(const struct rte_flow_item *item,
- const void *default_mask,
- void *data)
+mlx4_flow_merge_vlan(struct rte_flow *flow,
+ const struct rte_flow_item *item,
+ const struct mlx4_flow_proc_item *proc,
+ struct rte_flow_error *error)
{
const struct rte_flow_item_vlan *spec = item->spec;
- const struct rte_flow_item_vlan *mask = item->mask;
- struct mlx4_flow *flow = (struct mlx4_flow *)data;
+ const struct rte_flow_item_vlan *mask =
+ spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
struct ibv_flow_spec_eth *eth;
- const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
+ const char *msg;
- eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
- if (!spec)
+ if (!mask || !mask->tci) {
+ msg = "mlx4 cannot match all VLAN traffic while excluding"
+ " non-VLAN traffic, TCI VID must be specified";
+ goto error;
+ }
+ if (mask->tci != RTE_BE16(0x0fff)) {
+ msg = "mlx4 does not support partial TCI VID matching";
+ goto error;
+ }
+ if (!flow->ibv_attr)
return 0;
- if (!mask)
- mask = default_mask;
+ eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
+ sizeof(*eth));
eth->val.vlan_tag = spec->tci;
eth->mask.vlan_tag = mask->tci;
eth->val.vlan_tag &= eth->mask.vlan_tag;
return 0;
+error:
+ return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+ item, msg);
}
/**
- * Convert IPv4 item to Verbs specification.
+ * Merge IPv4 pattern item into flow rule handle.
*
- * @param item[in]
- * Item specification.
- * @param default_mask[in]
- * Default bit-masks to use when item->mask is not provided.
- * @param data[in, out]
- * User structure.
+ * Additional mlx4-specific constraints on supported fields:
+ *
+ * - No support for partial masks.
+ *
+ * @param[in, out] flow
+ * Flow rule handle to update.
+ * @param[in] item
+ * Pattern item to merge.
+ * @param[in] proc
+ * Associated item-processing object.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx4_flow_create_ipv4(const struct rte_flow_item *item,
- const void *default_mask,
- void *data)
+mlx4_flow_merge_ipv4(struct rte_flow *flow,
+ const struct rte_flow_item *item,
+ const struct mlx4_flow_proc_item *proc,
+ struct rte_flow_error *error)
{
const struct rte_flow_item_ipv4 *spec = item->spec;
- const struct rte_flow_item_ipv4 *mask = item->mask;
- struct mlx4_flow *flow = (struct mlx4_flow *)data;
+ const struct rte_flow_item_ipv4 *mask =
+ spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
struct ibv_flow_spec_ipv4 *ipv4;
- unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);
+ const char *msg;
+ if (mask &&
+ ((uint32_t)(mask->hdr.src_addr + 1) > UINT32_C(1) ||
+ (uint32_t)(mask->hdr.dst_addr + 1) > UINT32_C(1))) {
+ msg = "mlx4 does not support matching partial IPv4 fields";
+ goto error;
+ }
+ if (!flow->ibv_attr)
+ return 0;
++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 1;
- ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+ ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
*ipv4 = (struct ibv_flow_spec_ipv4) {
.type = IBV_FLOW_SPEC_IPV4,
- .size = ipv4_size,
+ .size = sizeof(*ipv4),
};
if (!spec)
return 0;
@@ -229,8 +360,6 @@ mlx4_flow_create_ipv4(const struct rte_flow_item *item,
.src_ip = spec->hdr.src_addr,
.dst_ip = spec->hdr.dst_addr,
};
- if (!mask)
- mask = default_mask;
ipv4->mask = (struct ibv_flow_ipv4_filter) {
.src_ip = mask->hdr.src_addr,
.dst_ip = mask->hdr.dst_addr,
@@ -239,528 +368,504 @@ mlx4_flow_create_ipv4(const struct rte_flow_item *item,
ipv4->val.src_ip &= ipv4->mask.src_ip;
ipv4->val.dst_ip &= ipv4->mask.dst_ip;
return 0;
+error:
+ return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+ item, msg);
}
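
The partial-mask rejection in mlx4_flow_merge_ipv4() (and the UDP/TCP variants below) relies on an overflow trick: adding 1 to an all-ones mask wraps to 0, so (uint32_t)(mask + 1) > 1 holds only for masks that are neither empty nor full. A minimal standalone sketch of that check:

/* Illustrative sketch only, not part of the patch. */
#include <assert.h>
#include <stdint.h>

static int
is_partial_mask32(uint32_t m)
{
	/* 0x00000000 -> 1, 0xffffffff -> 0 (wrap): both compare <= 1. */
	return (uint32_t)(m + 1) > UINT32_C(1);
}

int
main(void)
{
	assert(!is_partial_mask32(UINT32_C(0x00000000))); /* empty: accepted */
	assert(!is_partial_mask32(UINT32_C(0xffffffff))); /* full: accepted */
	assert(is_partial_mask32(UINT32_C(0xffffff00)));  /* partial: rejected */
	return 0;
}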
/**
- * Convert UDP item to Verbs specification.
+ * Merge UDP pattern item into flow rule handle.
*
- * @param item[in]
- * Item specification.
- * @param default_mask[in]
- * Default bit-masks to use when item->mask is not provided.
- * @param data[in, out]
- * User structure.
+ * Additional mlx4-specific constraints on supported fields:
+ *
+ * - No support for partial masks.
+ *
+ * @param[in, out] flow
+ * Flow rule handle to update.
+ * @param[in] item
+ * Pattern item to merge.
+ * @param[in] proc
+ * Associated item-processing object.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx4_flow_create_udp(const struct rte_flow_item *item,
- const void *default_mask,
- void *data)
+mlx4_flow_merge_udp(struct rte_flow *flow,
+ const struct rte_flow_item *item,
+ const struct mlx4_flow_proc_item *proc,
+ struct rte_flow_error *error)
{
const struct rte_flow_item_udp *spec = item->spec;
- const struct rte_flow_item_udp *mask = item->mask;
- struct mlx4_flow *flow = (struct mlx4_flow *)data;
+ const struct rte_flow_item_udp *mask =
+ spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
struct ibv_flow_spec_tcp_udp *udp;
- unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
+ const char *msg;
+ if (mask &&
+ ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
+ (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
+ msg = "mlx4 does not support matching partial UDP fields";
+ goto error;
+ }
+ if (!flow->ibv_attr)
+ return 0;
++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 0;
- udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+ udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
*udp = (struct ibv_flow_spec_tcp_udp) {
.type = IBV_FLOW_SPEC_UDP,
- .size = udp_size,
+ .size = sizeof(*udp),
};
if (!spec)
return 0;
udp->val.dst_port = spec->hdr.dst_port;
udp->val.src_port = spec->hdr.src_port;
- if (!mask)
- mask = default_mask;
udp->mask.dst_port = mask->hdr.dst_port;
udp->mask.src_port = mask->hdr.src_port;
/* Remove unwanted bits from values. */
udp->val.src_port &= udp->mask.src_port;
udp->val.dst_port &= udp->mask.dst_port;
return 0;
+error:
+ return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+ item, msg);
}
/**
- * Convert TCP item to Verbs specification.
+ * Merge TCP pattern item into flow rule handle.
*
- * @param item[in]
- * Item specification.
- * @param default_mask[in]
- * Default bit-masks to use when item->mask is not provided.
- * @param data[in, out]
- * User structure.
+ * Additional mlx4-specific constraints on supported fields:
+ *
+ * - No support for partial masks.
+ *
+ * @param[in, out] flow
+ * Flow rule handle to update.
+ * @param[in] item
+ * Pattern item to merge.
+ * @param[in] proc
+ * Associated item-processing object.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx4_flow_create_tcp(const struct rte_flow_item *item,
- const void *default_mask,
- void *data)
+mlx4_flow_merge_tcp(struct rte_flow *flow,
+ const struct rte_flow_item *item,
+ const struct mlx4_flow_proc_item *proc,
+ struct rte_flow_error *error)
{
const struct rte_flow_item_tcp *spec = item->spec;
- const struct rte_flow_item_tcp *mask = item->mask;
- struct mlx4_flow *flow = (struct mlx4_flow *)data;
+ const struct rte_flow_item_tcp *mask =
+ spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
struct ibv_flow_spec_tcp_udp *tcp;
- unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
+ const char *msg;
+ if (mask &&
+ ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
+ (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
+ msg = "mlx4 does not support matching partial TCP fields";
+ goto error;
+ }
+ if (!flow->ibv_attr)
+ return 0;
++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 0;
- tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+ tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
*tcp = (struct ibv_flow_spec_tcp_udp) {
.type = IBV_FLOW_SPEC_TCP,
- .size = tcp_size,
+ .size = sizeof(*tcp),
};
if (!spec)
return 0;
tcp->val.dst_port = spec->hdr.dst_port;
tcp->val.src_port = spec->hdr.src_port;
- if (!mask)
- mask = default_mask;
tcp->mask.dst_port = mask->hdr.dst_port;
tcp->mask.src_port = mask->hdr.src_port;
/* Remove unwanted bits from values. */
tcp->val.src_port &= tcp->mask.src_port;
tcp->val.dst_port &= tcp->mask.dst_port;
return 0;
+error:
+ return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+ item, msg);
}
/**
- * Check support for a given item.
+ * Perform basic sanity checks on a pattern item.
*
- * @param item[in]
+ * @param[in] item
* Item specification.
- * @param mask[in]
- * Bit-masks covering supported fields to compare with spec, last and mask in
- * \item.
- * @param size
- * Bit-Mask size in bytes.
+ * @param[in] proc
+ * Associated item-processing object.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
*
* @return
- * 0 on success, negative value otherwise.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx4_flow_item_validate(const struct rte_flow_item *item,
- const uint8_t *mask, unsigned int size)
+mlx4_flow_item_check(const struct rte_flow_item *item,
+ const struct mlx4_flow_proc_item *proc,
+ struct rte_flow_error *error)
{
- int ret = 0;
+ const uint8_t *mask;
+ unsigned int i;
+ /* item->last and item->mask cannot exist without item->spec. */
if (!item->spec && (item->mask || item->last))
- return -1;
- if (item->spec && !item->mask) {
- unsigned int i;
- const uint8_t *spec = item->spec;
-
- for (i = 0; i < size; ++i)
- if ((spec[i] | mask[i]) != mask[i])
- return -1;
- }
- if (item->last && !item->mask) {
- unsigned int i;
- const uint8_t *spec = item->last;
-
- for (i = 0; i < size; ++i)
- if ((spec[i] | mask[i]) != mask[i])
- return -1;
- }
- if (item->spec && item->last) {
- uint8_t spec[size];
- uint8_t last[size];
- const uint8_t *apply = mask;
- unsigned int i;
-
- if (item->mask)
- apply = item->mask;
- for (i = 0; i < size; ++i) {
- spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
- last[i] = ((const uint8_t *)item->last)[i] & apply[i];
- }
- ret = memcmp(spec, last, size);
- }
- return ret;
-}
-
-static int
-mlx4_flow_validate_eth(const struct rte_flow_item *item,
- const uint8_t *mask, unsigned int size)
-{
- if (item->mask) {
- const struct rte_flow_item_eth *mask = item->mask;
-
- if (mask->dst.addr_bytes[0] != 0xff ||
- mask->dst.addr_bytes[1] != 0xff ||
- mask->dst.addr_bytes[2] != 0xff ||
- mask->dst.addr_bytes[3] != 0xff ||
- mask->dst.addr_bytes[4] != 0xff ||
- mask->dst.addr_bytes[5] != 0xff)
- return -1;
- }
- return mlx4_flow_item_validate(item, mask, size);
-}
-
-static int
-mlx4_flow_validate_vlan(const struct rte_flow_item *item,
- const uint8_t *mask, unsigned int size)
-{
- if (item->mask) {
- const struct rte_flow_item_vlan *mask = item->mask;
-
- if (mask->tci != 0 &&
- ntohs(mask->tci) != 0x0fff)
- return -1;
- }
- return mlx4_flow_item_validate(item, mask, size);
-}
-
-static int
-mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
- const uint8_t *mask, unsigned int size)
-{
- if (item->mask) {
- const struct rte_flow_item_ipv4 *mask = item->mask;
-
- if (mask->hdr.src_addr != 0 &&
- mask->hdr.src_addr != 0xffffffff)
- return -1;
- if (mask->hdr.dst_addr != 0 &&
- mask->hdr.dst_addr != 0xffffffff)
- return -1;
- }
- return mlx4_flow_item_validate(item, mask, size);
-}
-
-static int
-mlx4_flow_validate_udp(const struct rte_flow_item *item,
- const uint8_t *mask, unsigned int size)
-{
- if (item->mask) {
- const struct rte_flow_item_udp *mask = item->mask;
-
- if (mask->hdr.src_port != 0 &&
- mask->hdr.src_port != 0xffff)
- return -1;
- if (mask->hdr.dst_port != 0 &&
- mask->hdr.dst_port != 0xffff)
- return -1;
- }
- return mlx4_flow_item_validate(item, mask, size);
-}
-
-static int
-mlx4_flow_validate_tcp(const struct rte_flow_item *item,
- const uint8_t *mask, unsigned int size)
-{
- if (item->mask) {
- const struct rte_flow_item_tcp *mask = item->mask;
-
- if (mask->hdr.src_port != 0 &&
- mask->hdr.src_port != 0xffff)
- return -1;
- if (mask->hdr.dst_port != 0 &&
- mask->hdr.dst_port != 0xffff)
- return -1;
+ return rte_flow_error_set
+ (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "\"mask\" or \"last\" field provided without a"
+ " corresponding \"spec\"");
+ /* No spec, no mask, no problem. */
+ if (!item->spec)
+ return 0;
+ mask = item->mask ?
+ (const uint8_t *)item->mask :
+ (const uint8_t *)proc->mask_default;
+ assert(mask);
+ /*
+ * Single-pass check to make sure that:
+ * - Mask is supported, no bits are set outside proc->mask_support.
+ * - Both item->spec and item->last are included in mask.
+ */
+ for (i = 0; i != proc->mask_sz; ++i) {
+ if (!mask[i])
+ continue;
+ if ((mask[i] | ((const uint8_t *)proc->mask_support)[i]) !=
+ ((const uint8_t *)proc->mask_support)[i])
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+ item, "unsupported field found in \"mask\"");
+ if (item->last &&
+ (((const uint8_t *)item->spec)[i] & mask[i]) !=
+ (((const uint8_t *)item->last)[i] & mask[i]))
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "range between \"spec\" and \"last\""
+ " is larger than \"mask\"");
}
- return mlx4_flow_item_validate(item, mask, size);
+ return 0;
}
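
mlx4_flow_item_check() replaces the per-item validate callbacks with a single byte-wise pass over the mask. A minimal standalone sketch of the same two checks (mask within supported bits, spec/last range covered by the mask), using hypothetical names and no rte_flow types:

/* Illustrative sketch only, not part of the patch. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static int
check_item(const uint8_t *spec, const uint8_t *last, const uint8_t *mask,
	   const uint8_t *supported, size_t size)
{
	size_t i;

	for (i = 0; i != size; ++i) {
		if (!mask[i])
			continue;
		/* Mask requests bits the PMD cannot match on. */
		if ((mask[i] | supported[i]) != supported[i])
			return -1;
		/* Ranges ("last") wider than the mask cannot be expressed. */
		if (last && (spec[i] & mask[i]) != (last[i] & mask[i]))
			return -1;
	}
	return 0;
}

int
main(void)
{
	const uint8_t supported[2] = { 0xff, 0x0f };
	const uint8_t spec[2] = { 0x12, 0x03 };
	const uint8_t mask_ok[2] = { 0xff, 0x0f };
	const uint8_t mask_bad[2] = { 0xff, 0xf0 }; /* outside supported */

	assert(check_item(spec, NULL, mask_ok, supported, 2) == 0);
	assert(check_item(spec, NULL, mask_bad, supported, 2) != 0);
	return 0;
}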
/** Graph of supported items and associated actions. */
-static const struct mlx4_flow_items mlx4_flow_items[] = {
+static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
[RTE_FLOW_ITEM_TYPE_END] = {
- .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
+ .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
},
[RTE_FLOW_ITEM_TYPE_ETH] = {
- .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
- RTE_FLOW_ITEM_TYPE_IPV4),
- .actions = valid_actions,
- .mask = &(const struct rte_flow_item_eth){
+ .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
+ RTE_FLOW_ITEM_TYPE_IPV4),
+ .mask_support = &(const struct rte_flow_item_eth){
+ /* Only destination MAC can be matched. */
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
- .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
},
- .default_mask = &rte_flow_item_eth_mask,
+ .mask_default = &rte_flow_item_eth_mask,
.mask_sz = sizeof(struct rte_flow_item_eth),
- .validate = mlx4_flow_validate_eth,
- .convert = mlx4_flow_create_eth,
+ .merge = mlx4_flow_merge_eth,
.dst_sz = sizeof(struct ibv_flow_spec_eth),
},
[RTE_FLOW_ITEM_TYPE_VLAN] = {
- .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4),
- .actions = valid_actions,
- .mask = &(const struct rte_flow_item_vlan){
- /* rte_flow_item_vlan_mask is invalid for mlx4. */
-#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
- .tci = 0x0fff,
-#else
- .tci = 0xff0f,
-#endif
+ .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
+ .mask_support = &(const struct rte_flow_item_vlan){
+ /* Only TCI VID matching is supported. */
+ .tci = RTE_BE16(0x0fff),
},
+ .mask_default = &rte_flow_item_vlan_mask,
.mask_sz = sizeof(struct rte_flow_item_vlan),
- .validate = mlx4_flow_validate_vlan,
- .convert = mlx4_flow_create_vlan,
+ .merge = mlx4_flow_merge_vlan,
.dst_sz = 0,
},
[RTE_FLOW_ITEM_TYPE_IPV4] = {
- .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
- RTE_FLOW_ITEM_TYPE_TCP),
- .actions = valid_actions,
- .mask = &(const struct rte_flow_item_ipv4){
+ .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
+ RTE_FLOW_ITEM_TYPE_TCP),
+ .mask_support = &(const struct rte_flow_item_ipv4){
.hdr = {
- .src_addr = -1,
- .dst_addr = -1,
+ .src_addr = RTE_BE32(0xffffffff),
+ .dst_addr = RTE_BE32(0xffffffff),
},
},
- .default_mask = &rte_flow_item_ipv4_mask,
+ .mask_default = &rte_flow_item_ipv4_mask,
.mask_sz = sizeof(struct rte_flow_item_ipv4),
- .validate = mlx4_flow_validate_ipv4,
- .convert = mlx4_flow_create_ipv4,
+ .merge = mlx4_flow_merge_ipv4,
.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
},
[RTE_FLOW_ITEM_TYPE_UDP] = {
- .actions = valid_actions,
- .mask = &(const struct rte_flow_item_udp){
+ .mask_support = &(const struct rte_flow_item_udp){
.hdr = {
- .src_port = -1,
- .dst_port = -1,
+ .src_port = RTE_BE16(0xffff),
+ .dst_port = RTE_BE16(0xffff),
},
},
- .default_mask = &rte_flow_item_udp_mask,
+ .mask_default = &rte_flow_item_udp_mask,
.mask_sz = sizeof(struct rte_flow_item_udp),
- .validate = mlx4_flow_validate_udp,
- .convert = mlx4_flow_create_udp,
+ .merge = mlx4_flow_merge_udp,
.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
},
[RTE_FLOW_ITEM_TYPE_TCP] = {
- .actions = valid_actions,
- .mask = &(const struct rte_flow_item_tcp){
+ .mask_support = &(const struct rte_flow_item_tcp){
.hdr = {
- .src_port = -1,
- .dst_port = -1,
+ .src_port = RTE_BE16(0xffff),
+ .dst_port = RTE_BE16(0xffff),
},
},
- .default_mask = &rte_flow_item_tcp_mask,
+ .mask_default = &rte_flow_item_tcp_mask,
.mask_sz = sizeof(struct rte_flow_item_tcp),
- .validate = mlx4_flow_validate_tcp,
- .convert = mlx4_flow_create_tcp,
+ .merge = mlx4_flow_merge_tcp,
.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
},
};
/**
- * Validate a flow supported by the NIC.
+ * Make sure a flow rule is supported and initialize associated structure.
*
* @param priv
* Pointer to private structure.
* @param[in] attr
* Flow rule attributes.
- * @param[in] items
+ * @param[in] pattern
* Pattern specification (list terminated by the END pattern item).
* @param[in] actions
* Associated actions (list terminated by the END action).
* @param[out] error
* Perform verbose error reporting if not NULL.
- * @param[in, out] flow
- * Flow structure to update.
+ * @param[in, out] addr
+ * Buffer where the resulting flow rule handle pointer must be stored.
+ * If NULL, stop processing after validation stage.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-priv_flow_validate(struct priv *priv,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item items[],
- const struct rte_flow_action actions[],
- struct rte_flow_error *error,
- struct mlx4_flow *flow)
+mlx4_flow_prepare(struct priv *priv,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item pattern[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error,
+ struct rte_flow **addr)
{
- const struct mlx4_flow_items *cur_item = mlx4_flow_items;
- struct mlx4_flow_action action = {
- .queue = 0,
- .drop = 0,
- };
-
- (void)priv;
- if (attr->group) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
- NULL,
- "groups are not supported");
- return -rte_errno;
- }
- if (attr->priority) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
- NULL,
- "priorities are not supported");
- return -rte_errno;
- }
- if (attr->egress) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
- NULL,
- "egress is not supported");
- return -rte_errno;
- }
- if (!attr->ingress) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
- NULL,
- "only ingress is supported");
- return -rte_errno;
- }
- /* Go over items list. */
- for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
- const struct mlx4_flow_items *token = NULL;
+ const struct rte_flow_item *item;
+ const struct rte_flow_action *action;
+ const struct mlx4_flow_proc_item *proc;
+ struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
+ struct rte_flow *flow = &temp;
+ const char *msg = NULL;
+
+ if (attr->group)
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+ NULL, "groups are not supported");
+ if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+ NULL, "maximum priority level is "
+ MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
+ if (attr->egress)
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+ NULL, "egress is not supported");
+ if (!attr->ingress)
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+ NULL, "only ingress is supported");
+fill:
+ proc = mlx4_flow_proc_item_list;
+ /* Go over pattern. */
+ for (item = pattern; item->type; ++item) {
+ const struct mlx4_flow_proc_item *next = NULL;
unsigned int i;
int err;
- if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+ if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
+ continue;
+ if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
+ flow->internal = 1;
continue;
- /*
- * The nic can support patterns with NULL eth spec only
- * if eth is a single item in a rule.
- */
- if (!items->spec &&
- items->type == RTE_FLOW_ITEM_TYPE_ETH) {
- const struct rte_flow_item *next = items + 1;
-
- if (next->type != RTE_FLOW_ITEM_TYPE_END) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- items,
- "the rule requires"
- " an Ethernet spec");
- return -rte_errno;
- }
}
- for (i = 0;
- cur_item->items &&
- cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
- ++i) {
- if (cur_item->items[i] == items->type) {
- token = &mlx4_flow_items[items->type];
+ if (flow->promisc || flow->allmulti) {
+ msg = "mlx4 does not support additional matching"
+ " criteria combined with indiscriminate"
+ " matching on Ethernet headers";
+ goto exit_item_not_supported;
+ }
+ for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
+ if (proc->next_item[i] == item->type) {
+ next = &mlx4_flow_proc_item_list[item->type];
break;
}
}
- if (!token)
- goto exit_item_not_supported;
- cur_item = token;
- err = cur_item->validate(items,
- (const uint8_t *)cur_item->mask,
- cur_item->mask_sz);
- if (err)
+ if (!next)
goto exit_item_not_supported;
- if (flow->ibv_attr && cur_item->convert) {
- err = cur_item->convert(items,
- (cur_item->default_mask ?
- cur_item->default_mask :
- cur_item->mask),
- flow);
+ proc = next;
+ /*
+ * Perform basic sanity checks only once, while handle is
+ * not allocated.
+ */
+ if (flow == &temp) {
+ err = mlx4_flow_item_check(item, proc, error);
if (err)
- goto exit_item_not_supported;
+ return err;
}
- flow->offset += cur_item->dst_sz;
+ if (proc->merge) {
+ err = proc->merge(flow, item, proc, error);
+ if (err)
+ return err;
+ }
+ flow->ibv_attr_size += proc->dst_sz;
}
- /* Go over actions list */
- for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
- if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
- continue;
- } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
- action.drop = 1;
- } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
- const struct rte_flow_action_queue *queue =
- (const struct rte_flow_action_queue *)
- actions->conf;
+ /* Go over actions list. */
+ for (action = actions; action->type; ++action) {
+ switch (action->type) {
+ const struct rte_flow_action_queue *queue;
+ const struct rte_flow_action_rss *rss;
+ const struct rte_eth_rss_conf *rss_conf;
+ unsigned int i;
- if (!queue || (queue->index > (priv->rxqs_n - 1)))
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ continue;
+ case RTE_FLOW_ACTION_TYPE_DROP:
+ flow->drop = 1;
+ break;
+ case RTE_FLOW_ACTION_TYPE_QUEUE:
+ if (flow->rss)
+ break;
+ queue = action->conf;
+ if (queue->index >= priv->dev->data->nb_rx_queues) {
+ msg = "queue target index beyond number of"
+ " configured Rx queues";
goto exit_action_not_supported;
- action.queue = 1;
- action.queues_n = 1;
- action.queues[0] = queue->index;
- } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
- int i;
- int ierr;
- const struct rte_flow_action_rss *rss =
- (const struct rte_flow_action_rss *)
- actions->conf;
-
- if (!priv->hw_rss) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- actions,
- "RSS cannot be used with "
- "the current configuration");
- return -rte_errno;
}
- if (!priv->isolated) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- actions,
- "RSS cannot be used without "
- "isolated mode");
- return -rte_errno;
+ flow->rss = mlx4_rss_get
+ (priv, 0, mlx4_rss_hash_key_default, 1,
+ &queue->index);
+ if (!flow->rss) {
+ msg = "not enough resources for additional"
+ " single-queue RSS context";
+ goto exit_action_not_supported;
+ }
+ break;
+ case RTE_FLOW_ACTION_TYPE_RSS:
+ if (flow->rss)
+ break;
+ rss = action->conf;
+ /* Default RSS configuration if none is provided. */
+ rss_conf =
+ rss->rss_conf ?
+ rss->rss_conf :
+ &(struct rte_eth_rss_conf){
+ .rss_key = mlx4_rss_hash_key_default,
+ .rss_key_len = MLX4_RSS_HASH_KEY_SIZE,
+ .rss_hf = (ETH_RSS_IPV4 |
+ ETH_RSS_NONFRAG_IPV4_TCP |
+ ETH_RSS_IPV6 |
+ ETH_RSS_NONFRAG_IPV6_TCP),
+ };
+ /* Sanity checks. */
+ for (i = 0; i < rss->num; ++i)
+ if (rss->queue[i] >=
+ priv->dev->data->nb_rx_queues)
+ break;
+ if (i != rss->num) {
+ msg = "queue target index beyond number of"
+ " configured Rx queues";
+ goto exit_action_not_supported;
}
if (!rte_is_power_of_2(rss->num)) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- actions,
- "the number of queues "
- "should be power of two");
- return -rte_errno;
+ msg = "for RSS, mlx4 requires the number of"
+ " queues to be a power of two";
+ goto exit_action_not_supported;
}
- if (priv->max_rss_tbl_sz < rss->num) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- actions,
- "the number of queues "
- "is too large");
- return -rte_errno;
+ if (rss_conf->rss_key_len !=
+ sizeof(flow->rss->key)) {
+ msg = "mlx4 supports exactly one RSS hash key"
+ " length: "
+ MLX4_STR_EXPAND(MLX4_RSS_HASH_KEY_SIZE);
+ goto exit_action_not_supported;
}
- /* checking indexes array */
- ierr = 0;
- for (i = 0; i < rss->num; ++i) {
- int j;
- if (rss->queue[i] >= priv->rxqs_n)
- ierr = 1;
- /*
- * Prevent the user from specifying
- * the same queue twice in the RSS array.
- */
- for (j = i + 1; j < rss->num && !ierr; ++j)
- if (rss->queue[j] == rss->queue[i])
- ierr = 1;
- if (ierr) {
- rte_flow_error_set(
- error,
- ENOTSUP,
- RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL,
- "RSS action only supports "
- "unique queue indices "
- "in a list");
- return -rte_errno;
- }
+ for (i = 1; i < rss->num; ++i)
+ if (rss->queue[i] - rss->queue[i - 1] != 1)
+ break;
+ if (i != rss->num) {
+ msg = "mlx4 requires RSS contexts to use"
+ " consecutive queue indices only";
+ goto exit_action_not_supported;
}
- action.queue = 1;
- action.queues_n = rss->num;
- for (i = 0; i < rss->num; ++i)
- action.queues[i] = rss->queue[i];
- } else {
+ if (rss->queue[0] % rss->num) {
+ msg = "mlx4 requires the first queue of a RSS"
+ " context to be aligned on a multiple"
+ " of the context size";
+ goto exit_action_not_supported;
+ }
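+ /*
+ * All RSS constraints are met at this point: for instance,
+ * queues 4-7 form a valid context, while 5-8 would have
+ * failed the alignment check above.
+ */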
+ flow->rss = mlx4_rss_get
+ (priv, mlx4_conv_rss_hf(rss_conf->rss_hf),
+ rss_conf->rss_key, rss->num, rss->queue);
+ if (!flow->rss) {
+ msg = "either invalid parameters or not enough"
+ " resources for additional multi-queue"
+ " RSS context";
+ goto exit_action_not_supported;
+ }
+ break;
+ default:
goto exit_action_not_supported;
}
}
- if (!action.queue && !action.drop) {
- rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "no valid action");
- return -rte_errno;
+ if (!flow->rss && !flow->drop)
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "no valid action");
+ /* Validation ends here. */
+ if (!addr) {
+ if (flow->rss)
+ mlx4_rss_put(flow->rss);
+ return 0;
}
+ if (flow == &temp) {
+ /* Allocate proper handle based on collected data. */
+ const struct mlx4_malloc_vec vec[] = {
+ {
+ .align = alignof(struct rte_flow),
+ .size = sizeof(*flow),
+ .addr = (void **)&flow,
+ },
+ {
+ .align = alignof(struct ibv_flow_attr),
+ .size = temp.ibv_attr_size,
+ .addr = (void **)&temp.ibv_attr,
+ },
+ };
+
+ if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
+ return rte_flow_error_set
+ (error, -rte_errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "flow rule handle allocation failure");
+ /* Most fields will be updated by second pass. */
+ *flow = (struct rte_flow){
+ .ibv_attr = temp.ibv_attr,
+ .ibv_attr_size = sizeof(*flow->ibv_attr),
+ .rss = temp.rss,
+ };
+ *flow->ibv_attr = (struct ibv_flow_attr){
+ .type = IBV_FLOW_ATTR_NORMAL,
+ .size = sizeof(*flow->ibv_attr),
+ .priority = attr->priority,
+ .port = priv->port,
+ };
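+ /*
+ * Jumping back to the pattern loop gives merge() callbacks a
+ * second pass, this time with flow->ibv_attr pointing to the
+ * allocated Verbs attributes.
+ */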
+ goto fill;
+ }
+ *addr = flow;
return 0;
exit_item_not_supported:
- rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
- items, "item not supported");
- return -rte_errno;
+ return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+ item, msg ? msg : "item not supported");
exit_action_not_supported:
- rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
- actions, "action not supported");
- return -rte_errno;
+ return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+ action, msg ? msg : "action not supported");
}
/**
@@ -769,552 +874,691 @@ exit_action_not_supported:
* @see rte_flow_validate()
* @see rte_flow_ops
*/
-int
+static int
mlx4_flow_validate(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
- const struct rte_flow_item items[],
+ const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
struct rte_flow_error *error)
{
struct priv *priv = dev->data->dev_private;
- int ret;
- struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr) };
-
- priv_lock(priv);
- ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
- priv_unlock(priv);
- return ret;
-}
-
-/**
- * Destroy a drop queue.
- *
- * @param priv
- * Pointer to private structure.
- */
-static void
-mlx4_flow_destroy_drop_queue(struct priv *priv)
-{
- if (priv->flow_drop_queue) {
- struct rte_flow_drop *fdq = priv->flow_drop_queue;
- priv->flow_drop_queue = NULL;
- claim_zero(ibv_destroy_qp(fdq->qp));
- claim_zero(ibv_destroy_cq(fdq->cq));
- rte_free(fdq);
- }
+ return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
}
/**
- * Create a single drop queue for all drop flows.
+ * Get a drop flow rule resources instance.
*
* @param priv
* Pointer to private structure.
*
* @return
- * 0 on success, negative value otherwise.
+ * Pointer to drop flow resources on success, NULL otherwise and rte_errno
+ * is set.
*/
-static int
-mlx4_flow_create_drop_queue(struct priv *priv)
+static struct mlx4_drop *
+mlx4_drop_get(struct priv *priv)
{
- struct ibv_qp *qp;
- struct ibv_cq *cq;
- struct rte_flow_drop *fdq;
+ struct mlx4_drop *drop = priv->drop;
- fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
- if (!fdq) {
- ERROR("Cannot allocate memory for drop struct");
- goto err;
- }
- cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
- &(struct ibv_exp_cq_init_attr){
- .comp_mask = 0,
- });
- if (!cq) {
- ERROR("Cannot create drop CQ");
- goto err_create_cq;
- }
- qp = ibv_exp_create_qp(priv->ctx,
- &(struct ibv_exp_qp_init_attr){
- .send_cq = cq,
- .recv_cq = cq,
- .cap = {
- .max_recv_wr = 1,
- .max_recv_sge = 1,
- },
- .qp_type = IBV_QPT_RAW_PACKET,
- .comp_mask =
- IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_PORT,
- .pd = priv->pd,
- .port_num = priv->port,
- });
- if (!qp) {
- ERROR("Cannot create drop QP");
- goto err_create_qp;
+ if (drop) {
+ assert(drop->refcnt);
+ assert(drop->priv == priv);
+ ++drop->refcnt;
+ return drop;
}
- *fdq = (struct rte_flow_drop){
- .qp = qp,
- .cq = cq,
+ drop = rte_malloc(__func__, sizeof(*drop), 0);
+ if (!drop)
+ goto error;
+ *drop = (struct mlx4_drop){
+ .priv = priv,
+ .refcnt = 1,
};
- priv->flow_drop_queue = fdq;
- return 0;
-err_create_qp:
- claim_zero(ibv_destroy_cq(cq));
-err_create_cq:
- rte_free(fdq);
-err:
- return -1;
+ drop->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
+ if (!drop->cq)
+ goto error;
+ drop->qp = ibv_create_qp(priv->pd,
+ &(struct ibv_qp_init_attr){
+ .send_cq = drop->cq,
+ .recv_cq = drop->cq,
+ .qp_type = IBV_QPT_RAW_PACKET,
+ });
+ if (!drop->qp)
+ goto error;
+ priv->drop = drop;
+ return drop;
+error:
+ /* drop may still be NULL if the initial allocation failed. */
+ if (drop) {
+ if (drop->qp)
+ claim_zero(ibv_destroy_qp(drop->qp));
+ if (drop->cq)
+ claim_zero(ibv_destroy_cq(drop->cq));
+ rte_free(drop);
+ }
+ rte_errno = ENOMEM;
+ return NULL;
}
/**
- * Get RSS parent rxq structure for given queues.
+ * Give back a drop flow rule resources instance.
*
- * Creates a new or returns an existed one.
- *
- * @param priv
- * Pointer to private structure.
- * @param queues
- * queues indices array, NULL in default RSS case.
- * @param children_n
- * the size of queues array.
- *
- * @return
- * Pointer to a parent rxq structure, NULL on failure.
+ * @param drop
+ * Pointer to drop flow rule resources.
*/
-static struct rxq *
-priv_parent_get(struct priv *priv,
- uint16_t queues[],
- uint16_t children_n,
- struct rte_flow_error *error)
+static void
+mlx4_drop_put(struct mlx4_drop *drop)
{
- unsigned int i;
- struct rxq *parent;
-
- for (parent = LIST_FIRST(&priv->parents);
- parent;
- parent = LIST_NEXT(parent, next)) {
- unsigned int same = 0;
- unsigned int overlap = 0;
-
- /*
- * Find out whether an appropriate parent queue already exists
- * and can be reused, otherwise make sure there are no overlaps.
- */
- for (i = 0; i < children_n; ++i) {
- unsigned int j;
-
- for (j = 0; j < parent->rss.queues_n; ++j) {
- if (parent->rss.queues[j] != queues[i])
- continue;
- ++overlap;
- if (i == j)
- ++same;
- }
- }
- if (same == children_n &&
- children_n == parent->rss.queues_n)
- return parent;
- else if (overlap)
- goto error;
- }
- /* Exclude the cases when some QPs were created without RSS */
- for (i = 0; i < children_n; ++i) {
- struct rxq *rxq = (*priv->rxqs)[queues[i]];
- if (rxq->qp)
- goto error;
- }
- parent = priv_parent_create(priv, queues, children_n);
- if (!parent) {
- rte_flow_error_set(error,
- ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL, "flow rule creation failure");
- return NULL;
- }
- return parent;
-
-error:
- rte_flow_error_set(error,
- EEXIST,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL,
- "sharing a queue between several"
- " RSS groups is not supported");
- return NULL;
+ assert(drop->refcnt);
+ if (--drop->refcnt)
+ return;
+ drop->priv->drop = NULL;
+ claim_zero(ibv_destroy_qp(drop->qp));
+ claim_zero(ibv_destroy_cq(drop->cq));
+ rte_free(drop);
}
/**
- * Complete flow rule creation.
+ * Toggle a configured flow rule.
*
* @param priv
* Pointer to private structure.
- * @param ibv_attr
- * Verbs flow attributes.
- * @param action
- * Target action structure.
+ * @param flow
+ * Flow rule handle to toggle.
+ * @param enable
+ * Whether associated Verbs flow must be created or removed.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
- * A flow if the rule could be created.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static struct rte_flow *
-priv_flow_create_action_queue(struct priv *priv,
- struct ibv_flow_attr *ibv_attr,
- struct mlx4_flow_action *action,
- struct rte_flow_error *error)
+static int
+mlx4_flow_toggle(struct priv *priv,
+ struct rte_flow *flow,
+ int enable,
+ struct rte_flow_error *error)
{
- struct ibv_qp *qp;
- struct rte_flow *rte_flow;
- struct rxq *rxq_parent = NULL;
+ struct ibv_qp *qp = NULL;
+ const char *msg;
+ int err;
- assert(priv->pd);
- assert(priv->ctx);
- rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
- if (!rte_flow) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate flow memory");
- return NULL;
+ if (!enable) {
+ if (!flow->ibv_flow)
+ return 0;
+ claim_zero(ibv_destroy_flow(flow->ibv_flow));
+ flow->ibv_flow = NULL;
+ if (flow->drop)
+ mlx4_drop_put(priv->drop);
+ else if (flow->rss)
+ mlx4_rss_detach(flow->rss);
+ return 0;
}
- if (action->drop) {
- qp = priv->flow_drop_queue ? priv->flow_drop_queue->qp : NULL;
- } else {
- int ret;
+ assert(flow->ibv_attr);
+ if (!flow->internal &&
+ !priv->isolated &&
+ flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
+ if (flow->ibv_flow) {
+ claim_zero(ibv_destroy_flow(flow->ibv_flow));
+ flow->ibv_flow = NULL;
+ if (flow->drop)
+ mlx4_drop_put(priv->drop);
+ else if (flow->rss)
+ mlx4_rss_detach(flow->rss);
+ }
+ err = EACCES;
+ msg = ("priority level "
+ MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
+ " is reserved when not in isolated mode");
+ goto error;
+ }
+ if (flow->rss) {
+ struct mlx4_rss *rss = flow->rss;
+ int missing = 0;
unsigned int i;
- struct rxq *rxq = NULL;
- if (action->queues_n > 1) {
- rxq_parent = priv_parent_get(priv, action->queues,
- action->queues_n, error);
- if (!rxq_parent)
- goto error;
+ /* Stop at the first nonexistent target queue. */
+ for (i = 0; i != rss->queues; ++i)
+ if (rss->queue_id[i] >=
+ priv->dev->data->nb_rx_queues ||
+ !priv->dev->data->rx_queues[rss->queue_id[i]]) {
+ missing = 1;
+ break;
+ }
+ if (flow->ibv_flow) {
+ if (missing ^ !flow->drop)
+ return 0;
+ /* Verbs flow needs updating. */
+ claim_zero(ibv_destroy_flow(flow->ibv_flow));
+ flow->ibv_flow = NULL;
+ if (flow->drop)
+ mlx4_drop_put(priv->drop);
+ else
+ mlx4_rss_detach(rss);
}
- for (i = 0; i < action->queues_n; ++i) {
- rxq = (*priv->rxqs)[action->queues[i]];
- /*
- * In case of isolated mode we postpone
- * ibv receive queue creation till the first
- * rte_flow rule will be applied on that queue.
- */
- if (!rxq->qp) {
- assert(priv->isolated);
- ret = rxq_create_qp(rxq, rxq->elts_n,
- 0, 0, rxq_parent);
- if (ret) {
- rte_flow_error_set(
- error,
- ENOMEM,
- RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL,
- "flow rule creation failure");
- goto error;
- }
+ if (!missing) {
+ err = mlx4_rss_attach(rss);
+ if (err) {
+ err = -err;
+ msg = "cannot create indirection table or hash"
+ " QP to associate flow rule with";
+ goto error;
}
+ qp = rss->qp;
}
- qp = action->queues_n > 1 ? rxq_parent->qp : rxq->qp;
- rte_flow->qp = qp;
+ /* A missing target queue drops traffic implicitly. */
+ flow->drop = missing;
}
- rte_flow->ibv_attr = ibv_attr;
- if (!priv->started)
- return rte_flow;
- rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
- if (!rte_flow->ibv_flow) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "flow rule creation failure");
- goto error;
+ if (flow->drop) {
+ mlx4_drop_get(priv);
+ if (!priv->drop) {
+ err = rte_errno;
+ msg = "resources for drop flow rule cannot be created";
+ goto error;
+ }
+ qp = priv->drop->qp;
}
- return rte_flow;
-
+ assert(qp);
+ if (flow->ibv_flow)
+ return 0;
+ flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
+ if (flow->ibv_flow)
+ return 0;
+ if (flow->drop)
+ mlx4_drop_put(priv->drop);
+ else if (flow->rss)
+ mlx4_rss_detach(flow->rss);
+ err = errno;
+ msg = "flow rule rejected by device";
error:
- if (rxq_parent)
- rxq_parent_cleanup(rxq_parent);
- rte_free(rte_flow);
- return NULL;
+ return rte_flow_error_set
+ (error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
}
/**
- * Convert a flow.
- *
- * @param priv
- * Pointer to private structure.
- * @param[in] attr
- * Flow rule attributes.
- * @param[in] items
- * Pattern specification (list terminated by the END pattern item).
- * @param[in] actions
- * Associated actions (list terminated by the END action).
- * @param[out] error
- * Perform verbose error reporting if not NULL.
+ * Create a flow.
*
- * @return
- * A flow on success, NULL otherwise.
+ * @see rte_flow_create()
+ * @see rte_flow_ops
*/
static struct rte_flow *
-priv_flow_create(struct priv *priv,
+mlx4_flow_create(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
- const struct rte_flow_item items[],
+ const struct rte_flow_item pattern[],
const struct rte_flow_action actions[],
struct rte_flow_error *error)
{
- struct rte_flow *rte_flow;
- struct mlx4_flow_action action;
- struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
+ struct priv *priv = dev->data->dev_private;
+ struct rte_flow *flow;
int err;
- err = priv_flow_validate(priv, attr, items, actions, error, &flow);
+ err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
if (err)
return NULL;
- flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
- if (!flow.ibv_attr) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate ibv_attr memory");
- return NULL;
- }
- flow.offset = sizeof(struct ibv_flow_attr);
- *flow.ibv_attr = (struct ibv_flow_attr){
- .comp_mask = 0,
- .type = IBV_FLOW_ATTR_NORMAL,
- .size = sizeof(struct ibv_flow_attr),
- .priority = attr->priority,
- .num_of_specs = 0,
- .port = priv->port,
- .flags = 0,
- };
- claim_zero(priv_flow_validate(priv, attr, items, actions,
- error, &flow));
- action = (struct mlx4_flow_action){
- .queue = 0,
- .drop = 0,
- };
- for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
- if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
- continue;
- } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
- action.queue = 1;
- action.queues_n = 1;
- action.queues[0] =
- ((const struct rte_flow_action_queue *)
- actions->conf)->index;
- } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
- action.drop = 1;
- } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
- unsigned int i;
- const struct rte_flow_action_rss *rss =
- (const struct rte_flow_action_rss *)
- actions->conf;
+ err = mlx4_flow_toggle(priv, flow, priv->started, error);
+ if (!err) {
+ struct rte_flow *curr = LIST_FIRST(&priv->flows);
- action.queue = 1;
- action.queues_n = rss->num;
- for (i = 0; i < rss->num; ++i)
- action.queues[i] = rss->queue[i];
+ /* New rules are inserted after internal ones. */
+ if (!curr || !curr->internal) {
+ LIST_INSERT_HEAD(&priv->flows, flow, next);
} else {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- actions, "unsupported action");
- goto exit;
+ while (LIST_NEXT(curr, next) &&
+ LIST_NEXT(curr, next)->internal)
+ curr = LIST_NEXT(curr, next);
+ LIST_INSERT_AFTER(curr, flow, next);
}
+ return flow;
}
- rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
- &action, error);
- if (rte_flow)
- return rte_flow;
-exit:
- rte_free(flow.ibv_attr);
+ if (flow->rss)
+ mlx4_rss_put(flow->rss);
+ rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ error->message);
+ rte_free(flow);
return NULL;
}
/**
- * Create a flow.
+ * Configure isolated mode.
*
- * @see rte_flow_create()
- * @see rte_flow_ops
- */
-struct rte_flow *
-mlx4_flow_create(struct rte_eth_dev *dev,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item items[],
- const struct rte_flow_action actions[],
- struct rte_flow_error *error)
-{
- struct priv *priv = dev->data->dev_private;
- struct rte_flow *flow;
-
- priv_lock(priv);
- flow = priv_flow_create(priv, attr, items, actions, error);
- if (flow) {
- LIST_INSERT_HEAD(&priv->flows, flow, next);
- DEBUG("Flow created %p", (void *)flow);
- }
- priv_unlock(priv);
- return flow;
-}
-
-/**
* @see rte_flow_isolate()
- *
- * Must be done before calling dev_configure().
- *
- * @param dev
- * Pointer to the ethernet device structure.
- * @param enable
- * Nonzero to enter isolated mode, attempt to leave it otherwise.
- * @param[out] error
- * Perform verbose error reporting if not NULL. PMDs initialize this
- * structure in case of error only.
- *
- * @return
- * 0 on success, a negative value on error.
+ * @see rte_flow_ops
*/
-int
+static int
mlx4_flow_isolate(struct rte_eth_dev *dev,
int enable,
struct rte_flow_error *error)
{
struct priv *priv = dev->data->dev_private;
- priv_lock(priv);
- if (priv->rxqs) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL, "isolated mode must be set"
- " before configuring the device");
- priv_unlock(priv);
+ if (!!enable == !!priv->isolated)
+ return 0;
+ priv->isolated = !!enable;
+ if (mlx4_flow_sync(priv, error)) {
+ priv->isolated = !enable;
return -rte_errno;
}
- priv->isolated = !!enable;
- priv_unlock(priv);
return 0;
}
/**
- * Destroy a flow.
+ * Destroy a flow rule.
*
- * @param priv
- * Pointer to private structure.
- * @param[in] flow
- * Flow to destroy.
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
*/
-static void
-priv_flow_destroy(struct priv *priv, struct rte_flow *flow)
+static int
+mlx4_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
{
- (void)priv;
+ struct priv *priv = dev->data->dev_private;
+ int err = mlx4_flow_toggle(priv, flow, 0, error);
+
+ if (err)
+ return err;
LIST_REMOVE(flow, next);
- if (flow->ibv_flow)
- claim_zero(ibv_destroy_flow(flow->ibv_flow));
- rte_free(flow->ibv_attr);
- DEBUG("Flow destroyed %p", (void *)flow);
+ if (flow->rss)
+ mlx4_rss_put(flow->rss);
rte_free(flow);
+ return 0;
}
/**
- * Destroy a flow.
+ * Destroy user-configured flow rules.
*
- * @see rte_flow_destroy()
+ * This function skips internal flow rules.
+ *
+ * @see rte_flow_flush()
* @see rte_flow_ops
*/
-int
-mlx4_flow_destroy(struct rte_eth_dev *dev,
- struct rte_flow *flow,
- struct rte_flow_error *error)
+static int
+mlx4_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
{
struct priv *priv = dev->data->dev_private;
+ struct rte_flow *flow = LIST_FIRST(&priv->flows);
+
+ while (flow) {
+ struct rte_flow *next = LIST_NEXT(flow, next);
- (void)error;
- priv_lock(priv);
- priv_flow_destroy(priv, flow);
- priv_unlock(priv);
+ if (!flow->internal)
+ mlx4_flow_destroy(dev, flow, error);
+ flow = next;
+ }
return 0;
}
/**
- * Destroy all flows.
+ * Helper function to determine the next configured VLAN filter.
*
* @param priv
* Pointer to private structure.
+ * @param vlan
+ * VLAN ID to use as a starting point.
+ *
+ * @return
+ * Next configured VLAN ID or a high value (>= 4096) if there is none.
*/
-static void
-priv_flow_flush(struct priv *priv)
+static uint16_t
+mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan)
+{
+ while (vlan < 4096) {
+ if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
+ (UINT64_C(1) << (vlan % 64)))
+ return vlan;
+ ++vlan;
+ }
+ return vlan;
+}
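+
+/*
+ * Typical usage (illustrative sketch only; mlx4_flow_internal() below
+ * iterates through a "next_vlan" label instead of a plain loop):
+ *
+ *	for (vlan = mlx4_flow_internal_next_vlan(priv, 0);
+ *	     vlan < 4096;
+ *	     vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1))
+ *		use(vlan);
+ */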
+
+/**
+ * Generate internal flow rules.
+ *
+ * Various flow rules are created depending on the mode the device is in:
+ *
+ * 1. Promiscuous: port MAC + catch-all (VLAN filtering is ignored).
+ * 2. All multicast: port MAC/VLAN + catch-all multicast.
+ * 3. Otherwise: port MAC/VLAN + broadcast MAC/VLAN.
+ *
+ * About MAC flow rules:
+ *
+ * - MAC flow rules are generated from @p dev->data->mac_addrs
+ * (@p priv->mac array).
+ * - An additional flow rule for Ethernet broadcasts is also generated.
+ * - All these are per-VLAN if @p dev->data->dev_conf.rxmode.hw_vlan_filter
+ * is enabled and VLAN filters are configured.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
{
- while (!LIST_EMPTY(&priv->flows)) {
- struct rte_flow *flow;
+ struct rte_flow_attr attr = {
+ .priority = MLX4_FLOW_PRIORITY_LAST,
+ .ingress = 1,
+ };
+ struct rte_flow_item_eth eth_spec;
+ const struct rte_flow_item_eth eth_mask = {
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ };
+ const struct rte_flow_item_eth eth_allmulti = {
+ .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+ };
+ struct rte_flow_item_vlan vlan_spec;
+ const struct rte_flow_item_vlan vlan_mask = {
+ .tci = RTE_BE16(0x0fff),
+ };
+ struct rte_flow_item pattern[] = {
+ {
+ .type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .spec = &eth_spec,
+ .mask = &eth_mask,
+ },
+ {
+ /* Replaced with VLAN if filtering is enabled. */
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ /*
+ * Round the number of queues down to the previous power of two to
+ * comply with RSS context limitations. Extra queues silently do not
+ * get RSS by default.
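+ * For instance, 6 configured Rx queues yield a 4-queue RSS context
+ * and leave queues 4 and 5 out of it.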
+ */
+ uint32_t queues =
+ rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
+ alignas(struct rte_flow_action_rss) uint8_t rss_conf_data
+ [offsetof(struct rte_flow_action_rss, queue) +
+ sizeof(((struct rte_flow_action_rss *)0)->queue[0]) * queues];
+ struct rte_flow_action_rss *rss_conf = (void *)rss_conf_data;
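+ /*
+ * rss_conf_data is a raw buffer sized and aligned to hold a struct
+ * rte_flow_action_rss with room for its flexible queue[] array;
+ * rss_conf simply overlays it.
+ */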
+ struct rte_flow_action actions[] = {
+ {
+ .type = RTE_FLOW_ACTION_TYPE_RSS,
+ .conf = rss_conf,
+ },
+ {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
+ struct ether_addr *rule_mac = &eth_spec.dst;
+ rte_be16_t *rule_vlan =
+ priv->dev->data->dev_conf.rxmode.hw_vlan_filter &&
+ !priv->dev->data->promiscuous ?
+ &vlan_spec.tci :
+ NULL;
+ int broadcast =
+ !priv->dev->data->promiscuous &&
+ !priv->dev->data->all_multicast;
+ uint16_t vlan = 0;
+ struct rte_flow *flow;
+ unsigned int i;
+ int err = 0;
- flow = LIST_FIRST(&priv->flows);
- priv_flow_destroy(priv, flow);
+ /* Nothing to be done if there are no Rx queues. */
+ if (!queues)
+ goto error;
+ /* Prepare default RSS configuration. */
+ *rss_conf = (struct rte_flow_action_rss){
+ .rss_conf = NULL, /* Rely on default fallback settings. */
+ .num = queues,
+ };
+ for (i = 0; i != queues; ++i)
+ rss_conf->queue[i] = i;
+ /*
+ * Set up VLAN item if filtering is enabled and at least one VLAN
+ * filter is configured.
+ */
+ if (rule_vlan) {
+ vlan = mlx4_flow_internal_next_vlan(priv, 0);
+ if (vlan < 4096) {
+ pattern[2] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_VLAN,
+ .spec = &vlan_spec,
+ .mask = &vlan_mask,
+ };
+next_vlan:
+ *rule_vlan = rte_cpu_to_be_16(vlan);
+ } else {
+ rule_vlan = NULL;
+ }
}
+ for (i = 0; i != RTE_DIM(priv->mac) + broadcast; ++i) {
+ const struct ether_addr *mac;
+
+ /* Broadcasts are handled by an extra iteration. */
+ if (i < RTE_DIM(priv->mac))
+ mac = &priv->mac[i];
+ else
+ mac = &eth_mask.dst;
+ if (is_zero_ether_addr(mac))
+ continue;
+ /* Check if MAC flow rule is already present. */
+ for (flow = LIST_FIRST(&priv->flows);
+ flow && flow->internal;
+ flow = LIST_NEXT(flow, next)) {
+ const struct ibv_flow_spec_eth *eth =
+ (const void *)((uintptr_t)flow->ibv_attr +
+ sizeof(*flow->ibv_attr));
+ unsigned int j;
+
+ if (!flow->mac)
+ continue;
+ assert(flow->ibv_attr->type == IBV_FLOW_ATTR_NORMAL);
+ assert(flow->ibv_attr->num_of_specs == 1);
+ assert(eth->type == IBV_FLOW_SPEC_ETH);
+ assert(flow->rss);
+ if (rule_vlan &&
+ (eth->val.vlan_tag != *rule_vlan ||
+ eth->mask.vlan_tag != RTE_BE16(0x0fff)))
+ continue;
+ if (!rule_vlan && eth->mask.vlan_tag)
+ continue;
+ for (j = 0; j != sizeof(mac->addr_bytes); ++j)
+ if (eth->val.dst_mac[j] != mac->addr_bytes[j] ||
+ eth->mask.dst_mac[j] != UINT8_C(0xff) ||
+ eth->val.src_mac[j] != UINT8_C(0x00) ||
+ eth->mask.src_mac[j] != UINT8_C(0x00))
+ break;
+ if (j != sizeof(mac->addr_bytes))
+ continue;
+ if (flow->rss->queues != queues ||
+ memcmp(flow->rss->queue_id, rss_conf->queue,
+ queues * sizeof(flow->rss->queue_id[0])))
+ continue;
+ break;
+ }
+ if (!flow || !flow->internal) {
+ /* Not found, create a new flow rule. */
+ memcpy(rule_mac, mac, sizeof(*mac));
+ flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ actions, error);
+ if (!flow) {
+ err = -rte_errno;
+ goto error;
+ }
+ }
+ flow->select = 1;
+ flow->mac = 1;
+ }
+ if (rule_vlan) {
+ vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1);
+ if (vlan < 4096)
+ goto next_vlan;
+ }
+ /* Take care of promiscuous and all multicast flow rules. */
+ if (!broadcast) {
+ for (flow = LIST_FIRST(&priv->flows);
+ flow && flow->internal;
+ flow = LIST_NEXT(flow, next)) {
+ if (priv->dev->data->promiscuous) {
+ if (flow->promisc)
+ break;
+ } else {
+ assert(priv->dev->data->all_multicast);
+ if (flow->allmulti)
+ break;
+ }
+ }
+ if (flow && flow->internal) {
+ assert(flow->rss);
+ if (flow->rss->queues != queues ||
+ memcmp(flow->rss->queue_id, rss_conf->queue,
+ queues * sizeof(flow->rss->queue_id[0])))
+ flow = NULL;
+ }
+ if (!flow || !flow->internal) {
+ /* Not found, create a new flow rule. */
+ if (priv->dev->data->promiscuous) {
+ pattern[1].spec = NULL;
+ pattern[1].mask = NULL;
+ } else {
+ assert(priv->dev->data->all_multicast);
+ pattern[1].spec = &eth_allmulti;
+ pattern[1].mask = &eth_allmulti;
+ }
+ pattern[2] = pattern[3];
+ flow = mlx4_flow_create(priv->dev, &attr, pattern,
+ actions, error);
+ if (!flow) {
+ err = -rte_errno;
+ goto error;
+ }
+ }
+ assert(flow->promisc || flow->allmulti);
+ flow->select = 1;
+ }
+error:
+ /* Clear selection and clean up stale internal flow rules. */
+ flow = LIST_FIRST(&priv->flows);
+ while (flow && flow->internal) {
+ struct rte_flow *next = LIST_NEXT(flow, next);
+
+ if (!flow->select)
+ claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
+ else
+ flow->select = 0;
+ flow = next;
+ }
+ return err;
}
/**
- * Destroy all flows.
+ * Synchronize flow rules.
*
- * @see rte_flow_flush()
- * @see rte_flow_ops
+ * This function synchronizes flow rules with the state of the device by
+ * taking into account isolated mode and whether target queues are
+ * configured.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-mlx4_flow_flush(struct rte_eth_dev *dev,
- struct rte_flow_error *error)
+mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error)
{
- struct priv *priv = dev->data->dev_private;
+ struct rte_flow *flow;
+ int ret;
- (void)error;
- priv_lock(priv);
- priv_flow_flush(priv);
- priv_unlock(priv);
+ /* Internal flow rules are guaranteed to come first in the list. */
+ if (priv->isolated) {
+ /*
+ * Get rid of them in isolated mode, stop at the first
+ * non-internal rule found.
+ */
+ for (flow = LIST_FIRST(&priv->flows);
+ flow && flow->internal;
+ flow = LIST_FIRST(&priv->flows))
+ claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
+ } else {
+ /* Refresh internal rules. */
+ ret = mlx4_flow_internal(priv, error);
+ if (ret)
+ return ret;
+ }
+ /* Toggle the remaining flow rules. */
+ LIST_FOREACH(flow, &priv->flows, next) {
+ ret = mlx4_flow_toggle(priv, flow, priv->started, error);
+ if (ret)
+ return ret;
+ }
+ if (!priv->started)
+ assert(!priv->drop);
return 0;
}
/**
- * Remove all flows.
+ * Clean up all flow rules.
*
- * Called by dev_stop() to remove all flows.
+ * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
+ * rules regardless of whether they are internal or user-configured.
*
* @param priv
* Pointer to private structure.
*/
void
-mlx4_priv_flow_stop(struct priv *priv)
+mlx4_flow_clean(struct priv *priv)
{
struct rte_flow *flow;
- for (flow = LIST_FIRST(&priv->flows);
- flow;
- flow = LIST_NEXT(flow, next)) {
- claim_zero(ibv_destroy_flow(flow->ibv_flow));
- flow->ibv_flow = NULL;
- DEBUG("Flow %p removed", (void *)flow);
- }
- mlx4_flow_destroy_drop_queue(priv);
+ while ((flow = LIST_FIRST(&priv->flows)))
+ mlx4_flow_destroy(priv->dev, flow, NULL);
+ assert(LIST_EMPTY(&priv->rss));
}
+static const struct rte_flow_ops mlx4_flow_ops = {
+ .validate = mlx4_flow_validate,
+ .create = mlx4_flow_create,
+ .destroy = mlx4_flow_destroy,
+ .flush = mlx4_flow_flush,
+ .isolate = mlx4_flow_isolate,
+};
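+
+/*
+ * Applications never use this table directly: the rte_flow layer retrieves
+ * it through the RTE_ETH_FILTER_GENERIC/RTE_ETH_FILTER_GET operation
+ * handled by mlx4_filter_ctrl() below. An illustrative call (assumed
+ * caller, not part of this file):
+ *
+ *	const struct rte_flow_ops *ops;
+ *
+ *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
+ *				RTE_ETH_FILTER_GET, &ops);
+ */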
+
/**
- * Add all flows.
+ * Manage filter operations.
*
- * @param priv
- * Pointer to private structure.
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param filter_type
+ * Filter type.
+ * @param filter_op
+ * Operation to perform.
+ * @param arg
+ * Pointer to operation-specific structure.
*
* @return
- * 0 on success, a errno value otherwise and rte_errno is set.
+ * 0 on success, negative errno value otherwise and rte_errno is set.
*/
int
-mlx4_priv_flow_start(struct priv *priv)
+mlx4_filter_ctrl(struct rte_eth_dev *dev,
+ enum rte_filter_type filter_type,
+ enum rte_filter_op filter_op,
+ void *arg)
{
- int ret;
- struct ibv_qp *qp;
- struct rte_flow *flow;
-
- ret = mlx4_flow_create_drop_queue(priv);
- if (ret)
- return -1;
- for (flow = LIST_FIRST(&priv->flows);
- flow;
- flow = LIST_NEXT(flow, next)) {
- qp = flow->qp ? flow->qp : priv->flow_drop_queue->qp;
- flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
- if (!flow->ibv_flow) {
- DEBUG("Flow %p cannot be applied", (void *)flow);
- rte_errno = EINVAL;
- return rte_errno;
- }
- DEBUG("Flow %p applied", (void *)flow);
+ switch (filter_type) {
+ case RTE_ETH_FILTER_GENERIC:
+ if (filter_op != RTE_ETH_FILTER_GET)
+ break;
+ *(const void **)arg = &mlx4_flow_ops;
+ return 0;
+ default:
+ ERROR("%p: filter type (%d) not supported",
+ (void *)dev, filter_type);
+ break;
}
- return 0;
+ rte_errno = ENOTSUP;
+ return -rte_errno;
}
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index beabcf2d..651fd37b 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -2,7 +2,7 @@
* BSD LICENSE
*
* Copyright 2017 6WIND S.A.
- * Copyright 2017 Mellanox.
+ * Copyright 2017 Mellanox
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -34,12 +34,10 @@
#ifndef RTE_PMD_MLX4_FLOW_H_
#define RTE_PMD_MLX4_FLOW_H_
-#include <stddef.h>
#include <stdint.h>
#include <sys/queue.h>
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
@@ -48,61 +46,40 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
+#include <rte_eth_ctrl.h>
+#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_byteorder.h>
-#include "mlx4.h"
+/** Last and lowest priority level for a flow rule. */
+#define MLX4_FLOW_PRIORITY_LAST UINT32_C(0xfff)
+/** Meta pattern item used to distinguish internal rules. */
+#define MLX4_FLOW_ITEM_TYPE_INTERNAL ((enum rte_flow_item_type)-1)
+
+/** PMD-specific (mlx4) definition of a flow rule handle. */
struct rte_flow {
LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
struct ibv_flow *ibv_flow; /**< Verbs flow. */
struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
- struct ibv_qp *qp; /**< Verbs queue pair. */
+ uint32_t ibv_attr_size; /**< Size of Verbs attributes. */
+ uint32_t select:1; /**< Used by operations on the linked list. */
+ uint32_t internal:1; /**< Internal flow rule outside isolated mode. */
+ uint32_t mac:1; /**< Rule associated with a configured MAC address. */
+ uint32_t promisc:1; /**< This rule matches everything. */
+ uint32_t allmulti:1; /**< This rule matches all multicast traffic. */
+ uint32_t drop:1; /**< This rule drops packets. */
+ struct mlx4_rss *rss; /**< Rx target. */
};
-int
-mlx4_flow_validate(struct rte_eth_dev *dev,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item items[],
- const struct rte_flow_action actions[],
- struct rte_flow_error *error);
-
-struct rte_flow *
-mlx4_flow_create(struct rte_eth_dev *dev,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item items[],
- const struct rte_flow_action actions[],
- struct rte_flow_error *error);
-
-int
-mlx4_flow_destroy(struct rte_eth_dev *dev,
- struct rte_flow *flow,
- struct rte_flow_error *error);
-
-int
-mlx4_flow_flush(struct rte_eth_dev *dev,
- struct rte_flow_error *error);
-
-/** Structure to pass to the conversion function. */
-struct mlx4_flow {
- struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
- unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
-};
-
-int
-mlx4_flow_isolate(struct rte_eth_dev *dev,
- int enable,
- struct rte_flow_error *error);
-
-struct mlx4_flow_action {
- uint32_t drop:1; /**< Target is a drop queue. */
- uint32_t queue:1; /**< Target is a receive queue. */
- uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indices to use. */
- uint16_t queues_n; /**< Number of entries in queue[] */
-};
+/* mlx4_flow.c */
-int mlx4_priv_flow_start(struct priv *priv);
-void mlx4_priv_flow_stop(struct priv *priv);
+int mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error);
+void mlx4_flow_clean(struct priv *priv);
+int mlx4_filter_ctrl(struct rte_eth_dev *dev,
+ enum rte_filter_type filter_type,
+ enum rte_filter_op filter_op,
+ void *arg);
#endif /* RTE_PMD_MLX4_FLOW_H_ */
diff --git a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c
new file mode 100644
index 00000000..b17d109a
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_intr.c
@@ -0,0 +1,397 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Interrupts handling for mlx4 driver.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_alarm.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_io.h>
+#include <rte_interrupts.h>
+
+#include "mlx4.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+static int mlx4_link_status_check(struct priv *priv);
+
+/**
+ * Clean up Rx interrupts handler.
+ *
+ * @param priv
+ * Pointer to private structure.
+ */
+static void
+mlx4_rx_intr_vec_disable(struct priv *priv)
+{
+ struct rte_intr_handle *intr_handle = &priv->intr_handle;
+
+ rte_intr_free_epoll_fd(intr_handle);
+ free(intr_handle->intr_vec);
+ intr_handle->nb_efd = 0;
+ intr_handle->intr_vec = NULL;
+}
+
+/**
+ * Allocate queue vector and fill epoll fd list for Rx interrupts.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_rx_intr_vec_enable(struct priv *priv)
+{
+ unsigned int i;
+ unsigned int rxqs_n = priv->dev->data->nb_rx_queues;
+ unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+ unsigned int count = 0;
+ struct rte_intr_handle *intr_handle = &priv->intr_handle;
+
+ mlx4_rx_intr_vec_disable(priv);
+ intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
+ if (intr_handle->intr_vec == NULL) {
+ rte_errno = ENOMEM;
+ ERROR("failed to allocate memory for interrupt vector,"
+ " Rx interrupts will not be supported");
+ return -rte_errno;
+ }
+ for (i = 0; i != n; ++i) {
+ struct rxq *rxq = priv->dev->data->rx_queues[i];
+
+ /* Skip queues that cannot request interrupts. */
+ if (!rxq || !rxq->channel) {
+ /* Use invalid intr_vec[] index to disable entry. */
+ intr_handle->intr_vec[i] =
+ RTE_INTR_VEC_RXTX_OFFSET +
+ RTE_MAX_RXTX_INTR_VEC_ID;
+ continue;
+ }
+ if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
+ rte_errno = E2BIG;
+ ERROR("too many Rx queues for interrupt vector size"
+ " (%d), Rx interrupts cannot be enabled",
+ RTE_MAX_RXTX_INTR_VEC_ID);
+ mlx4_rx_intr_vec_disable(priv);
+ return -rte_errno;
+ }
+ intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
+ intr_handle->efds[count] = rxq->channel->fd;
+ count++;
+ }
+ if (!count)
+ mlx4_rx_intr_vec_disable(priv);
+ else
+ intr_handle->nb_efd = count;
+ return 0;
+}
+
+/**
+ * Process scheduled link status check.
+ *
+ * If LSC interrupts are requested, process the related callback.
+ *
+ * @param priv
+ * Pointer to private structure.
+ */
+static void
+mlx4_link_status_alarm(struct priv *priv)
+{
+ const struct rte_intr_conf *const intr_conf =
+ &priv->dev->data->dev_conf.intr_conf;
+
+ assert(priv->intr_alarm == 1);
+ priv->intr_alarm = 0;
+ if (intr_conf->lsc && !mlx4_link_status_check(priv))
+ _rte_eth_dev_callback_process(priv->dev,
+ RTE_ETH_EVENT_INTR_LSC,
+ NULL, NULL);
+}
+
+/**
+ * Check link status.
+ *
+ * In case of inconsistency, another check is scheduled.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success (link status is consistent), negative errno value
+ * otherwise and rte_errno is set.
+ */
+static int
+mlx4_link_status_check(struct priv *priv)
+{
+ struct rte_eth_link *link = &priv->dev->data->dev_link;
+ int ret = mlx4_link_update(priv->dev, 0);
+
+ if (ret)
+ return ret;
+ if ((!link->link_speed && link->link_status) ||
+ (link->link_speed && !link->link_status)) {
+ if (!priv->intr_alarm) {
+ /* Inconsistent status, check again later. */
+ ret = rte_eal_alarm_set(MLX4_INTR_ALARM_TIMEOUT,
+ (void (*)(void *))
+ mlx4_link_status_alarm,
+ priv);
+ if (ret)
+ return ret;
+ priv->intr_alarm = 1;
+ }
+ rte_errno = EINPROGRESS;
+ return -rte_errno;
+ }
+ return 0;
+}
+
+/**
+ * Handle interrupts from the NIC.
+ *
+ * @param priv
+ * Pointer to private structure.
+ */
+static void
+mlx4_interrupt_handler(struct priv *priv)
+{
+ enum { LSC, RMV, };
+ static const enum rte_eth_event_type type[] = {
+ [LSC] = RTE_ETH_EVENT_INTR_LSC,
+ [RMV] = RTE_ETH_EVENT_INTR_RMV,
+ };
+ uint32_t caught[RTE_DIM(type)] = { 0 };
+ struct ibv_async_event event;
+ const struct rte_intr_conf *const intr_conf =
+ &priv->dev->data->dev_conf.intr_conf;
+ unsigned int i;
+
+ /* Read all messages and acknowledge them. */
+ while (!ibv_get_async_event(priv->ctx, &event)) {
+ switch (event.event_type) {
+ case IBV_EVENT_PORT_ACTIVE:
+ case IBV_EVENT_PORT_ERR:
+ if (intr_conf->lsc && !mlx4_link_status_check(priv))
+ ++caught[LSC];
+ break;
+ case IBV_EVENT_DEVICE_FATAL:
+ if (intr_conf->rmv)
+ ++caught[RMV];
+ break;
+ default:
+ DEBUG("event type %d on physical port %d not handled",
+ event.event_type, event.element.port_num);
+ }
+ ibv_ack_async_event(&event);
+ }
+ for (i = 0; i != RTE_DIM(caught); ++i)
+ if (caught[i])
+ _rte_eth_dev_callback_process(priv->dev, type[i],
+ NULL, NULL);
+}
+
+/**
+ * Arm a CQ for the next completion notification.
+ *
+ * @param rxq
+ * Pointer to receive queue structure.
+ * @param solicited
+ * Is request solicited or not.
+ */
+static void
+mlx4_arm_cq(struct rxq *rxq, int solicited)
+{
+ struct mlx4_cq *cq = &rxq->mcq;
+ uint64_t doorbell;
+ uint32_t sn = cq->arm_sn & MLX4_CQ_DB_GEQ_N_MASK;
+ uint32_t ci = cq->cons_index & MLX4_CQ_DB_CI_MASK;
+ uint32_t cmd = solicited ? MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT;
+
+ *cq->arm_db = rte_cpu_to_be_32(sn << 28 | cmd | ci);
+ /*
+ * Make sure that the doorbell record in host memory is
+ * written before ringing the doorbell via PCI MMIO.
+ */
+ rte_wmb();
+ doorbell = sn << 28 | cmd | cq->cqn;
+ doorbell <<= 32;
+ doorbell |= ci;
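+ /*
+ * The resulting 64-bit doorbell word carries the sequence number,
+ * command and CQ number in its upper half and the consumer index in
+ * its lower half.
+ */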
+ rte_write64(rte_cpu_to_be_64(doorbell), cq->cq_db_reg);
+}
+
+/**
+ * Uninstall interrupt handler.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_intr_uninstall(struct priv *priv)
+{
+ int err = rte_errno; /* Make sure rte_errno remains unchanged. */
+
+ if (priv->intr_handle.fd != -1) {
+ rte_intr_callback_unregister(&priv->intr_handle,
+ (void (*)(void *))
+ mlx4_interrupt_handler,
+ priv);
+ priv->intr_handle.fd = -1;
+ }
+ rte_eal_alarm_cancel((void (*)(void *))mlx4_link_status_alarm, priv);
+ priv->intr_alarm = 0;
+ mlx4_rx_intr_vec_disable(priv);
+ rte_errno = err;
+ return 0;
+}
+
+/**
+ * Install interrupt handler.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_intr_install(struct priv *priv)
+{
+ const struct rte_intr_conf *const intr_conf =
+ &priv->dev->data->dev_conf.intr_conf;
+ int rc;
+
+ mlx4_intr_uninstall(priv);
+ if (intr_conf->rxq && mlx4_rx_intr_vec_enable(priv) < 0)
+ goto error;
+ if (intr_conf->lsc | intr_conf->rmv) {
+ priv->intr_handle.fd = priv->ctx->async_fd;
+ rc = rte_intr_callback_register(&priv->intr_handle,
+ (void (*)(void *))
+ mlx4_interrupt_handler,
+ priv);
+ if (rc < 0) {
+ rte_errno = -rc;
+ goto error;
+ }
+ }
+ return 0;
+error:
+ mlx4_intr_uninstall(priv);
+ return -rte_errno;
+}
+
+/**
+ * DPDK callback for Rx queue interrupt disable.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param idx
+ * Rx queue index.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx)
+{
+ struct rxq *rxq = dev->data->rx_queues[idx];
+ struct ibv_cq *ev_cq;
+ void *ev_ctx;
+ int ret;
+
+ if (!rxq || !rxq->channel) {
+ ret = EINVAL;
+ } else {
+ ret = ibv_get_cq_event(rxq->cq->channel, &ev_cq, &ev_ctx);
+ if (ret || ev_cq != rxq->cq)
+ ret = EINVAL;
+ }
+ if (ret) {
+ rte_errno = ret;
+ WARN("unable to disable interrupt on rx queue %d",
+ idx);
+ } else {
+ rxq->mcq.arm_sn++;
+ ibv_ack_cq_events(rxq->cq, 1);
+ }
+ return -ret;
+}
+
+/**
+ * DPDK callback for Rx queue interrupt enable.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param idx
+ * Rx queue index.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx)
+{
+ struct rxq *rxq = dev->data->rx_queues[idx];
+ int ret = 0;
+
+ if (!rxq || !rxq->channel) {
+ ret = EINVAL;
+ rte_errno = ret;
+ WARN("unable to arm interrupt on rx queue %d", idx);
+ } else {
+ mlx4_arm_cq(rxq, 0);
+ }
+ return -ret;
+}
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
new file mode 100644
index 00000000..2a3e2695
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -0,0 +1,293 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Memory management functions for mlx4 driver.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_spinlock.h>
+
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+struct mlx4_check_mempool_data {
+ int ret;
+ char *start;
+ char *end;
+};
+
+/**
+ * Called by mlx4_check_mempool() when iterating the memory chunks.
+ *
+ * @param[in] mp
+ * Pointer to memory pool (unused).
+ * @param[in, out] data
+ * Pointer to shared buffer with mlx4_check_mempool().
+ * @param[in] memhdr
+ * Pointer to mempool chunk header.
+ * @param mem_idx
+ * Mempool element index (unused).
+ */
+static void
+mlx4_check_mempool_cb(struct rte_mempool *mp, void *opaque,
+ struct rte_mempool_memhdr *memhdr,
+ unsigned int mem_idx)
+{
+ struct mlx4_check_mempool_data *data = opaque;
+
+ (void)mp;
+ (void)mem_idx;
+ /* It already failed, skip the next chunks. */
+ if (data->ret != 0)
+ return;
+ /* It is the first chunk. */
+ if (data->start == NULL && data->end == NULL) {
+ data->start = memhdr->addr;
+ data->end = data->start + memhdr->len;
+ return;
+ }
+ if (data->end == memhdr->addr) {
+ data->end += memhdr->len;
+ return;
+ }
+ if (data->start == (char *)memhdr->addr + memhdr->len) {
+ data->start -= memhdr->len;
+ return;
+ }
+ /* Error, mempool is not virtually contiguous. */
+ data->ret = -1;
+}
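+
+/*
+ * In other words, each chunk must be adjacent to the range accumulated so
+ * far, either right after data->end or right before data->start; any gap
+ * marks the mempool as non-contiguous.
+ */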
+
+/**
+ * Check if a mempool can be used: it must be virtually contiguous.
+ *
+ * @param[in] mp
+ * Pointer to memory pool.
+ * @param[out] start
+ * Pointer to the start address of the mempool virtual memory area.
+ * @param[out] end
+ * Pointer to the end address of the mempool virtual memory area.
+ *
+ * @return
+ * 0 on success (mempool is virtually contiguous), -1 on error.
+ */
+static int
+mlx4_check_mempool(struct rte_mempool *mp, uintptr_t *start, uintptr_t *end)
+{
+ struct mlx4_check_mempool_data data;
+
+ memset(&data, 0, sizeof(data));
+ rte_mempool_mem_iter(mp, mlx4_check_mempool_cb, &data);
+ *start = (uintptr_t)data.start;
+ *end = (uintptr_t)data.end;
+ return data.ret;
+}
+
+/**
+ * Obtain a memory region from a memory pool.
+ *
+ * If a matching memory region already exists, it is returned with its
+ * reference count incremented, otherwise a new one is registered.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param mp
+ * Pointer to memory pool.
+ *
+ * @return
+ * Memory region pointer, NULL in case of error and rte_errno is set.
+ */
+struct mlx4_mr *
+mlx4_mr_get(struct priv *priv, struct rte_mempool *mp)
+{
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ uintptr_t start;
+ uintptr_t end;
+ unsigned int i;
+ struct mlx4_mr *mr;
+
+ if (mlx4_check_mempool(mp, &start, &end) != 0) {
+ rte_errno = EINVAL;
+ ERROR("mempool %p: not virtually contiguous",
+ (void *)mp);
+ return NULL;
+ }
+ DEBUG("mempool %p area start=%p end=%p size=%zu",
+ (void *)mp, (void *)start, (void *)end,
+ (size_t)(end - start));
+ /* Round start and end to page boundary if found in memory segments. */
+ for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+ uintptr_t addr = (uintptr_t)ms[i].addr;
+ size_t len = ms[i].len;
+ unsigned int align = ms[i].hugepage_sz;
+
+ if ((start > addr) && (start < addr + len))
+ start = RTE_ALIGN_FLOOR(start, align);
+ if ((end > addr) && (end < addr + len))
+ end = RTE_ALIGN_CEIL(end, align);
+ }
+ DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+ (void *)mp, (void *)start, (void *)end,
+ (size_t)(end - start));
+ rte_spinlock_lock(&priv->mr_lock);
+ LIST_FOREACH(mr, &priv->mr, next)
+ if (mp == mr->mp && start >= mr->start && end <= mr->end)
+ break;
+ if (mr) {
+ ++mr->refcnt;
+ goto release;
+ }
+ mr = rte_malloc(__func__, sizeof(*mr), 0);
+ if (!mr) {
+ rte_errno = ENOMEM;
+ goto release;
+ }
+ *mr = (struct mlx4_mr){
+ .start = start,
+ .end = end,
+ .refcnt = 1,
+ .priv = priv,
+ .mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
+ IBV_ACCESS_LOCAL_WRITE),
+ .mp = mp,
+ };
+ if (mr->mr) {
+ mr->lkey = mr->mr->lkey;
+ LIST_INSERT_HEAD(&priv->mr, mr, next);
+ } else {
+ rte_free(mr);
+ mr = NULL;
+ rte_errno = errno ? errno : EINVAL;
+ }
+release:
+ rte_spinlock_unlock(&priv->mr_lock);
+ return mr;
+}
+
+/**
+ * Release a memory region.
+ *
+ * This function decrements its reference count and destroys it after
+ * reaching 0.
+ *
+ * Note: since this function may be used from the data plane, it is
+ * extremely important that each user holds its own reference in order
+ * to avoid race conditions.
+ *
+ * @param mr
+ * Memory region to release.
+ */
+void
+mlx4_mr_put(struct mlx4_mr *mr)
+{
+ struct priv *priv = mr->priv;
+
+ rte_spinlock_lock(&priv->mr_lock);
+ assert(mr->refcnt);
+ if (--mr->refcnt)
+ goto release;
+ LIST_REMOVE(mr, next);
+ claim_zero(ibv_dereg_mr(mr->mr));
+ rte_free(mr);
+release:
+ rte_spinlock_unlock(&priv->mr_lock);
+}
+
+/**
+ * Add memory region (MR) <-> memory pool (MP) association to txq->mp2mr[].
+ * If mp2mr[] is full, remove an entry first.
+ *
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param[in] mp
+ * Memory pool for which a memory region lkey must be added.
+ * @param[in] i
+ * Index in txq->mp2mr[] where the new entry must be stored.
+ *
+ * @return
+ * Added mr->lkey on success, (uint32_t)-1 on failure.
+ */
+uint32_t
+mlx4_txq_add_mr(struct txq *txq, struct rte_mempool *mp, uint32_t i)
+{
+ struct mlx4_mr *mr;
+
+ /* Add a new entry, register MR first. */
+ DEBUG("%p: discovered new memory pool \"%s\" (%p)",
+ (void *)txq, mp->name, (void *)mp);
+ mr = mlx4_mr_get(txq->priv, mp);
+ if (unlikely(mr == NULL)) {
+ DEBUG("%p: unable to configure MR, mlx4_mr_get() failed",
+ (void *)txq);
+ return (uint32_t)-1;
+ }
+ if (unlikely(i == RTE_DIM(txq->mp2mr))) {
+ /* Table is full, remove oldest entry. */
+ DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
+ (void *)txq);
+ --i;
+ mlx4_mr_put(txq->mp2mr[0].mr);
+ memmove(&txq->mp2mr[0], &txq->mp2mr[1],
+ (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
+ }
+ /* Store the new entry. */
+ txq->mp2mr[i].mp = mp;
+ txq->mp2mr[i].mr = mr;
+ txq->mp2mr[i].lkey = mr->lkey;
+ DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIx32,
+ (void *)txq, mp->name, (void *)mp, txq->mp2mr[i].lkey);
+ return txq->mp2mr[i].lkey;
+}
diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
new file mode 100644
index 00000000..fcc7c129
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_prm.h
@@ -0,0 +1,177 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MLX4_PRM_H_
+#define MLX4_PRM_H_
+
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
+
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/mlx4dv.h>
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+/* ConnectX-3 Tx queue basic block. */
+#define MLX4_TXBB_SHIFT 6
+#define MLX4_TXBB_SIZE (1 << MLX4_TXBB_SHIFT)
+
+/* Typical TSO descriptor with 16 gather entries is 352 bytes. */
+#define MLX4_MAX_WQE_SIZE 512
+#define MLX4_MAX_WQE_TXBBS (MLX4_MAX_WQE_SIZE / MLX4_TXBB_SIZE)
+
+/* Send queue stamping/invalidating information. */
+#define MLX4_SQ_STAMP_STRIDE 64
+#define MLX4_SQ_STAMP_DWORDS (MLX4_SQ_STAMP_STRIDE / 4)
+#define MLX4_SQ_STAMP_SHIFT 31
+#define MLX4_SQ_STAMP_VAL 0x7fffffff
+
+/* Work queue element (WQE) flags. */
+#define MLX4_BIT_WQE_OWN 0x80000000
+#define MLX4_WQE_CTRL_IIP_HDR_CSUM (1 << 28)
+#define MLX4_WQE_CTRL_IL4_HDR_CSUM (1 << 27)
+
+#define MLX4_SIZE_TO_TXBBS(size) \
+ (RTE_ALIGN((size), (MLX4_TXBB_SIZE)) >> (MLX4_TXBB_SHIFT))
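+/*
+ * Example: a WQE made of one 16-byte control segment and five 16-byte
+ * data segments (96 bytes) rounds up to two 64-byte basic blocks, i.e.
+ * MLX4_SIZE_TO_TXBBS(96) == 2.
+ */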
+
+/* CQE checksum flags. */
+enum {
+ MLX4_CQE_L2_TUNNEL_IPV4 = (int)(1u << 25),
+ MLX4_CQE_L2_TUNNEL_L4_CSUM = (int)(1u << 26),
+ MLX4_CQE_L2_TUNNEL = (int)(1u << 27),
+ MLX4_CQE_L2_VLAN_MASK = (int)(3u << 29),
+ MLX4_CQE_L2_TUNNEL_IPOK = (int)(1u << 31),
+};
+
+/* CQE status flags. */
+#define MLX4_CQE_STATUS_IPV4 (1 << 22)
+#define MLX4_CQE_STATUS_IPV4F (1 << 23)
+#define MLX4_CQE_STATUS_IPV6 (1 << 24)
+#define MLX4_CQE_STATUS_IPV4OPT (1 << 25)
+#define MLX4_CQE_STATUS_TCP (1 << 26)
+#define MLX4_CQE_STATUS_UDP (1 << 27)
+#define MLX4_CQE_STATUS_PTYPE_MASK \
+ (MLX4_CQE_STATUS_IPV4 | \
+ MLX4_CQE_STATUS_IPV4F | \
+ MLX4_CQE_STATUS_IPV6 | \
+ MLX4_CQE_STATUS_IPV4OPT | \
+ MLX4_CQE_STATUS_TCP | \
+ MLX4_CQE_STATUS_UDP)
+
+/* Send queue information. */
+struct mlx4_sq {
+ volatile uint8_t *buf; /**< SQ buffer. */
+ volatile uint8_t *eob; /**< End of SQ buffer */
+ uint32_t head; /**< SQ head counter in units of TXBBS. */
+ uint32_t tail; /**< SQ tail counter in units of TXBBS. */
+	uint32_t txbb_cnt; /**< Number of WQEBBs in the SQ (a power of two). */
+	uint32_t txbb_cnt_mask; /**< txbb_cnt mask (txbb_cnt is a power of two). */
+	uint32_t headroom_txbbs; /**< Number of TXBBs that must be kept free. */
+ volatile uint32_t *db; /**< Pointer to the doorbell. */
+ uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */
+};
+
+#define mlx4_get_send_wqe(sq, n) ((sq)->buf + ((n) * (MLX4_TXBB_SIZE)))
+
+/* Completion queue events, numbers and masks. */
+#define MLX4_CQ_DB_GEQ_N_MASK 0x3
+#define MLX4_CQ_DOORBELL 0x20
+#define MLX4_CQ_DB_CI_MASK 0xffffff
+
+/* Completion queue information. */
+struct mlx4_cq {
+ volatile void *cq_uar; /**< CQ user access region. */
+ volatile void *cq_db_reg; /**< CQ doorbell register. */
+ volatile uint32_t *set_ci_db; /**< Pointer to the CQ doorbell. */
+ volatile uint32_t *arm_db; /**< Arming Rx events doorbell. */
+ volatile uint8_t *buf; /**< Pointer to the completion queue buffer. */
+ uint32_t cqe_cnt; /**< Number of entries in the queue. */
+ uint32_t cqe_64:1; /**< CQ entry size is 64 bytes. */
+ uint32_t cons_index; /**< Last queue entry that was handled. */
+ uint32_t cqn; /**< CQ number. */
+ int arm_sn; /**< Rx event counter. */
+};
+
+/**
+ * Retrieve a CQE entry from a CQ.
+ *
+ * cqe = cq->buf + cons_index * cqe_size + cqe_offset
+ *
+ * Where cqe_size is 32 or 64 bytes and cqe_offset is 0 or 32 (depending on
+ * cqe_size).
+ *
+ * @param cq
+ * CQ to retrieve entry from.
+ * @param index
+ * Entry index.
+ *
+ * @return
+ * Pointer to CQE entry.
+ */
+static inline volatile struct mlx4_cqe *
+mlx4_get_cqe(struct mlx4_cq *cq, uint32_t index)
+{
+ return (volatile struct mlx4_cqe *)(cq->buf +
+ ((index & (cq->cqe_cnt - 1)) <<
+ (5 + cq->cqe_64)) +
+ (cq->cqe_64 << 5));
+}
+
+/**
+ * Transpose a flag in a value.
+ *
+ * @param val
+ * Input value.
+ * @param from
+ * Flag to retrieve from input value.
+ * @param to
+ * Flag to set in output value.
+ *
+ * @return
+ * Output value with transposed flag enabled if present on input.
+ */
+static inline uint64_t
+mlx4_transpose(uint64_t val, uint64_t from, uint64_t to)
+{
+ return (from >= to ?
+ (val & from) / (from / to) :
+ (val & from) * (to / from));
+}
+
+#endif /* MLX4_PRM_H_ */
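As a quick illustration of mlx4_transpose() (a sketch, not part of the patch): a single status bit can be rescaled into any other single-bit flag, because the function divides or multiplies the extracted bit by the ratio of the two masks.

/* Hedged example: scale bit 26 (e.g. MLX4_CQE_STATUS_TCP) down to bit 2. */
static inline uint64_t
example_scale_tcp_bit(uint64_t cqe_status)
{
	/* Returns (1u << 2) when bit 26 is set in cqe_status, 0 otherwise. */
	return mlx4_transpose(cqe_status, UINT64_C(1) << 26, UINT64_C(1) << 2);
}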
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
new file mode 100644
index 00000000..8b97a894
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -0,0 +1,873 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Rx queues configuration for mlx4 driver.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/mlx4dv.h>
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_flow.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+
+#include "mlx4.h"
+#include "mlx4_flow.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+/**
+ * Historical RSS hash key.
+ *
+ * This used to be the default for mlx4 in Linux before v3.19 switched to
+ * generating random hash keys through netdev_rss_key_fill().
+ *
+ * It is used in this PMD for consistency with past DPDK releases but can
+ * now be overridden through user configuration.
+ *
+ * Note: this is not const to work around API quirks.
+ */
+uint8_t
+mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE] = {
+ 0x2c, 0xc6, 0x81, 0xd1,
+ 0x5b, 0xdb, 0xf4, 0xf7,
+ 0xfc, 0xa2, 0x83, 0x19,
+ 0xdb, 0x1a, 0x3e, 0x94,
+ 0x6b, 0x9e, 0x38, 0xd9,
+ 0x2c, 0x9c, 0x03, 0xd1,
+ 0xad, 0x99, 0x44, 0xa7,
+ 0xd9, 0x56, 0x3d, 0x59,
+ 0x06, 0x3c, 0x25, 0xf3,
+ 0xfc, 0x1f, 0xdc, 0x2a,
+};
+
+/**
+ * Obtain a RSS context with specified properties.
+ *
+ * Used when creating a flow rule targeting one or several Rx queues.
+ *
+ * If a matching RSS context already exists, it is returned with its
+ * reference count incremented.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param fields
+ * Fields for RSS processing (Verbs format).
+ * @param[in] key
+ * Hash key to use (whose size is exactly MLX4_RSS_HASH_KEY_SIZE).
+ * @param queues
+ * Number of target queues.
+ * @param[in] queue_id
+ * Target queues.
+ *
+ * @return
+ * Pointer to RSS context on success, NULL otherwise and rte_errno is set.
+ */
+struct mlx4_rss *
+mlx4_rss_get(struct priv *priv, uint64_t fields,
+ uint8_t key[MLX4_RSS_HASH_KEY_SIZE],
+ uint16_t queues, const uint16_t queue_id[])
+{
+ struct mlx4_rss *rss;
+ size_t queue_id_size = sizeof(queue_id[0]) * queues;
+
+ LIST_FOREACH(rss, &priv->rss, next)
+ if (fields == rss->fields &&
+ queues == rss->queues &&
+ !memcmp(key, rss->key, MLX4_RSS_HASH_KEY_SIZE) &&
+ !memcmp(queue_id, rss->queue_id, queue_id_size)) {
+ ++rss->refcnt;
+ return rss;
+ }
+ rss = rte_malloc(__func__, offsetof(struct mlx4_rss, queue_id) +
+ queue_id_size, 0);
+ if (!rss)
+ goto error;
+ *rss = (struct mlx4_rss){
+ .priv = priv,
+ .refcnt = 1,
+ .usecnt = 0,
+ .qp = NULL,
+ .ind = NULL,
+ .fields = fields,
+ .queues = queues,
+ };
+ memcpy(rss->key, key, MLX4_RSS_HASH_KEY_SIZE);
+ memcpy(rss->queue_id, queue_id, queue_id_size);
+ LIST_INSERT_HEAD(&priv->rss, rss, next);
+ return rss;
+error:
+ rte_errno = ENOMEM;
+ return NULL;
+}
+
+/**
+ * Release a RSS context instance.
+ *
+ * Used when destroying a flow rule targeting one or several Rx queues.
+ *
+ * This function decrements the reference count of the context and destroys
+ * it after reaching 0. The context must have no users at this point; all
+ * prior calls to mlx4_rss_attach() must have been followed by matching
+ * calls to mlx4_rss_detach().
+ *
+ * @param rss
+ * RSS context to release.
+ */
+void
+mlx4_rss_put(struct mlx4_rss *rss)
+{
+ assert(rss->refcnt);
+ if (--rss->refcnt)
+ return;
+ assert(!rss->usecnt);
+ assert(!rss->qp);
+ assert(!rss->ind);
+ LIST_REMOVE(rss, next);
+ rte_free(rss);
+}
+
+/**
+ * Attach a user to a RSS context instance.
+ *
+ * Used when the RSS QP and indirection table objects must be instantiated,
+ * that is, when a flow rule must be enabled.
+ *
+ * This function increments the usage count of the context.
+ *
+ * @param rss
+ * RSS context to attach to.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_rss_attach(struct mlx4_rss *rss)
+{
+ assert(rss->refcnt);
+ if (rss->usecnt++) {
+ assert(rss->qp);
+ assert(rss->ind);
+ return 0;
+ }
+
+ struct ibv_wq *ind_tbl[rss->queues];
+ struct priv *priv = rss->priv;
+ const char *msg;
+ unsigned int i = 0;
+ int ret;
+
+ if (!rte_is_power_of_2(RTE_DIM(ind_tbl))) {
+ ret = EINVAL;
+ msg = "number of RSS queues must be a power of two";
+ goto error;
+ }
+ for (i = 0; i != RTE_DIM(ind_tbl); ++i) {
+ uint16_t id = rss->queue_id[i];
+ struct rxq *rxq = NULL;
+
+ if (id < priv->dev->data->nb_rx_queues)
+ rxq = priv->dev->data->rx_queues[id];
+ if (!rxq) {
+ ret = EINVAL;
+ msg = "RSS target queue is not configured";
+ goto error;
+ }
+ ret = mlx4_rxq_attach(rxq);
+ if (ret) {
+ ret = -ret;
+ msg = "unable to attach RSS target queue";
+ goto error;
+ }
+ ind_tbl[i] = rxq->wq;
+ }
+ rss->ind = ibv_create_rwq_ind_table
+ (priv->ctx,
+ &(struct ibv_rwq_ind_table_init_attr){
+ .log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)),
+ .ind_tbl = ind_tbl,
+ .comp_mask = 0,
+ });
+ if (!rss->ind) {
+ ret = errno ? errno : EINVAL;
+ msg = "RSS indirection table creation failure";
+ goto error;
+ }
+ rss->qp = ibv_create_qp_ex
+ (priv->ctx,
+ &(struct ibv_qp_init_attr_ex){
+ .comp_mask = (IBV_QP_INIT_ATTR_PD |
+ IBV_QP_INIT_ATTR_RX_HASH |
+ IBV_QP_INIT_ATTR_IND_TABLE),
+ .qp_type = IBV_QPT_RAW_PACKET,
+ .pd = priv->pd,
+ .rwq_ind_tbl = rss->ind,
+ .rx_hash_conf = {
+ .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
+ .rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE,
+ .rx_hash_key = rss->key,
+ .rx_hash_fields_mask = rss->fields,
+ },
+ });
+ if (!rss->qp) {
+ ret = errno ? errno : EINVAL;
+ msg = "RSS hash QP creation failure";
+ goto error;
+ }
+ ret = ibv_modify_qp
+ (rss->qp,
+ &(struct ibv_qp_attr){
+ .qp_state = IBV_QPS_INIT,
+ .port_num = priv->port,
+ },
+ IBV_QP_STATE | IBV_QP_PORT);
+ if (ret) {
+ msg = "failed to switch RSS hash QP to INIT state";
+ goto error;
+ }
+ ret = ibv_modify_qp
+ (rss->qp,
+ &(struct ibv_qp_attr){
+ .qp_state = IBV_QPS_RTR,
+ },
+ IBV_QP_STATE);
+ if (ret) {
+ msg = "failed to switch RSS hash QP to RTR state";
+ goto error;
+ }
+ return 0;
+error:
+ if (rss->qp) {
+ claim_zero(ibv_destroy_qp(rss->qp));
+ rss->qp = NULL;
+ }
+ if (rss->ind) {
+ claim_zero(ibv_destroy_rwq_ind_table(rss->ind));
+ rss->ind = NULL;
+ }
+ while (i--)
+ mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
+ ERROR("mlx4: %s", msg);
+ --rss->usecnt;
+ rte_errno = ret;
+ return -ret;
+}
+
+/**
+ * Detach a user from a RSS context instance.
+ *
+ * Used when disabling (not destroying) a flow rule.
+ *
+ * This function decrements the usage count of the context and destroys
+ * usage resources after reaching 0.
+ *
+ * @param rss
+ * RSS context to detach from.
+ */
+void
+mlx4_rss_detach(struct mlx4_rss *rss)
+{
+ struct priv *priv = rss->priv;
+ unsigned int i;
+
+ assert(rss->refcnt);
+ assert(rss->qp);
+ assert(rss->ind);
+ if (--rss->usecnt)
+ return;
+ claim_zero(ibv_destroy_qp(rss->qp));
+ rss->qp = NULL;
+ claim_zero(ibv_destroy_rwq_ind_table(rss->ind));
+ rss->ind = NULL;
+ for (i = 0; i != rss->queues; ++i)
+ mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]);
+}
+
+/**
+ * Initialize common RSS context resources.
+ *
+ * Because ConnectX-3 hardware limitations require a fixed order in the
+ * indirection table, WQs must be allocated sequentially to be part of a
+ * common RSS context.
+ *
+ * Since a newly created WQ cannot be moved to a different context, this
+ * function allocates them all at once, one for each configured Rx queue,
+ * as well as all related resources (CQs and mbufs).
+ *
+ * This must therefore be done before creating any Rx flow rules relying on
+ * indirection tables.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_rss_init(struct priv *priv)
+{
+ struct rte_eth_dev *dev = priv->dev;
+ uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues);
+ uint32_t wq_num_prev = 0;
+ const char *msg;
+ unsigned int i;
+ int ret;
+
+ /* Prepare range for RSS contexts before creating the first WQ. */
+ ret = mlx4dv_set_context_attr(priv->ctx,
+ MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ,
+ &log2_range);
+ if (ret) {
+ ERROR("cannot set up range size for RSS context to %u"
+ " (for %u Rx queues), error: %s",
+ 1 << log2_range, dev->data->nb_rx_queues, strerror(ret));
+ rte_errno = ret;
+ return -ret;
+ }
+ for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = priv->dev->data->rx_queues[i];
+ struct ibv_cq *cq;
+ struct ibv_wq *wq;
+ uint32_t wq_num;
+
+ /* Attach the configured Rx queues. */
+ if (rxq) {
+ assert(!rxq->usecnt);
+ ret = mlx4_rxq_attach(rxq);
+ if (!ret) {
+ wq_num = rxq->wq->wq_num;
+ goto wq_num_check;
+ }
+ ret = -ret;
+ msg = "unable to create Rx queue resources";
+ goto error;
+ }
+ /*
+ * WQs are temporarily allocated for unconfigured Rx queues
+ * to maintain proper index alignment in indirection table
+ * by skipping unused WQ numbers.
+ *
+ * The reason this works at all even though these WQs are
+ * immediately destroyed is that WQNs are allocated
+ * sequentially and are guaranteed to never be reused in the
+ * same context by the underlying implementation.
+ */
+ cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
+ if (!cq) {
+ ret = ENOMEM;
+ msg = "placeholder CQ creation failure";
+ goto error;
+ }
+ wq = ibv_create_wq
+ (priv->ctx,
+ &(struct ibv_wq_init_attr){
+ .wq_type = IBV_WQT_RQ,
+ .max_wr = 1,
+ .max_sge = 1,
+ .pd = priv->pd,
+ .cq = cq,
+ });
+ if (wq) {
+ wq_num = wq->wq_num;
+ claim_zero(ibv_destroy_wq(wq));
+ } else {
+ wq_num = 0; /* Shut up GCC 4.8 warnings. */
+ }
+ claim_zero(ibv_destroy_cq(cq));
+ if (!wq) {
+ ret = ENOMEM;
+ msg = "placeholder WQ creation failure";
+ goto error;
+ }
+wq_num_check:
+ /*
+ * While guaranteed by the implementation, make sure WQ
+ * numbers are really sequential (as the saying goes,
+ * trust, but verify).
+ */
+ if (i && wq_num - wq_num_prev != 1) {
+ if (rxq)
+ mlx4_rxq_detach(rxq);
+ ret = ERANGE;
+ msg = "WQ numbers are not sequential";
+ goto error;
+ }
+ wq_num_prev = wq_num;
+ }
+ return 0;
+error:
+ ERROR("cannot initialize common RSS resources (queue %u): %s: %s",
+ i, msg, strerror(ret));
+ while (i--) {
+ struct rxq *rxq = priv->dev->data->rx_queues[i];
+
+ if (rxq)
+ mlx4_rxq_detach(rxq);
+ }
+ rte_errno = ret;
+ return -ret;
+}
+
+/**
+ * Release common RSS context resources.
+ *
+ * As the reverse of mlx4_rss_init(), this must be done after removing all
+ * flow rules relying on indirection tables.
+ *
+ * @param priv
+ * Pointer to private structure.
+ */
+void
+mlx4_rss_deinit(struct priv *priv)
+{
+ unsigned int i;
+
+ for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
+ struct rxq *rxq = priv->dev->data->rx_queues[i];
+
+ if (rxq) {
+ assert(rxq->usecnt == 1);
+ mlx4_rxq_detach(rxq);
+ }
+ }
+}
+
+/**
+ * Attach a user to a Rx queue.
+ *
+ * Used when the resources of an Rx queue must be instantiated for it to
+ * become usable.
+ *
+ * This function increments the usage count of the Rx queue.
+ *
+ * @param rxq
+ * Pointer to Rx queue structure.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_rxq_attach(struct rxq *rxq)
+{
+ if (rxq->usecnt++) {
+ assert(rxq->cq);
+ assert(rxq->wq);
+ assert(rxq->wqes);
+ assert(rxq->rq_db);
+ return 0;
+ }
+
+ struct priv *priv = rxq->priv;
+ const uint32_t elts_n = 1 << rxq->elts_n;
+ const uint32_t sges_n = 1 << rxq->sges_n;
+ struct rte_mbuf *(*elts)[elts_n] = rxq->elts;
+ struct mlx4dv_obj mlxdv;
+ struct mlx4dv_rwq dv_rwq;
+ struct mlx4dv_cq dv_cq = { .comp_mask = MLX4DV_CQ_MASK_UAR, };
+ const char *msg;
+ struct ibv_cq *cq = NULL;
+ struct ibv_wq *wq = NULL;
+ volatile struct mlx4_wqe_data_seg (*wqes)[];
+ unsigned int i;
+ int ret;
+
+ assert(rte_is_power_of_2(elts_n));
+ cq = ibv_create_cq(priv->ctx, elts_n / sges_n, NULL, rxq->channel, 0);
+ if (!cq) {
+ ret = ENOMEM;
+ msg = "CQ creation failure";
+ goto error;
+ }
+ wq = ibv_create_wq
+ (priv->ctx,
+ &(struct ibv_wq_init_attr){
+ .wq_type = IBV_WQT_RQ,
+ .max_wr = elts_n / sges_n,
+ .max_sge = sges_n,
+ .pd = priv->pd,
+ .cq = cq,
+ });
+ if (!wq) {
+ ret = errno ? errno : EINVAL;
+ msg = "WQ creation failure";
+ goto error;
+ }
+ ret = ibv_modify_wq
+ (wq,
+ &(struct ibv_wq_attr){
+ .attr_mask = IBV_WQ_ATTR_STATE,
+ .wq_state = IBV_WQS_RDY,
+ });
+ if (ret) {
+ msg = "WQ state change to IBV_WQS_RDY failed";
+ goto error;
+ }
+ /* Retrieve device queue information. */
+ mlxdv.cq.in = cq;
+ mlxdv.cq.out = &dv_cq;
+ mlxdv.rwq.in = wq;
+ mlxdv.rwq.out = &dv_rwq;
+ ret = mlx4dv_init_obj(&mlxdv, MLX4DV_OBJ_RWQ | MLX4DV_OBJ_CQ);
+ if (ret) {
+ msg = "failed to obtain device information from WQ/CQ objects";
+ goto error;
+ }
+ wqes = (volatile struct mlx4_wqe_data_seg (*)[])
+ ((uintptr_t)dv_rwq.buf.buf + dv_rwq.rq.offset);
+ for (i = 0; i != RTE_DIM(*elts); ++i) {
+ volatile struct mlx4_wqe_data_seg *scat = &(*wqes)[i];
+ struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
+
+ if (buf == NULL) {
+ while (i--) {
+ rte_pktmbuf_free_seg((*elts)[i]);
+ (*elts)[i] = NULL;
+ }
+ ret = ENOMEM;
+ msg = "cannot allocate mbuf";
+ goto error;
+ }
+ /* Headroom is reserved by rte_pktmbuf_alloc(). */
+ assert(buf->data_off == RTE_PKTMBUF_HEADROOM);
+ /* Buffer is supposed to be empty. */
+ assert(rte_pktmbuf_data_len(buf) == 0);
+ assert(rte_pktmbuf_pkt_len(buf) == 0);
+ /* Only the first segment keeps headroom. */
+ if (i % sges_n)
+ buf->data_off = 0;
+ buf->port = rxq->port_id;
+ buf->data_len = rte_pktmbuf_tailroom(buf);
+ buf->pkt_len = rte_pktmbuf_tailroom(buf);
+ buf->nb_segs = 1;
+ *scat = (struct mlx4_wqe_data_seg){
+ .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+ uintptr_t)),
+ .byte_count = rte_cpu_to_be_32(buf->data_len),
+ .lkey = rte_cpu_to_be_32(rxq->mr->lkey),
+ };
+ (*elts)[i] = buf;
+ }
+ DEBUG("%p: allocated and configured %u segments (max %u packets)",
+ (void *)rxq, elts_n, elts_n / sges_n);
+ rxq->cq = cq;
+ rxq->wq = wq;
+ rxq->wqes = wqes;
+ rxq->rq_db = dv_rwq.rdb;
+ rxq->mcq.buf = dv_cq.buf.buf;
+ rxq->mcq.cqe_cnt = dv_cq.cqe_cnt;
+ rxq->mcq.set_ci_db = dv_cq.set_ci_db;
+ rxq->mcq.cqe_64 = (dv_cq.cqe_size & 64) ? 1 : 0;
+ rxq->mcq.arm_db = dv_cq.arm_db;
+ rxq->mcq.arm_sn = dv_cq.arm_sn;
+ rxq->mcq.cqn = dv_cq.cqn;
+ rxq->mcq.cq_uar = dv_cq.cq_uar;
+ rxq->mcq.cq_db_reg = (uint8_t *)dv_cq.cq_uar + MLX4_CQ_DOORBELL;
+ /* Update doorbell counter. */
+ rxq->rq_ci = elts_n / sges_n;
+ rte_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+ return 0;
+error:
+ if (wq)
+ claim_zero(ibv_destroy_wq(wq));
+ if (cq)
+ claim_zero(ibv_destroy_cq(cq));
+ rte_errno = ret;
+ ERROR("error while attaching Rx queue %p: %s: %s",
+ (void *)rxq, msg, strerror(ret));
+ return -ret;
+}
+
+/**
+ * Detach a user from a Rx queue.
+ *
+ * This function decrements the usage count of the Rx queue and destroys
+ * usage resources after reaching 0.
+ *
+ * @param rxq
+ * Pointer to Rx queue structure.
+ */
+void
+mlx4_rxq_detach(struct rxq *rxq)
+{
+ unsigned int i;
+ struct rte_mbuf *(*elts)[1 << rxq->elts_n] = rxq->elts;
+
+ if (--rxq->usecnt)
+ return;
+ rxq->rq_ci = 0;
+ memset(&rxq->mcq, 0, sizeof(rxq->mcq));
+ rxq->rq_db = NULL;
+ rxq->wqes = NULL;
+ claim_zero(ibv_destroy_wq(rxq->wq));
+ rxq->wq = NULL;
+ claim_zero(ibv_destroy_cq(rxq->cq));
+ rxq->cq = NULL;
+ DEBUG("%p: freeing Rx queue elements", (void *)rxq);
+ for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+ if (!(*elts)[i])
+ continue;
+ rte_pktmbuf_free_seg((*elts)[i]);
+ (*elts)[i] = NULL;
+ }
+}
+
+/**
+ * DPDK callback to configure a Rx queue.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param idx
+ * Rx queue index.
+ * @param desc
+ * Number of descriptors to configure in queue.
+ * @param socket
+ * NUMA socket on which memory must be allocated.
+ * @param[in] conf
+ * Thresholds parameters.
+ * @param mp
+ * Memory pool for buffer allocations.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+ unsigned int socket, const struct rte_eth_rxconf *conf,
+ struct rte_mempool *mp)
+{
+ struct priv *priv = dev->data->dev_private;
+ uint32_t mb_len = rte_pktmbuf_data_room_size(mp);
+ struct rte_mbuf *(*elts)[rte_align32pow2(desc)];
+ struct rxq *rxq;
+ struct mlx4_malloc_vec vec[] = {
+ {
+ .align = RTE_CACHE_LINE_SIZE,
+ .size = sizeof(*rxq),
+ .addr = (void **)&rxq,
+ },
+ {
+ .align = RTE_CACHE_LINE_SIZE,
+ .size = sizeof(*elts),
+ .addr = (void **)&elts,
+ },
+ };
+ int ret;
+
+ (void)conf; /* Thresholds configuration (ignored). */
+ DEBUG("%p: configuring queue %u for %u descriptors",
+ (void *)dev, idx, desc);
+ if (idx >= dev->data->nb_rx_queues) {
+ rte_errno = EOVERFLOW;
+ ERROR("%p: queue index out of range (%u >= %u)",
+ (void *)dev, idx, dev->data->nb_rx_queues);
+ return -rte_errno;
+ }
+ rxq = dev->data->rx_queues[idx];
+ if (rxq) {
+ rte_errno = EEXIST;
+ ERROR("%p: Rx queue %u already configured, release it first",
+ (void *)dev, idx);
+ return -rte_errno;
+ }
+ if (!desc) {
+ rte_errno = EINVAL;
+ ERROR("%p: invalid number of Rx descriptors", (void *)dev);
+ return -rte_errno;
+ }
+ if (desc != RTE_DIM(*elts)) {
+ desc = RTE_DIM(*elts);
+ WARN("%p: increased number of descriptors in Rx queue %u"
+ " to the next power of two (%u)",
+ (void *)dev, idx, desc);
+ }
+ /* Allocate and initialize Rx queue. */
+ mlx4_zmallocv_socket("RXQ", vec, RTE_DIM(vec), socket);
+ if (!rxq) {
+ ERROR("%p: unable to allocate queue index %u",
+ (void *)dev, idx);
+ return -rte_errno;
+ }
+ *rxq = (struct rxq){
+ .priv = priv,
+ .mp = mp,
+ .port_id = dev->data->port_id,
+ .sges_n = 0,
+ .elts_n = rte_log2_u32(desc),
+ .elts = elts,
+ /* Toggle Rx checksum offload if hardware supports it. */
+ .csum = (priv->hw_csum &&
+ dev->data->dev_conf.rxmode.hw_ip_checksum),
+ .csum_l2tun = (priv->hw_csum_l2tun &&
+ dev->data->dev_conf.rxmode.hw_ip_checksum),
+ .stats = {
+ .idx = idx,
+ },
+ .socket = socket,
+ };
+ /* Enable scattered packets support for this queue if necessary. */
+ assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+ if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+ (mb_len - RTE_PKTMBUF_HEADROOM)) {
+ ;
+ } else if (dev->data->dev_conf.rxmode.enable_scatter) {
+ uint32_t size =
+ RTE_PKTMBUF_HEADROOM +
+ dev->data->dev_conf.rxmode.max_rx_pkt_len;
+ uint32_t sges_n;
+
+ /*
+ * Determine the number of SGEs needed for a full packet
+ * and round it to the next power of two.
+ */
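+		/*
+		 * Example (hypothetical sizes): with mb_len = 2048, the
+		 * default 128-byte headroom and max_rx_pkt_len = 9000,
+		 * size = 9128 and ceil(9128 / 2048) = 5 segments, which
+		 * rte_log2_u32() rounds up to 8 SGEs (sges_n = 3).
+		 */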
+ sges_n = rte_log2_u32((size / mb_len) + !!(size % mb_len));
+ rxq->sges_n = sges_n;
+ /* Make sure sges_n did not overflow. */
+ size = mb_len * (1 << rxq->sges_n);
+ size -= RTE_PKTMBUF_HEADROOM;
+ if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+ rte_errno = EOVERFLOW;
+ ERROR("%p: too many SGEs (%u) needed to handle"
+ " requested maximum packet size %u",
+ (void *)dev,
+ 1 << sges_n,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len);
+ goto error;
+ }
+ } else {
+ WARN("%p: the requested maximum Rx packet size (%u) is"
+ " larger than a single mbuf (%u) and scattered"
+ " mode has not been requested",
+ (void *)dev,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len,
+ mb_len - RTE_PKTMBUF_HEADROOM);
+ }
+ DEBUG("%p: maximum number of segments per packet: %u",
+ (void *)dev, 1 << rxq->sges_n);
+ if (desc % (1 << rxq->sges_n)) {
+ rte_errno = EINVAL;
+ ERROR("%p: number of Rx queue descriptors (%u) is not a"
+ " multiple of maximum segments per packet (%u)",
+ (void *)dev,
+ desc,
+ 1 << rxq->sges_n);
+ goto error;
+ }
+ /* Use the entire Rx mempool as the memory region. */
+ rxq->mr = mlx4_mr_get(priv, mp);
+ if (!rxq->mr) {
+ ERROR("%p: MR creation failure: %s",
+ (void *)dev, strerror(rte_errno));
+ goto error;
+ }
+ if (dev->data->dev_conf.intr_conf.rxq) {
+ rxq->channel = ibv_create_comp_channel(priv->ctx);
+ if (rxq->channel == NULL) {
+ rte_errno = ENOMEM;
+ ERROR("%p: Rx interrupt completion channel creation"
+ " failure: %s",
+ (void *)dev, strerror(rte_errno));
+ goto error;
+ }
+ if (mlx4_fd_set_non_blocking(rxq->channel->fd) < 0) {
+ ERROR("%p: unable to make Rx interrupt completion"
+ " channel non-blocking: %s",
+ (void *)dev, strerror(rte_errno));
+ goto error;
+ }
+ }
+ DEBUG("%p: adding Rx queue %p to list", (void *)dev, (void *)rxq);
+ dev->data->rx_queues[idx] = rxq;
+ return 0;
+error:
+ dev->data->rx_queues[idx] = NULL;
+ ret = rte_errno;
+ mlx4_rx_queue_release(rxq);
+ rte_errno = ret;
+ assert(rte_errno > 0);
+ return -rte_errno;
+}
+
+/**
+ * DPDK callback to release a Rx queue.
+ *
+ * @param dpdk_rxq
+ * Generic Rx queue pointer.
+ */
+void
+mlx4_rx_queue_release(void *dpdk_rxq)
+{
+ struct rxq *rxq = (struct rxq *)dpdk_rxq;
+ struct priv *priv;
+ unsigned int i;
+
+ if (rxq == NULL)
+ return;
+ priv = rxq->priv;
+ for (i = 0; i != priv->dev->data->nb_rx_queues; ++i)
+ if (priv->dev->data->rx_queues[i] == rxq) {
+ DEBUG("%p: removing Rx queue %p from list",
+ (void *)priv->dev, (void *)rxq);
+ priv->dev->data->rx_queues[i] = NULL;
+ break;
+ }
+ assert(!rxq->cq);
+ assert(!rxq->wq);
+ assert(!rxq->wqes);
+ assert(!rxq->rq_db);
+ if (rxq->channel)
+ claim_zero(ibv_destroy_comp_channel(rxq->channel));
+ if (rxq->mr)
+ mlx4_mr_put(rxq->mr);
+ rte_free(rxq);
+}
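The intended call sequence for the RSS helpers above may be easier to follow end to end; the following is a minimal sketch (not part of the patch) of what a flow-rule enable/disable path is expected to do, assuming the target Rx queues have already been configured:

/* Enable then disable an RSS context spanning the first two Rx queues. */
static int
example_rss_flow(struct priv *priv)
{
	const uint16_t queue_id[] = { 0, 1 }; /* Count must be a power of two. */
	struct mlx4_rss *rss;
	int ret;

	rss = mlx4_rss_get(priv, IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
			   mlx4_rss_hash_key_default,
			   RTE_DIM(queue_id), queue_id);
	if (!rss)
		return -rte_errno;
	/* Instantiates the indirection table, hash QP and target WQs. */
	ret = mlx4_rss_attach(rss);
	if (ret) {
		mlx4_rss_put(rss);
		return ret;
	}
	/* ... flow rule active, traffic is spread over both queues ... */
	mlx4_rss_detach(rss);
	mlx4_rss_put(rss);
	return 0;
}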
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
new file mode 100644
index 00000000..3985e06d
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -0,0 +1,1071 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Data plane functions for mlx4 driver.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_io.h>
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_prefetch.h>
+
+#include "mlx4.h"
+#include "mlx4_prm.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+#define WQE_ONE_DATA_SEG_SIZE \
+ (sizeof(struct mlx4_wqe_ctrl_seg) + sizeof(struct mlx4_wqe_data_seg))
+
+/**
+ * Pointer-value pair structure used when building Tx WQEs to save the first
+ * DWORD (32 bits) of a TXBB.
+ */
+struct pv {
+ volatile struct mlx4_wqe_data_seg *dseg;
+ uint32_t val;
+};
+
+/** A table to translate Rx completion flags to packet type. */
+uint32_t mlx4_ptype_table[0x100] __rte_cache_aligned = {
+ /*
+ * The index to the array should have:
+ * bit[7] - MLX4_CQE_L2_TUNNEL
+ * bit[6] - MLX4_CQE_L2_TUNNEL_IPV4
+ * bit[5] - MLX4_CQE_STATUS_UDP
+ * bit[4] - MLX4_CQE_STATUS_TCP
+ * bit[3] - MLX4_CQE_STATUS_IPV4OPT
+ * bit[2] - MLX4_CQE_STATUS_IPV6
+ * bit[1] - MLX4_CQE_STATUS_IPV4F
+ * bit[0] - MLX4_CQE_STATUS_IPV4
+ * giving a total of up to 256 entries.
+ */
+ [0x00] = RTE_PTYPE_L2_ETHER,
+ [0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
+ [0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG,
+ [0x03] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG,
+ [0x04] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
+ [0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT,
+ [0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L4_FRAG,
+ [0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_TCP,
+ [0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_TCP,
+ [0x14] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L4_TCP,
+ [0x18] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L4_TCP,
+ [0x19] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L4_TCP,
+ [0x1a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L4_TCP,
+ [0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_UDP,
+ [0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_UDP,
+ [0x24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L4_UDP,
+ [0x28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L4_UDP,
+ [0x29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L4_UDP,
+ [0x2a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L4_UDP,
+ /* Tunneled - L3 IPV6 */
+ [0x80] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
+ [0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
+ [0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG,
+ [0x83] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG,
+ [0x84] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
+ [0x88] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG,
+ /* Tunneled - L3 IPV6, TCP */
+ [0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0x93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0x94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0x98] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0x99] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0x9a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_TCP,
+ /* Tunneled - L3 IPV6, UDP */
+ [0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0xa3] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0xa4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0xa8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0xa9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0xaa] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_UDP,
+ /* Tunneled - L3 IPV4 */
+ [0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
+ [0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
+ [0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG,
+ [0xc3] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG,
+ [0xc4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
+ [0xc8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT,
+ [0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT |
+ RTE_PTYPE_INNER_L4_FRAG,
+ /* Tunneled - L3 IPV4, TCP */
+ [0xd0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0xd3] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0xd4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0xd8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0xd9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0xda] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_TCP,
+ /* Tunneled - L3 IPV4, UDP */
+ [0xe0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0xe3] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0xe4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0xe8] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
+ [0xe9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
+ [0xea] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L4_UDP,
+};
+
+/**
+ * Stamp a WQE so it won't be reused by the HW.
+ *
+ * This routine is used when freeing a WQE that has been used by the
+ * hardware, or when building a WQ entry has failed, leaving partial
+ * information on the queue.
+ *
+ * @param sq
+ *   Pointer to the SQ structure.
+ * @param index
+ *   Index of the freed WQE.
+ * @param owner
+ *   The value of the WQE owner bit to use in the stamp.
+ *
+ * @return
+ *   The number of Tx basic blocks (TXBBs) the WQE contained.
+ */
+static int
+mlx4_txq_stamp_freed_wqe(struct mlx4_sq *sq, uint16_t index, uint8_t owner)
+{
+ uint32_t stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL |
+ (!!owner << MLX4_SQ_STAMP_SHIFT));
+ volatile uint8_t *wqe = mlx4_get_send_wqe(sq,
+ (index & sq->txbb_cnt_mask));
+ volatile uint32_t *ptr = (volatile uint32_t *)wqe;
+ int i;
+ int txbbs_size;
+ int num_txbbs;
+
+ /* Extract the size from the control segment of the WQE. */
+ num_txbbs = MLX4_SIZE_TO_TXBBS((((volatile struct mlx4_wqe_ctrl_seg *)
+ wqe)->fence_size & 0x3f) << 4);
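+	/* fence_size holds the WQE size in 16-byte units, hence the << 4. */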
+ txbbs_size = num_txbbs * MLX4_TXBB_SIZE;
+ /* Optimize the common case when there is no wrap-around. */
+ if (wqe + txbbs_size <= sq->eob) {
+ /* Stamp the freed descriptor. */
+ for (i = 0; i < txbbs_size; i += MLX4_SQ_STAMP_STRIDE) {
+ *ptr = stamp;
+ ptr += MLX4_SQ_STAMP_DWORDS;
+ }
+ } else {
+ /* Stamp the freed descriptor. */
+ for (i = 0; i < txbbs_size; i += MLX4_SQ_STAMP_STRIDE) {
+ *ptr = stamp;
+ ptr += MLX4_SQ_STAMP_DWORDS;
+ if ((volatile uint8_t *)ptr >= sq->eob) {
+ ptr = (volatile uint32_t *)sq->buf;
+ stamp ^= RTE_BE32(0x80000000);
+ }
+ }
+ }
+ return num_txbbs;
+}
+
+/**
+ * Manage Tx completions.
+ *
+ * When sending a burst, mlx4_tx_burst() posts several WRs.
+ * To improve performance, a completion event is only required once every
+ * MLX4_PMD_TX_PER_COMP_REQ sends. Doing so discards completion information
+ * for other WRs, but this information would not be used anyway.
+ *
+ * @param txq
+ * Pointer to Tx queue structure.
+ *
+ * @return
+ * 0 on success, -1 on failure.
+ */
+static int
+mlx4_txq_complete(struct txq *txq, const unsigned int elts_n,
+ struct mlx4_sq *sq)
+{
+ unsigned int elts_comp = txq->elts_comp;
+ unsigned int elts_tail = txq->elts_tail;
+ struct mlx4_cq *cq = &txq->mcq;
+ volatile struct mlx4_cqe *cqe;
+ uint32_t cons_index = cq->cons_index;
+ uint16_t new_index;
+ uint16_t nr_txbbs = 0;
+ int pkts = 0;
+
+ /*
+ * Traverse over all CQ entries reported and handle each WQ entry
+ * reported by them.
+ */
+ do {
+ cqe = (volatile struct mlx4_cqe *)mlx4_get_cqe(cq, cons_index);
+ if (unlikely(!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+ !!(cons_index & cq->cqe_cnt)))
+ break;
+ /*
+ * Make sure we read the CQE after we read the ownership bit.
+ */
+ rte_io_rmb();
+#ifndef NDEBUG
+ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
+ MLX4_CQE_OPCODE_ERROR)) {
+ volatile struct mlx4_err_cqe *cqe_err =
+ (volatile struct mlx4_err_cqe *)cqe;
+ ERROR("%p CQE error - vendor syndrome: 0x%x"
+ " syndrome: 0x%x\n",
+ (void *)txq, cqe_err->vendor_err,
+ cqe_err->syndrome);
+ }
+#endif /* NDEBUG */
+ /* Get WQE index reported in the CQE. */
+ new_index =
+ rte_be_to_cpu_16(cqe->wqe_index) & sq->txbb_cnt_mask;
+ do {
+ /* Free next descriptor. */
+ nr_txbbs +=
+ mlx4_txq_stamp_freed_wqe(sq,
+ (sq->tail + nr_txbbs) & sq->txbb_cnt_mask,
+ !!((sq->tail + nr_txbbs) & sq->txbb_cnt));
+ pkts++;
+ } while (((sq->tail + nr_txbbs) & sq->txbb_cnt_mask) !=
+ new_index);
+ cons_index++;
+ } while (1);
+ if (unlikely(pkts == 0))
+ return 0;
+ /* Update CQ. */
+ cq->cons_index = cons_index;
+ *cq->set_ci_db = rte_cpu_to_be_32(cq->cons_index & MLX4_CQ_DB_CI_MASK);
+ sq->tail = sq->tail + nr_txbbs;
+ /* Update the list of packets posted for transmission. */
+ elts_comp -= pkts;
+ assert(elts_comp <= txq->elts_comp);
+ /*
+ * Assume completion status is successful as nothing can be done about
+ * it anyway.
+ */
+ elts_tail += pkts;
+ if (elts_tail >= elts_n)
+ elts_tail -= elts_n;
+ txq->elts_tail = elts_tail;
+ txq->elts_comp = elts_comp;
+ return 0;
+}
+
+/**
+ * Get the memory pool (MP) of an mbuf. If the mbuf is indirect, the pool
+ * of the underlying direct mbuf (where the data actually resides) is
+ * returned instead.
+ *
+ * @param buf
+ * Pointer to mbuf.
+ *
+ * @return
+ * Memory pool where data is located for given mbuf.
+ */
+static struct rte_mempool *
+mlx4_txq_mb2mp(struct rte_mbuf *buf)
+{
+ if (unlikely(RTE_MBUF_INDIRECT(buf)))
+ return rte_mbuf_from_indirect(buf)->pool;
+ return buf->pool;
+}
+
+static int
+mlx4_tx_burst_segs(struct rte_mbuf *buf, struct txq *txq,
+ volatile struct mlx4_wqe_ctrl_seg **pctrl)
+{
+ int wqe_real_size;
+ int nr_txbbs;
+ struct pv *pv = (struct pv *)txq->bounce_buf;
+ struct mlx4_sq *sq = &txq->msq;
+ uint32_t head_idx = sq->head & sq->txbb_cnt_mask;
+ volatile struct mlx4_wqe_ctrl_seg *ctrl;
+ volatile struct mlx4_wqe_data_seg *dseg;
+ struct rte_mbuf *sbuf;
+ uint32_t lkey;
+ uintptr_t addr;
+ uint32_t byte_count;
+ int pv_counter = 0;
+
+ /* Calculate the needed work queue entry size for this packet. */
+ wqe_real_size = sizeof(volatile struct mlx4_wqe_ctrl_seg) +
+ buf->nb_segs * sizeof(volatile struct mlx4_wqe_data_seg);
+ nr_txbbs = MLX4_SIZE_TO_TXBBS(wqe_real_size);
+ /*
+ * Check that there is room for this WQE in the send queue and that
+ * the WQE size is legal.
+ */
+ if (((sq->head - sq->tail) + nr_txbbs +
+ sq->headroom_txbbs) >= sq->txbb_cnt ||
+ nr_txbbs > MLX4_MAX_WQE_TXBBS) {
+ return -1;
+ }
+ /* Get the control and data entries of the WQE. */
+ ctrl = (volatile struct mlx4_wqe_ctrl_seg *)
+ mlx4_get_send_wqe(sq, head_idx);
+ dseg = (volatile struct mlx4_wqe_data_seg *)
+ ((uintptr_t)ctrl + sizeof(struct mlx4_wqe_ctrl_seg));
+ *pctrl = ctrl;
+ /* Fill the data segments with buffer information. */
+ for (sbuf = buf; sbuf != NULL; sbuf = sbuf->next, dseg++) {
+ addr = rte_pktmbuf_mtod(sbuf, uintptr_t);
+ rte_prefetch0((volatile void *)addr);
+ /* Handle WQE wraparound. */
+ if (dseg >= (volatile struct mlx4_wqe_data_seg *)sq->eob)
+ dseg = (volatile struct mlx4_wqe_data_seg *)sq->buf;
+ dseg->addr = rte_cpu_to_be_64(addr);
+ /* Memory region key (big endian) for this memory pool. */
+ lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf));
+ dseg->lkey = rte_cpu_to_be_32(lkey);
+#ifndef NDEBUG
+		/* The MP <-> MR association must exist (debug check). */
+ if (unlikely(dseg->lkey == rte_cpu_to_be_32((uint32_t)-1))) {
+ /* MR does not exist. */
+ DEBUG("%p: unable to get MP <-> MR association",
+ (void *)txq);
+ /*
+ * Restamp entry in case of failure.
+ * Make sure that size is written correctly
+ * Note that we give ownership to the SW, not the HW.
+ */
+ wqe_real_size = sizeof(struct mlx4_wqe_ctrl_seg) +
+ buf->nb_segs * sizeof(struct mlx4_wqe_data_seg);
+ ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
+ mlx4_txq_stamp_freed_wqe(sq, head_idx,
+ (sq->head & sq->txbb_cnt) ? 0 : 1);
+ return -1;
+ }
+#endif /* NDEBUG */
+ if (likely(sbuf->data_len)) {
+ byte_count = rte_cpu_to_be_32(sbuf->data_len);
+ } else {
+ /*
+ * Zero length segment is treated as inline segment
+ * with zero data.
+ */
+ byte_count = RTE_BE32(0x80000000);
+ }
+ /*
+ * If the data segment is not at the beginning of a
+ * Tx basic block (TXBB) then write the byte count,
+ * else postpone the writing to just before updating the
+ * control segment.
+ */
+ if ((uintptr_t)dseg & (uintptr_t)(MLX4_TXBB_SIZE - 1)) {
+#if RTE_CACHE_LINE_SIZE < 64
+ /*
+ * Need a barrier here before writing the byte_count
+ * fields to make sure that all the data is visible
+ * before the byte_count field is set.
+ * Otherwise, if the segment begins a new cacheline,
+ * the HCA prefetcher could grab the 64-byte chunk and
+ * get a valid (!= 0xffffffff) byte count but stale
+ * data, and end up sending the wrong data.
+ */
+ rte_io_wmb();
+#endif /* RTE_CACHE_LINE_SIZE */
+ dseg->byte_count = byte_count;
+ } else {
+ /*
+ * This data segment starts at the beginning of a new
+ * TXBB, so we need to postpone its byte_count writing
+ * for later.
+ */
+ pv[pv_counter].dseg = dseg;
+ pv[pv_counter++].val = byte_count;
+ }
+ }
+	/* Write the first DWORD of each TXBB saved earlier. */
+ if (pv_counter) {
+ /* Need a barrier here before writing the byte_count. */
+ rte_io_wmb();
+ for (--pv_counter; pv_counter >= 0; pv_counter--)
+ pv[pv_counter].dseg->byte_count = pv[pv_counter].val;
+ }
+ /* Fill the control parameters for this packet. */
+ ctrl->fence_size = (wqe_real_size >> 4) & 0x3f;
+ return nr_txbbs;
+}
+
+/**
+ * DPDK callback for Tx.
+ *
+ * @param dpdk_txq
+ * Generic pointer to Tx queue structure.
+ * @param[in] pkts
+ * Packets to transmit.
+ * @param pkts_n
+ * Number of packets in array.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+uint16_t
+mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ struct txq *txq = (struct txq *)dpdk_txq;
+ unsigned int elts_head = txq->elts_head;
+ const unsigned int elts_n = txq->elts_n;
+ unsigned int bytes_sent = 0;
+ unsigned int i;
+ unsigned int max;
+ struct mlx4_sq *sq = &txq->msq;
+ int nr_txbbs;
+
+ assert(txq->elts_comp_cd != 0);
+ if (likely(txq->elts_comp != 0))
+ mlx4_txq_complete(txq, elts_n, sq);
+ max = (elts_n - (elts_head - txq->elts_tail));
+ if (max > elts_n)
+ max -= elts_n;
+ assert(max >= 1);
+ assert(max <= elts_n);
+ /* Always leave one free entry in the ring. */
+ --max;
+ if (max > pkts_n)
+ max = pkts_n;
+ for (i = 0; (i != max); ++i) {
+ struct rte_mbuf *buf = pkts[i];
+ unsigned int elts_head_next =
+ (((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
+ struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
+ struct txq_elt *elt = &(*txq->elts)[elts_head];
+ uint32_t owner_opcode = MLX4_OPCODE_SEND;
+ volatile struct mlx4_wqe_ctrl_seg *ctrl;
+ volatile struct mlx4_wqe_data_seg *dseg;
+ union {
+ uint32_t flags;
+ uint16_t flags16[2];
+ } srcrb;
+ uint32_t head_idx = sq->head & sq->txbb_cnt_mask;
+ uint32_t lkey;
+ uintptr_t addr;
+
+ /* Clean up old buffer. */
+ if (likely(elt->buf != NULL)) {
+ struct rte_mbuf *tmp = elt->buf;
+
+#ifndef NDEBUG
+ /* Poisoning. */
+ memset(elt, 0x66, sizeof(*elt));
+#endif
+ /* Faster than rte_pktmbuf_free(). */
+ do {
+ struct rte_mbuf *next = tmp->next;
+
+ rte_pktmbuf_free_seg(tmp);
+ tmp = next;
+ } while (tmp != NULL);
+ }
+ RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
+ if (buf->nb_segs == 1) {
+ /*
+ * Check that there is room for this WQE in the send
+ * queue and that the WQE size is legal
+ */
+ if (((sq->head - sq->tail) + 1 + sq->headroom_txbbs) >=
+ sq->txbb_cnt || 1 > MLX4_MAX_WQE_TXBBS) {
+ elt->buf = NULL;
+ break;
+ }
+ /* Get the control and data entries of the WQE. */
+ ctrl = (volatile struct mlx4_wqe_ctrl_seg *)
+ mlx4_get_send_wqe(sq, head_idx);
+ dseg = (volatile struct mlx4_wqe_data_seg *)
+ ((uintptr_t)ctrl +
+ sizeof(struct mlx4_wqe_ctrl_seg));
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
+ rte_prefetch0((volatile void *)addr);
+ /* Handle WQE wraparound. */
+ if (dseg >=
+ (volatile struct mlx4_wqe_data_seg *)sq->eob)
+ dseg = (volatile struct mlx4_wqe_data_seg *)
+ sq->buf;
+ dseg->addr = rte_cpu_to_be_64(addr);
+ /* Memory region key (big endian). */
+ lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(buf));
+ dseg->lkey = rte_cpu_to_be_32(lkey);
+#ifndef NDEBUG
+ if (unlikely(dseg->lkey ==
+ rte_cpu_to_be_32((uint32_t)-1))) {
+ /* MR does not exist. */
+ DEBUG("%p: unable to get MP <-> MR association",
+ (void *)txq);
+ /*
+ * Restamp entry in case of failure.
+ * Make sure that size is written correctly
+ * Note that we give ownership to the SW,
+ * not the HW.
+ */
+ ctrl->fence_size =
+ (WQE_ONE_DATA_SEG_SIZE >> 4) & 0x3f;
+ mlx4_txq_stamp_freed_wqe(sq, head_idx,
+ (sq->head & sq->txbb_cnt) ? 0 : 1);
+ elt->buf = NULL;
+ break;
+ }
+#endif /* NDEBUG */
+			/* Never TXBB-aligned here, so no compiler barrier is needed. */
+ dseg->byte_count = rte_cpu_to_be_32(buf->data_len);
+ /* Fill the control parameters for this packet. */
+ ctrl->fence_size = (WQE_ONE_DATA_SEG_SIZE >> 4) & 0x3f;
+ nr_txbbs = 1;
+ } else {
+ nr_txbbs = mlx4_tx_burst_segs(buf, txq, &ctrl);
+ if (nr_txbbs < 0) {
+ elt->buf = NULL;
+ break;
+ }
+ }
+ /*
+ * For raw Ethernet, the SOLICIT flag is used to indicate
+ * that no ICRC should be calculated.
+ */
+ txq->elts_comp_cd -= nr_txbbs;
+ if (unlikely(txq->elts_comp_cd <= 0)) {
+ txq->elts_comp_cd = txq->elts_comp_cd_init;
+ srcrb.flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT |
+ MLX4_WQE_CTRL_CQ_UPDATE);
+ } else {
+ srcrb.flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT);
+ }
+ /* Enable HW checksum offload if requested */
+ if (txq->csum &&
+ (buf->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))) {
+ const uint64_t is_tunneled = (buf->ol_flags &
+ (PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN));
+
+ if (is_tunneled && txq->csum_l2tun) {
+ owner_opcode |= MLX4_WQE_CTRL_IIP_HDR_CSUM |
+ MLX4_WQE_CTRL_IL4_HDR_CSUM;
+ if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+ srcrb.flags |=
+ RTE_BE32(MLX4_WQE_CTRL_IP_HDR_CSUM);
+ } else {
+ srcrb.flags |=
+ RTE_BE32(MLX4_WQE_CTRL_IP_HDR_CSUM |
+ MLX4_WQE_CTRL_TCP_UDP_CSUM);
+ }
+ }
+ if (txq->lb) {
+ /*
+ * Copy destination MAC address to the WQE, this allows
+ * loopback in eSwitch, so that VFs and PF can
+ * communicate with each other.
+ */
+ srcrb.flags16[0] = *(rte_pktmbuf_mtod(buf, uint16_t *));
+ ctrl->imm = *(rte_pktmbuf_mtod_offset(buf, uint32_t *,
+ sizeof(uint16_t)));
+ } else {
+ ctrl->imm = 0;
+ }
+ ctrl->srcrb_flags = srcrb.flags;
+ /*
+ * Make sure descriptor is fully written before
+ * setting ownership bit (because HW can start
+ * executing as soon as we do).
+ */
+ rte_io_wmb();
+ ctrl->owner_opcode = rte_cpu_to_be_32(owner_opcode |
+ ((sq->head & sq->txbb_cnt) ?
+ MLX4_BIT_WQE_OWN : 0));
+ sq->head += nr_txbbs;
+ elt->buf = buf;
+ bytes_sent += buf->pkt_len;
+ elts_head = elts_head_next;
+ }
+ /* Take a shortcut if nothing must be sent. */
+ if (unlikely(i == 0))
+ return 0;
+ /* Increment send statistics counters. */
+ txq->stats.opackets += i;
+ txq->stats.obytes += bytes_sent;
+ /* Make sure that descriptors are written before doorbell record. */
+ rte_wmb();
+ /* Ring QP doorbell. */
+ rte_write32(txq->msq.doorbell_qpn, txq->msq.db);
+ txq->elts_head = elts_head;
+ txq->elts_comp += i;
+ return i;
+}
+
+/**
+ * Translate Rx completion flags to packet type.
+ *
+ * @param[in] cqe
+ * Pointer to CQE.
+ *
+ * @return
+ * Packet type for struct rte_mbuf.
+ */
+static inline uint32_t
+rxq_cq_to_pkt_type(volatile struct mlx4_cqe *cqe)
+{
+ uint8_t idx = 0;
+ uint32_t pinfo = rte_be_to_cpu_32(cqe->vlan_my_qpn);
+ uint32_t status = rte_be_to_cpu_32(cqe->status);
+
+ /*
+ * The index to the array should have:
+ * bit[7] - MLX4_CQE_L2_TUNNEL
+ * bit[6] - MLX4_CQE_L2_TUNNEL_IPV4
+ */
+ if (!(pinfo & MLX4_CQE_L2_VLAN_MASK) && (pinfo & MLX4_CQE_L2_TUNNEL))
+ idx |= ((pinfo & MLX4_CQE_L2_TUNNEL) >> 20) |
+ ((pinfo & MLX4_CQE_L2_TUNNEL_IPV4) >> 19);
+ /*
+ * The index to the array should have:
+ * bit[5] - MLX4_CQE_STATUS_UDP
+ * bit[4] - MLX4_CQE_STATUS_TCP
+ * bit[3] - MLX4_CQE_STATUS_IPV4OPT
+ * bit[2] - MLX4_CQE_STATUS_IPV6
+ * bit[1] - MLX4_CQE_STATUS_IPV4F
+ * bit[0] - MLX4_CQE_STATUS_IPV4
+ * giving a total of up to 256 entries.
+ */
+ idx |= ((status & MLX4_CQE_STATUS_PTYPE_MASK) >> 22);
+ return mlx4_ptype_table[idx];
+}
+
+/**
+ * Translate Rx completion flags to offload flags.
+ *
+ * @param flags
+ * Rx completion flags returned by mlx4_cqe_flags().
+ * @param csum
+ * Whether Rx checksums are enabled.
+ * @param csum_l2tun
+ * Whether Rx L2 tunnel checksums are enabled.
+ *
+ * @return
+ * Offload flags (ol_flags) in mbuf format.
+ */
+static inline uint32_t
+rxq_cq_to_ol_flags(uint32_t flags, int csum, int csum_l2tun)
+{
+ uint32_t ol_flags = 0;
+
+ if (csum)
+ ol_flags |=
+ mlx4_transpose(flags,
+ MLX4_CQE_STATUS_IP_HDR_CSUM_OK,
+ PKT_RX_IP_CKSUM_GOOD) |
+ mlx4_transpose(flags,
+ MLX4_CQE_STATUS_TCP_UDP_CSUM_OK,
+ PKT_RX_L4_CKSUM_GOOD);
+ if ((flags & MLX4_CQE_L2_TUNNEL) && csum_l2tun)
+ ol_flags |=
+ mlx4_transpose(flags,
+ MLX4_CQE_L2_TUNNEL_IPOK,
+ PKT_RX_IP_CKSUM_GOOD) |
+ mlx4_transpose(flags,
+ MLX4_CQE_L2_TUNNEL_L4_CSUM,
+ PKT_RX_L4_CKSUM_GOOD);
+ return ol_flags;
+}
+
+/**
+ * Extract checksum information from CQE flags.
+ *
+ * @param cqe
+ * Pointer to CQE structure.
+ * @param csum
+ * Whether Rx checksums are enabled.
+ * @param csum_l2tun
+ * Whether Rx L2 tunnel checksums are enabled.
+ *
+ * @return
+ * CQE checksum information.
+ */
+static inline uint32_t
+mlx4_cqe_flags(volatile struct mlx4_cqe *cqe, int csum, int csum_l2tun)
+{
+ uint32_t flags = 0;
+
+ /*
+	 * The relevant bits occupy non-overlapping positions in their
+	 * respective CQE fields, so they can be combined into a single
+	 * 32-bit value.
+ */
+ if (csum)
+ flags = (rte_be_to_cpu_32(cqe->status) &
+ MLX4_CQE_STATUS_IPV4_CSUM_OK);
+ if (csum_l2tun)
+ flags |= (rte_be_to_cpu_32(cqe->vlan_my_qpn) &
+ (MLX4_CQE_L2_TUNNEL |
+ MLX4_CQE_L2_TUNNEL_IPOK |
+ MLX4_CQE_L2_TUNNEL_L4_CSUM |
+ MLX4_CQE_L2_TUNNEL_IPV4));
+ return flags;
+}
+
+/**
+ * Poll one CQE from CQ.
+ *
+ * @param rxq
+ * Pointer to the receive queue structure.
+ * @param[out] out
+ * Just polled CQE.
+ *
+ * @return
+ *   Number of bytes reported by the CQE, 0 when there is no completion.
+ */
+static unsigned int
+mlx4_cq_poll_one(struct rxq *rxq, volatile struct mlx4_cqe **out)
+{
+ int ret = 0;
+ volatile struct mlx4_cqe *cqe = NULL;
+ struct mlx4_cq *cq = &rxq->mcq;
+
+ cqe = (volatile struct mlx4_cqe *)mlx4_get_cqe(cq, cq->cons_index);
+ if (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+ !!(cq->cons_index & cq->cqe_cnt))
+ goto out;
+ /*
+ * Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ rte_rmb();
+ assert(!(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK));
+ assert((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) !=
+ MLX4_CQE_OPCODE_ERROR);
+ ret = rte_be_to_cpu_32(cqe->byte_cnt);
+ ++cq->cons_index;
+out:
+ *out = cqe;
+ return ret;
+}
+
+/**
+ * DPDK callback for Rx with scattered packets support.
+ *
+ * @param dpdk_rxq
+ * Generic pointer to Rx queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets successfully received (<= pkts_n).
+ */
+uint16_t
+mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ struct rxq *rxq = dpdk_rxq;
+ const uint32_t wr_cnt = (1 << rxq->elts_n) - 1;
+ const uint16_t sges_n = rxq->sges_n;
+ struct rte_mbuf *pkt = NULL;
+ struct rte_mbuf *seg = NULL;
+ unsigned int i = 0;
+ uint32_t rq_ci = rxq->rq_ci << sges_n;
+ int len = 0;
+
+ while (pkts_n) {
+ volatile struct mlx4_cqe *cqe;
+ uint32_t idx = rq_ci & wr_cnt;
+ struct rte_mbuf *rep = (*rxq->elts)[idx];
+ volatile struct mlx4_wqe_data_seg *scat = &(*rxq->wqes)[idx];
+
+ /* Update the 'next' pointer of the previous segment. */
+ if (pkt)
+ seg->next = rep;
+ seg = rep;
+ rte_prefetch0(seg);
+ rte_prefetch0(scat);
+ rep = rte_mbuf_raw_alloc(rxq->mp);
+ if (unlikely(rep == NULL)) {
+ ++rxq->stats.rx_nombuf;
+ if (!pkt) {
+ /*
+ * No buffers before we even started,
+ * bail out silently.
+ */
+ break;
+ }
+ while (pkt != seg) {
+ assert(pkt != (*rxq->elts)[idx]);
+ rep = pkt->next;
+ pkt->next = NULL;
+ pkt->nb_segs = 1;
+ rte_mbuf_raw_free(pkt);
+ pkt = rep;
+ }
+ break;
+ }
+ if (!pkt) {
+ /* Looking for the new packet. */
+ len = mlx4_cq_poll_one(rxq, &cqe);
+ if (!len) {
+ rte_mbuf_raw_free(rep);
+ break;
+ }
+ if (unlikely(len < 0)) {
+ /* Rx error, packet is likely too large. */
+ rte_mbuf_raw_free(rep);
+ ++rxq->stats.idropped;
+ goto skip;
+ }
+ pkt = seg;
+ /* Update packet information. */
+ pkt->packet_type = rxq_cq_to_pkt_type(cqe);
+ pkt->ol_flags = 0;
+ pkt->pkt_len = len;
+ if (rxq->csum | rxq->csum_l2tun) {
+ uint32_t flags =
+ mlx4_cqe_flags(cqe,
+ rxq->csum,
+ rxq->csum_l2tun);
+
+ pkt->ol_flags =
+ rxq_cq_to_ol_flags(flags,
+ rxq->csum,
+ rxq->csum_l2tun);
+ }
+ }
+ rep->nb_segs = 1;
+ rep->port = rxq->port_id;
+ rep->data_len = seg->data_len;
+ rep->data_off = seg->data_off;
+ (*rxq->elts)[idx] = rep;
+ /*
+ * Fill NIC descriptor with the new buffer. The lkey and size
+ * of the buffers are already known, only the buffer address
+ * changes.
+ */
+ scat->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
+ if (len > seg->data_len) {
+ len -= seg->data_len;
+ ++pkt->nb_segs;
+ ++rq_ci;
+ continue;
+ }
+ /* The last segment. */
+ seg->data_len = len;
+ /* Increment bytes counter. */
+ rxq->stats.ibytes += pkt->pkt_len;
+ /* Return packet. */
+ *(pkts++) = pkt;
+ pkt = NULL;
+ --pkts_n;
+ ++i;
+skip:
+ /* Align consumer index to the next stride. */
+ rq_ci >>= sges_n;
+ ++rq_ci;
+ rq_ci <<= sges_n;
+ }
+ if (unlikely(i == 0 && (rq_ci >> sges_n) == rxq->rq_ci))
+ return 0;
+ /* Update the consumer index. */
+ rxq->rq_ci = rq_ci >> sges_n;
+ rte_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+ *rxq->mcq.set_ci_db =
+ rte_cpu_to_be_32(rxq->mcq.cons_index & MLX4_CQ_DB_CI_MASK);
+ /* Increment packets counter. */
+ rxq->stats.ipackets += i;
+ return i;
+}
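To make the stride arithmetic in the skip path above concrete, here is a small self-contained sketch (not driver code) of how rq_ci is rounded up to the first WQE of the next stride when a packet is dropped, assuming sges_n = 2, i.e. four segments per stride:

/* Illustration of the stride alignment in mlx4_rx_burst()'s skip path. */
#include <stdint.h>
#include <stdio.h>

/* Round rq_ci up to the first index of the next stride of 2^sges_n WQEs. */
static uint32_t
next_stride(uint32_t rq_ci, uint16_t sges_n)
{
	rq_ci >>= sges_n; /* drop the intra-stride offset */
	++rq_ci;          /* move to the next stride */
	rq_ci <<= sges_n; /* back to a WQE index */
	return rq_ci;
}

int
main(void)
{
	/* With sges_n = 2, indexes 4..7 belong to the second stride. */
	printf("%u\n", next_stride(5, 2)); /* prints 8 */
	printf("%u\n", next_stride(8, 2)); /* prints 12 */
	return 0;
}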
+
+/**
+ * Dummy DPDK callback for Tx.
+ *
+ * This function is used to temporarily replace the real callback during
+ * unsafe control operations on the queue, or in case of error.
+ *
+ * @param dpdk_txq
+ * Generic pointer to Tx queue structure.
+ * @param[in] pkts
+ * Packets to transmit.
+ * @param pkts_n
+ * Number of packets in array.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+uint16_t
+mlx4_tx_burst_removed(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ (void)dpdk_txq;
+ (void)pkts;
+ (void)pkts_n;
+ return 0;
+}
+
+/**
+ * Dummy DPDK callback for Rx.
+ *
+ * This function is used to temporarily replace the real callback during
+ * unsafe control operations on the queue, or in case of error.
+ *
+ * @param dpdk_rxq
+ * Generic pointer to Rx queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets successfully received (<= pkts_n).
+ */
+uint16_t
+mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ (void)dpdk_rxq;
+ (void)pkts;
+ (void)pkts_n;
+ return 0;
+}
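As a usage sketch, control-path code parks the data path on these stubs before releasing or reconfiguring queues, the same pattern the mlx5 changes further below apply with removed_rx_burst/removed_tx_burst; the helper name and the drain delay here are illustrative assumptions:

/* Sketch: temporarily disable the burst callbacks around queue teardown. */
#include <unistd.h>

static void
park_burst_functions(struct rte_eth_dev *dev)
{
	dev->rx_pkt_burst = mlx4_rx_burst_removed;
	dev->tx_pkt_burst = mlx4_tx_burst_removed;
	/* Leave a short grace period so in-flight bursts can drain
	 * before queue resources are actually released. */
	usleep(1000);
}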
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
new file mode 100644
index 00000000..4acad801
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -0,0 +1,214 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MLX4_RXTX_H_
+#define MLX4_RXTX_H_
+
+#include <stdint.h>
+#include <sys/queue.h>
+
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/mlx4dv.h>
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_ethdev.h>
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+
+#include "mlx4.h"
+#include "mlx4_prm.h"
+
+/** Rx queue counters. */
+struct mlx4_rxq_stats {
+ unsigned int idx; /**< Mapping index. */
+ uint64_t ipackets; /**< Total of successfully received packets. */
+ uint64_t ibytes; /**< Total of successfully received bytes. */
+ uint64_t idropped; /**< Total of packets dropped when Rx ring full. */
+ uint64_t rx_nombuf; /**< Total of Rx mbuf allocation failures. */
+};
+
+/** Rx queue descriptor. */
+struct rxq {
+ struct priv *priv; /**< Back pointer to private data. */
+ struct rte_mempool *mp; /**< Memory pool for allocations. */
+ struct mlx4_mr *mr; /**< Memory region. */
+ struct ibv_cq *cq; /**< Completion queue. */
+ struct ibv_wq *wq; /**< Work queue. */
+ struct ibv_comp_channel *channel; /**< Rx completion channel. */
+ uint16_t rq_ci; /**< Saved RQ consumer index. */
+ uint16_t port_id; /**< Port ID for incoming packets. */
+ uint16_t sges_n; /**< Number of segments per packet (log2 value). */
+ uint16_t elts_n; /**< Mbuf queue size (log2 value). */
+ struct rte_mbuf *(*elts)[]; /**< Rx elements. */
+ volatile struct mlx4_wqe_data_seg (*wqes)[]; /**< HW queue entries. */
+ volatile uint32_t *rq_db; /**< RQ doorbell record. */
+ uint32_t csum:1; /**< Enable checksum offloading. */
+ uint32_t csum_l2tun:1; /**< Same for L2 tunnels. */
+ struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+ struct mlx4_rxq_stats stats; /**< Rx queue counters. */
+ unsigned int socket; /**< CPU socket ID for allocations. */
+ uint32_t usecnt; /**< Number of users relying on queue resources. */
+ uint8_t data[]; /**< Remaining queue resources. */
+};
+
+/** Shared flow target for Rx queues. */
+struct mlx4_rss {
+ LIST_ENTRY(mlx4_rss) next; /**< Next entry in list. */
+ struct priv *priv; /**< Back pointer to private data. */
+ uint32_t refcnt; /**< Reference count for this object. */
+ uint32_t usecnt; /**< Number of users relying on @p qp and @p ind. */
+ struct ibv_qp *qp; /**< Queue pair. */
+ struct ibv_rwq_ind_table *ind; /**< Indirection table. */
+ uint64_t fields; /**< Fields for RSS processing (Verbs format). */
+ uint8_t key[MLX4_RSS_HASH_KEY_SIZE]; /**< Hash key to use. */
+ uint16_t queues; /**< Number of target queues. */
+ uint16_t queue_id[]; /**< Target queues. */
+};
+
+/** Tx element. */
+struct txq_elt {
+ struct rte_mbuf *buf; /**< Buffer. */
+};
+
+/** Tx queue counters. */
+struct mlx4_txq_stats {
+ unsigned int idx; /**< Mapping index. */
+ uint64_t opackets; /**< Total of successfully sent packets. */
+ uint64_t obytes; /**< Total of successfully sent bytes. */
+ uint64_t odropped; /**< Total of packets not sent when Tx ring full. */
+};
+
+/** Tx queue descriptor. */
+struct txq {
+ struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */
+ struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+ unsigned int elts_head; /**< Current index in (*elts)[]. */
+ unsigned int elts_tail; /**< First element awaiting completion. */
+ unsigned int elts_comp; /**< Number of packets awaiting completion. */
+ int elts_comp_cd; /**< Countdown for next completion. */
+ unsigned int elts_comp_cd_init; /**< Initial value for countdown. */
+ unsigned int elts_n; /**< (*elts)[] length. */
+ struct txq_elt (*elts)[]; /**< Tx elements. */
+ struct mlx4_txq_stats stats; /**< Tx queue counters. */
+ uint32_t max_inline; /**< Max inline send size. */
+ uint32_t csum:1; /**< Enable checksum offloading. */
+ uint32_t csum_l2tun:1; /**< Same for L2 tunnels. */
+ uint32_t lb:1; /**< Whether packets should be looped back by eSwitch. */
+ uint8_t *bounce_buf;
+ /**< Memory used for storing the first DWORD of data TXBBs. */
+ struct {
+ const struct rte_mempool *mp; /**< Cached memory pool. */
+ struct mlx4_mr *mr; /**< Memory region (for mp). */
+ uint32_t lkey; /**< mr->lkey copy. */
+ } mp2mr[MLX4_PMD_TX_MP_CACHE]; /**< MP to MR translation table. */
+ struct priv *priv; /**< Back pointer to private data. */
+ unsigned int socket; /**< CPU socket ID for allocations. */
+ struct ibv_cq *cq; /**< Completion queue. */
+ struct ibv_qp *qp; /**< Queue pair. */
+ uint8_t data[]; /**< Remaining queue resources. */
+};
+
+/* mlx4_rxq.c */
+
+uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE];
+int mlx4_rss_init(struct priv *priv);
+void mlx4_rss_deinit(struct priv *priv);
+struct mlx4_rss *mlx4_rss_get(struct priv *priv, uint64_t fields,
+ uint8_t key[MLX4_RSS_HASH_KEY_SIZE],
+ uint16_t queues, const uint16_t queue_id[]);
+void mlx4_rss_put(struct mlx4_rss *rss);
+int mlx4_rss_attach(struct mlx4_rss *rss);
+void mlx4_rss_detach(struct mlx4_rss *rss);
+int mlx4_rxq_attach(struct rxq *rxq);
+void mlx4_rxq_detach(struct rxq *rxq);
+int mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
+ uint16_t desc, unsigned int socket,
+ const struct rte_eth_rxconf *conf,
+ struct rte_mempool *mp);
+void mlx4_rx_queue_release(void *dpdk_rxq);
+
+/* mlx4_rxtx.c */
+
+uint16_t mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n);
+uint16_t mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts,
+ uint16_t pkts_n);
+uint16_t mlx4_tx_burst_removed(void *dpdk_txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n);
+uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts,
+ uint16_t pkts_n);
+
+/* mlx4_txq.c */
+
+int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
+ uint16_t desc, unsigned int socket,
+ const struct rte_eth_txconf *conf);
+void mlx4_tx_queue_release(void *dpdk_txq);
+
+/**
+ * Get memory region (MR) <-> memory pool (MP) association from txq->mp2mr[].
+ * Call mlx4_txq_add_mr() if MP is not registered yet.
+ *
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param[in] mp
+ * Memory pool for which a memory region lkey must be returned.
+ *
+ * @return
+ * mr->lkey on success, (uint32_t)-1 on failure.
+ */
+static inline uint32_t
+mlx4_txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
+{
+ unsigned int i;
+
+ for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
+ if (unlikely(txq->mp2mr[i].mp == NULL)) {
+ /* Unknown MP, add a new MR for it. */
+ break;
+ }
+ if (txq->mp2mr[i].mp == mp) {
+ /* MP found. */
+ return txq->mp2mr[i].lkey;
+ }
+ }
+ return mlx4_txq_add_mr(txq, mp, i);
+}
+
+#endif /* MLX4_RXTX_H_ */
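As a usage note for mlx4_txq_mp2mr(): a Tx routine would resolve each outgoing mbuf's lkey through this helper before filling its data segment. The sketch below is an illustration, not the driver's Tx path; it assumes the usual mlx4 data segment layout (byte_count/lkey/addr) and relies on the mbuf's pool pointer:

/* Sketch: resolving an mbuf's lkey inside a Tx routine. */
static inline int
txq_fill_dseg(struct txq *txq, volatile struct mlx4_wqe_data_seg *dseg,
	      struct rte_mbuf *buf)
{
	uint32_t lkey = mlx4_txq_mp2mr(txq, buf->pool);

	if (unlikely(lkey == (uint32_t)-1))
		return -1; /* memory pool could not be registered */
	dseg->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
	dseg->byte_count = rte_cpu_to_be_32(rte_pktmbuf_data_len(buf));
	dseg->lkey = rte_cpu_to_be_32(lkey);
	return 0;
}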
diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
new file mode 100644
index 00000000..7882a4d0
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_txq.c
@@ -0,0 +1,414 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Tx queues configuration for mlx4 driver.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+
+#include "mlx4.h"
+#include "mlx4_autoconf.h"
+#include "mlx4_prm.h"
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+/**
+ * Free Tx queue elements.
+ *
+ * @param txq
+ * Pointer to Tx queue structure.
+ */
+static void
+mlx4_txq_free_elts(struct txq *txq)
+{
+ unsigned int elts_head = txq->elts_head;
+ unsigned int elts_tail = txq->elts_tail;
+ struct txq_elt (*elts)[txq->elts_n] = txq->elts;
+
+ DEBUG("%p: freeing WRs", (void *)txq);
+ while (elts_tail != elts_head) {
+ struct txq_elt *elt = &(*elts)[elts_tail];
+
+ assert(elt->buf != NULL);
+ rte_pktmbuf_free(elt->buf);
+ elt->buf = NULL;
+ if (++elts_tail == RTE_DIM(*elts))
+ elts_tail = 0;
+ }
+ txq->elts_tail = txq->elts_head;
+}
+
+struct txq_mp2mr_mbuf_check_data {
+ int ret;
+};
+
+/**
+ * Callback function for rte_mempool_obj_iter() to check whether a given
+ * mempool object looks like a mbuf.
+ *
+ * @param[in] mp
+ * The mempool pointer.
+ * @param[in] arg
+ * Context data (struct txq_mp2mr_mbuf_check_data). Contains the
+ * return value.
+ * @param[in] obj
+ * Object address.
+ * @param index
+ * Object index, unused.
+ */
+static void
+mlx4_txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
+ uint32_t index)
+{
+ struct txq_mp2mr_mbuf_check_data *data = arg;
+ struct rte_mbuf *buf = obj;
+
+ (void)index;
+ /*
+ * Check whether mbuf structure fits element size and whether mempool
+ * pointer is valid.
+ */
+ if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
+ data->ret = -1;
+}
+
+/**
+ * Iterator function for rte_mempool_walk() to register existing mempools and
+ * fill the MP to MR cache of a Tx queue.
+ *
+ * @param[in] mp
+ * Memory Pool to register.
+ * @param[in, out] arg
+ * Pointer to Tx queue structure.
+ */
+static void
+mlx4_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
+{
+ struct txq *txq = arg;
+ struct txq_mp2mr_mbuf_check_data data = {
+ .ret = 0,
+ };
+
+ /* Register mempool only if the first element looks like a mbuf. */
+ if (rte_mempool_obj_iter(mp, mlx4_txq_mp2mr_mbuf_check, &data) == 0 ||
+ data.ret == -1)
+ return;
+ mlx4_txq_mp2mr(txq, mp);
+}
+
+/**
+ * Retrieve information needed to directly access the Tx queue.
+ *
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param mlxdv
+ * Pointer to device information for this Tx queue.
+ */
+static void
+mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
+{
+ struct mlx4_sq *sq = &txq->msq;
+ struct mlx4_cq *cq = &txq->mcq;
+ struct mlx4dv_qp *dqp = mlxdv->qp.out;
+ struct mlx4dv_cq *dcq = mlxdv->cq.out;
+ uint32_t sq_size = (uint32_t)dqp->rq.offset - (uint32_t)dqp->sq.offset;
+
+ sq->buf = (uint8_t *)dqp->buf.buf + dqp->sq.offset;
+ /* Total length, including headroom and spare WQEs. */
+ sq->eob = sq->buf + sq_size;
+ sq->head = 0;
+ sq->tail = 0;
+ sq->txbb_cnt =
+ (dqp->sq.wqe_cnt << dqp->sq.wqe_shift) >> MLX4_TXBB_SHIFT;
+ sq->txbb_cnt_mask = sq->txbb_cnt - 1;
+ sq->db = dqp->sdb;
+ sq->doorbell_qpn = dqp->doorbell_qpn;
+ sq->headroom_txbbs =
+ (2048 + (1 << dqp->sq.wqe_shift)) >> MLX4_TXBB_SHIFT;
+ cq->buf = dcq->buf.buf;
+ cq->cqe_cnt = dcq->cqe_cnt;
+ cq->set_ci_db = dcq->set_ci_db;
+ cq->cqe_64 = (dcq->cqe_size & 64) ? 1 : 0;
+}
+
+/**
+ * DPDK callback to configure a Tx queue.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param idx
+ * Tx queue index.
+ * @param desc
+ * Number of descriptors to configure in queue.
+ * @param socket
+ * NUMA socket on which memory must be allocated.
+ * @param[in] conf
+ * Thresholds parameters.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+ unsigned int socket, const struct rte_eth_txconf *conf)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx4dv_obj mlxdv;
+ struct mlx4dv_qp dv_qp;
+ struct mlx4dv_cq dv_cq;
+ struct txq_elt (*elts)[desc];
+ struct ibv_qp_init_attr qp_init_attr;
+ struct txq *txq;
+ uint8_t *bounce_buf;
+ struct mlx4_malloc_vec vec[] = {
+ {
+ .align = RTE_CACHE_LINE_SIZE,
+ .size = sizeof(*txq),
+ .addr = (void **)&txq,
+ },
+ {
+ .align = RTE_CACHE_LINE_SIZE,
+ .size = sizeof(*elts),
+ .addr = (void **)&elts,
+ },
+ {
+ .align = RTE_CACHE_LINE_SIZE,
+ .size = MLX4_MAX_WQE_SIZE,
+ .addr = (void **)&bounce_buf,
+ },
+ };
+ int ret;
+
+ (void)conf; /* Thresholds configuration (ignored). */
+ DEBUG("%p: configuring queue %u for %u descriptors",
+ (void *)dev, idx, desc);
+ if (idx >= dev->data->nb_tx_queues) {
+ rte_errno = EOVERFLOW;
+ ERROR("%p: queue index out of range (%u >= %u)",
+ (void *)dev, idx, dev->data->nb_tx_queues);
+ return -rte_errno;
+ }
+ txq = dev->data->tx_queues[idx];
+ if (txq) {
+ rte_errno = EEXIST;
+ DEBUG("%p: Tx queue %u already configured, release it first",
+ (void *)dev, idx);
+ return -rte_errno;
+ }
+ if (!desc) {
+ rte_errno = EINVAL;
+ ERROR("%p: invalid number of Tx descriptors", (void *)dev);
+ return -rte_errno;
+ }
+ /* Allocate and initialize Tx queue. */
+ mlx4_zmallocv_socket("TXQ", vec, RTE_DIM(vec), socket);
+ if (!txq) {
+ ERROR("%p: unable to allocate queue index %u",
+ (void *)dev, idx);
+ return -rte_errno;
+ }
+ *txq = (struct txq){
+ .priv = priv,
+ .stats = {
+ .idx = idx,
+ },
+ .socket = socket,
+ .elts_n = desc,
+ .elts = elts,
+ .elts_head = 0,
+ .elts_tail = 0,
+ .elts_comp = 0,
+ /*
+ * Request send completion every MLX4_PMD_TX_PER_COMP_REQ
+ * packets or at least 4 times per ring.
+ */
+ .elts_comp_cd =
+ RTE_MIN(MLX4_PMD_TX_PER_COMP_REQ, desc / 4),
+ .elts_comp_cd_init =
+ RTE_MIN(MLX4_PMD_TX_PER_COMP_REQ, desc / 4),
+ .csum = priv->hw_csum,
+ .csum_l2tun = priv->hw_csum_l2tun,
+ /* Enable Tx loopback for VF devices. */
+ .lb = !!priv->vf,
+ .bounce_buf = bounce_buf,
+ };
+ txq->cq = ibv_create_cq(priv->ctx, desc, NULL, NULL, 0);
+ if (!txq->cq) {
+ rte_errno = ENOMEM;
+ ERROR("%p: CQ creation failure: %s",
+ (void *)dev, strerror(rte_errno));
+ goto error;
+ }
+ qp_init_attr = (struct ibv_qp_init_attr){
+ .send_cq = txq->cq,
+ .recv_cq = txq->cq,
+ .cap = {
+ .max_send_wr =
+ RTE_MIN(priv->device_attr.max_qp_wr, desc),
+ .max_send_sge = 1,
+ .max_inline_data = MLX4_PMD_MAX_INLINE,
+ },
+ .qp_type = IBV_QPT_RAW_PACKET,
+ /* No completion events must occur by default. */
+ .sq_sig_all = 0,
+ };
+ txq->qp = ibv_create_qp(priv->pd, &qp_init_attr);
+ if (!txq->qp) {
+ rte_errno = errno ? errno : EINVAL;
+ ERROR("%p: QP creation failure: %s",
+ (void *)dev, strerror(rte_errno));
+ goto error;
+ }
+ txq->max_inline = qp_init_attr.cap.max_inline_data;
+ ret = ibv_modify_qp
+ (txq->qp,
+ &(struct ibv_qp_attr){
+ .qp_state = IBV_QPS_INIT,
+ .port_num = priv->port,
+ },
+ IBV_QP_STATE | IBV_QP_PORT);
+ if (ret) {
+ rte_errno = ret;
+ ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
+ (void *)dev, strerror(rte_errno));
+ goto error;
+ }
+ ret = ibv_modify_qp
+ (txq->qp,
+ &(struct ibv_qp_attr){
+ .qp_state = IBV_QPS_RTR,
+ },
+ IBV_QP_STATE);
+ if (ret) {
+ rte_errno = ret;
+ ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
+ (void *)dev, strerror(rte_errno));
+ goto error;
+ }
+ ret = ibv_modify_qp
+ (txq->qp,
+ &(struct ibv_qp_attr){
+ .qp_state = IBV_QPS_RTS,
+ },
+ IBV_QP_STATE);
+ if (ret) {
+ rte_errno = ret;
+ ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
+ (void *)dev, strerror(rte_errno));
+ goto error;
+ }
+ /* Retrieve device queue information. */
+ mlxdv.cq.in = txq->cq;
+ mlxdv.cq.out = &dv_cq;
+ mlxdv.qp.in = txq->qp;
+ mlxdv.qp.out = &dv_qp;
+ ret = mlx4dv_init_obj(&mlxdv, MLX4DV_OBJ_QP | MLX4DV_OBJ_CQ);
+ if (ret) {
+ rte_errno = EINVAL;
+ ERROR("%p: failed to obtain information needed for"
+ " accessing the device queues", (void *)dev);
+ goto error;
+ }
+ mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
+ /* Pre-register known mempools. */
+ rte_mempool_walk(mlx4_txq_mp2mr_iter, txq);
+ DEBUG("%p: adding Tx queue %p to list", (void *)dev, (void *)txq);
+ dev->data->tx_queues[idx] = txq;
+ return 0;
+error:
+ dev->data->tx_queues[idx] = NULL;
+ ret = rte_errno;
+ mlx4_tx_queue_release(txq);
+ rte_errno = ret;
+ assert(rte_errno > 0);
+ return -rte_errno;
+}
+
+/**
+ * DPDK callback to release a Tx queue.
+ *
+ * @param dpdk_txq
+ * Generic Tx queue pointer.
+ */
+void
+mlx4_tx_queue_release(void *dpdk_txq)
+{
+ struct txq *txq = (struct txq *)dpdk_txq;
+ struct priv *priv;
+ unsigned int i;
+
+ if (txq == NULL)
+ return;
+ priv = txq->priv;
+ for (i = 0; i != priv->dev->data->nb_tx_queues; ++i)
+ if (priv->dev->data->tx_queues[i] == txq) {
+ DEBUG("%p: removing Tx queue %p from list",
+ (void *)priv->dev, (void *)txq);
+ priv->dev->data->tx_queues[i] = NULL;
+ break;
+ }
+ mlx4_txq_free_elts(txq);
+ if (txq->qp)
+ claim_zero(ibv_destroy_qp(txq->qp));
+ if (txq->cq)
+ claim_zero(ibv_destroy_cq(txq->cq));
+ for (i = 0; i != RTE_DIM(txq->mp2mr); ++i) {
+ if (!txq->mp2mr[i].mp)
+ break;
+ assert(txq->mp2mr[i].mr);
+ mlx4_mr_put(txq->mp2mr[i].mr);
+ }
+ rte_free(txq);
+}
diff --git a/drivers/net/mlx4/mlx4_utils.c b/drivers/net/mlx4/mlx4_utils.c
new file mode 100644
index 00000000..f18c7145
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_utils.c
@@ -0,0 +1,217 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Utility functions used by the mlx4 driver.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+
+#include "mlx4_utils.h"
+
+/**
+ * Make a file descriptor non-blocking.
+ *
+ * @param fd
+ * File descriptor to alter.
+ *
+ * @return
+ * 0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_fd_set_non_blocking(int fd)
+{
+ int ret = fcntl(fd, F_GETFL);
+
+ if (ret != -1 && !fcntl(fd, F_SETFL, ret | O_NONBLOCK))
+ return 0;
+ assert(errno);
+ rte_errno = errno;
+ return -rte_errno;
+}
+
+/**
+ * Internal helper to allocate memory once for several disparate objects.
+ *
+ * The most restrictive alignment constraint for standard objects is assumed
+ * to be sizeof(double) and is used as a default value.
+ *
+ * C11 code would include stdalign.h and use alignof(max_align_t); however,
+ * we'll stick with C99 for the time being.
+ */
+static inline size_t
+mlx4_mallocv_inline(const char *type, const struct mlx4_malloc_vec *vec,
+ unsigned int cnt, int zero, int socket)
+{
+ unsigned int i;
+ size_t size;
+ size_t least;
+ uint8_t *data = NULL;
+ int fill = !vec[0].addr;
+
+fill:
+ size = 0;
+ least = 0;
+ for (i = 0; i < cnt; ++i) {
+ size_t align = (uintptr_t)vec[i].align;
+
+ if (!align) {
+ align = sizeof(double);
+ } else if (!rte_is_power_of_2(align)) {
+ rte_errno = EINVAL;
+ goto error;
+ }
+ if (least < align)
+ least = align;
+ align = RTE_ALIGN_CEIL(size, align);
+ size = align + vec[i].size;
+ if (fill && vec[i].addr)
+ *vec[i].addr = data + align;
+ }
+ if (fill)
+ return size;
+ if (!zero)
+ data = rte_malloc_socket(type, size, least, socket);
+ else
+ data = rte_zmalloc_socket(type, size, least, socket);
+ if (data) {
+ fill = 1;
+ goto fill;
+ }
+ rte_errno = ENOMEM;
+error:
+ for (i = 0; i != cnt; ++i)
+ if (vec[i].addr)
+ *vec[i].addr = NULL;
+ return 0;
+}
+
+/**
+ * Allocate memory once for several disparate objects.
+ *
+ * This function adds iovec-like semantics (e.g. readv()) to rte_malloc().
+ * Memory is allocated once for several contiguous objects of nonuniform
+ * sizes and alignment constraints.
+ *
+ * Each entry of @p vec describes the size, alignment constraint and
+ * provides a buffer address where the resulting object pointer must be
+ * stored.
+ *
+ * The buffer of the first entry is guaranteed to point to the beginning of
+ * the allocated region and is safe to use with rte_free().
+ *
+ * NULL buffers are silently ignored.
+ *
+ * Providing a NULL buffer in the first entry prevents this function from
+ * allocating any memory but has otherwise no effect on its behavior. In
+ * this case, the contents of remaining non-NULL buffers are updated with
+ * addresses relative to zero (i.e. offsets that would have been used during
+ * the allocation).
+ *
+ * @param[in] type
+ * A string identifying the type of allocated objects (useful for debug
+ * purposes, such as identifying the cause of a memory leak). Can be NULL.
+ * @param[in, out] vec
+ * Description of objects to allocate memory for.
+ * @param cnt
+ * Number of entries in @p vec.
+ *
+ * @return
+ * Size in bytes of the allocated region including any padding. In case of
+ * error, rte_errno is set, 0 is returned and NULL is stored in the
+ * non-NULL buffers pointed by @p vec.
+ *
+ * @see struct mlx4_malloc_vec
+ * @see rte_malloc()
+ */
+size_t
+mlx4_mallocv(const char *type, const struct mlx4_malloc_vec *vec,
+ unsigned int cnt)
+{
+ return mlx4_mallocv_inline(type, vec, cnt, 0, SOCKET_ID_ANY);
+}
+
+/**
+ * Combines the semantics of mlx4_mallocv() with those of rte_zmalloc().
+ *
+ * @see mlx4_mallocv()
+ * @see rte_zmalloc()
+ */
+size_t
+mlx4_zmallocv(const char *type, const struct mlx4_malloc_vec *vec,
+ unsigned int cnt)
+{
+ return mlx4_mallocv_inline(type, vec, cnt, 1, SOCKET_ID_ANY);
+}
+
+/**
+ * Socket-aware version of mlx4_mallocv().
+ *
+ * This function takes one additional parameter.
+ *
+ * @param socket
+ * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this
+ * function will behave the same as mlx4_mallocv().
+ *
+ * @see mlx4_mallocv()
+ * @see rte_malloc_socket()
+ */
+size_t
+mlx4_mallocv_socket(const char *type, const struct mlx4_malloc_vec *vec,
+ unsigned int cnt, int socket)
+{
+ return mlx4_mallocv_inline(type, vec, cnt, 0, socket);
+}
+
+/**
+ * Combines the semantics of mlx4_mallocv_socket() with those of
+ * rte_zmalloc_socket().
+ *
+ * @see mlx4_mallocv_socket()
+ * @see rte_zmalloc_socket()
+ */
+size_t
+mlx4_zmallocv_socket(const char *type, const struct mlx4_malloc_vec *vec,
+ unsigned int cnt, int socket)
+{
+ return mlx4_mallocv_inline(type, vec, cnt, 1, socket);
+}
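As a usage example, this mirrors how mlx4_tx_queue_setup() above packs the queue structure, its element array and a bounce buffer into a single rte_malloc() region; a reduced sketch with hypothetical names (demo_ctx/demo_alloc are made up for illustration):

/* Sketch: allocating a control structure plus a trailing table in one shot. */
struct demo_ctx {
	unsigned int n;
};

static struct demo_ctx *
demo_alloc(unsigned int n, int socket)
{
	struct demo_ctx *ctx = NULL;
	uint32_t *table = NULL;
	struct mlx4_malloc_vec vec[] = {
		{
			.align = RTE_CACHE_LINE_SIZE,
			.size = sizeof(*ctx),
			.addr = (void **)&ctx,
		},
		{
			.align = RTE_CACHE_LINE_SIZE,
			.size = n * sizeof(*table),
			.addr = (void **)&table,
		},
	};

	if (!mlx4_zmallocv_socket("demo", vec, RTE_DIM(vec), socket))
		return NULL; /* rte_errno is set by the helper */
	/* The first pointer marks the start of the region; freeing it
	 * with rte_free(ctx) releases everything, including the table. */
	ctx->n = n;
	return ctx;
}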
diff --git a/drivers/net/mlx4/mlx4_utils.h b/drivers/net/mlx4/mlx4_utils.h
new file mode 100644
index 00000000..dc529c9c
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_utils.h
@@ -0,0 +1,133 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MLX4_UTILS_H_
+#define MLX4_UTILS_H_
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+
+#include "mlx4.h"
+
+#ifndef NDEBUG
+
+/*
+ * When debugging is enabled (NDEBUG not defined), file, line and function
+ * information replace the driver name (MLX4_DRIVER_NAME) in log messages.
+ */
+
+/** Return the file name part of a path. */
+static inline const char *
+pmd_drv_log_basename(const char *s)
+{
+ const char *n = s;
+
+ while (*n)
+ if (*(n++) == '/')
+ s = n;
+ return s;
+}
+
+#define PMD_DRV_LOG(level, ...) \
+ RTE_LOG(level, PMD, \
+ RTE_FMT("%s:%u: %s(): " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+ pmd_drv_log_basename(__FILE__), \
+ __LINE__, \
+ __func__, \
+ RTE_FMT_TAIL(__VA_ARGS__,)))
+#define DEBUG(...) PMD_DRV_LOG(DEBUG, __VA_ARGS__)
+#ifndef MLX4_PMD_DEBUG_BROKEN_VERBS
+#define claim_zero(...) assert((__VA_ARGS__) == 0)
+#else /* MLX4_PMD_DEBUG_BROKEN_VERBS */
+#define claim_zero(...) \
+ (void)(((__VA_ARGS__) == 0) || \
+ DEBUG("Assertion `(" # __VA_ARGS__ ") == 0' failed (IGNORED)."))
+#endif /* MLX4_PMD_DEBUG_BROKEN_VERBS */
+
+#else /* NDEBUG */
+
+/*
+ * Like assert(), DEBUG() becomes a no-op and claim_zero() does not perform
+ * any check when debugging is disabled.
+ */
+
+#define PMD_DRV_LOG(level, ...) \
+ RTE_LOG(level, PMD, \
+ RTE_FMT(MLX4_DRIVER_NAME ": " \
+ RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+ RTE_FMT_TAIL(__VA_ARGS__,)))
+#define DEBUG(...) (void)0
+#define claim_zero(...) (__VA_ARGS__)
+
+#endif /* NDEBUG */
+
+#define INFO(...) PMD_DRV_LOG(INFO, __VA_ARGS__)
+#define WARN(...) PMD_DRV_LOG(WARNING, __VA_ARGS__)
+#define ERROR(...) PMD_DRV_LOG(ERR, __VA_ARGS__)
+
+/** Allocate a buffer on the stack and fill it with a printf format string. */
+#define MKSTR(name, ...) \
+ char name[snprintf(NULL, 0, __VA_ARGS__) + 1]; \
+ \
+ snprintf(name, sizeof(name), __VA_ARGS__)
+
+/** Generate a string out of the provided arguments. */
+#define MLX4_STR(...) # __VA_ARGS__
+
+/** Similar to MLX4_STR() with enclosed macros expanded first. */
+#define MLX4_STR_EXPAND(...) MLX4_STR(__VA_ARGS__)
+
+/** Object description used with mlx4_mallocv() and similar functions. */
+struct mlx4_malloc_vec {
+ size_t align; /**< Alignment constraint (power of 2), 0 if unknown. */
+ size_t size; /**< Object size. */
+ void **addr; /**< Storage for allocation address. */
+};
+
+/* mlx4_utils.c */
+
+int mlx4_fd_set_non_blocking(int fd);
+size_t mlx4_mallocv(const char *type, const struct mlx4_malloc_vec *vec,
+ unsigned int cnt);
+size_t mlx4_zmallocv(const char *type, const struct mlx4_malloc_vec *vec,
+ unsigned int cnt);
+size_t mlx4_mallocv_socket(const char *type, const struct mlx4_malloc_vec *vec,
+ unsigned int cnt, int socket);
+size_t mlx4_zmallocv_socket(const char *type, const struct mlx4_malloc_vec *vec,
+ unsigned int cnt, int socket);
+
+#endif /* MLX4_UTILS_H_ */
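A brief usage example for the macros above: MKSTR() builds a formatted string on the stack, while DEBUG()/ERROR() prepend either the file/line/function prefix or MLX4_DRIVER_NAME depending on NDEBUG. The function name and sysfs path below are hypothetical:

/* Sketch: formatted stack string plus debug trace. */
static void
trace_mtu_path(const char *ifname)
{
	MKSTR(path, "/sys/class/net/%s/mtu", ifname);

	DEBUG("reading MTU from %s", path);
}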
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 8736de5d..a3984eb9 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -39,8 +39,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxq.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_txq.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx.c
-ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx_vec_sse.c
+ifneq ($(filter y,$(CONFIG_RTE_ARCH_X86_64) \
+ $(CONFIG_RTE_ARCH_ARM64)),)
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx_vec.c
endif
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_trigger.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_ethdev.c
@@ -49,9 +50,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxmode.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_vlan.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
# Basic CFLAGS.
CFLAGS += -O3
@@ -63,7 +64,10 @@ CFLAGS += -D_DEFAULT_SOURCE
CFLAGS += -D_XOPEN_SOURCE=600
CFLAGS += $(WERROR_FLAGS)
CFLAGS += -Wno-strict-prototypes
-LDLIBS += -libverbs
+LDLIBS += -libverbs -lmlx5
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
# A few warnings cannot be avoided in external headers.
CFLAGS += -Wno-error=cast-qual
@@ -104,24 +108,24 @@ mlx5_autoconf.h.new: FORCE
mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
$Q $(RM) -f -- '$@'
$Q sh -- '$<' '$@' \
- HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP \
- infiniband/verbs_exp.h \
- enum IBV_EXP_FLOW_SPEC_ACTION_DROP \
+ HAVE_IBV_DEVICE_VXLAN_SUPPORT \
+ infiniband/verbs.h \
+ enum IBV_DEVICE_VXLAN_SUPPORT \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
- HAVE_VERBS_IBV_EXP_CQ_COMPRESSED_CQE \
- infiniband/verbs_exp.h \
- enum IBV_EXP_CQ_COMPRESSED_CQE \
+ HAVE_IBV_WQ_FLAG_RX_END_PADDING \
+ infiniband/verbs.h \
+ enum IBV_WQ_FLAG_RX_END_PADDING \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
- HAVE_VERBS_MLX5_ETH_VLAN_INLINE_HEADER_SIZE \
- infiniband/mlx5_hw.h \
- enum MLX5_ETH_VLAN_INLINE_HEADER_SIZE \
+ HAVE_IBV_MLX5_MOD_MPW \
+ infiniband/mlx5dv.h \
+ enum MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
- HAVE_VERBS_MLX5_OPCODE_TSO \
- infiniband/mlx5_hw.h \
- enum MLX5_OPCODE_TSO \
+ HAVE_IBV_MLX5_MOD_CQE_128B_COMP \
+ infiniband/mlx5dv.h \
+ enum MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
HAVE_ETHTOOL_LINK_MODE_25G \
@@ -139,9 +143,9 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
- HAVE_UPDATE_CQ_CI \
- infiniband/mlx5_hw.h \
- func ibv_mlx5_exp_update_cq_ci \
+ HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT \
+ infiniband/verbs.h \
+ enum IBV_FLOW_SPEC_ACTION_COUNT \
$(AUTOCONF_OUTPUT)
# Create mlx5_autoconf.h or update it in case it differs from the new one.
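Each auto-config-h.sh invocation above probes one symbol in the rdma-core headers and records the result in mlx5_autoconf.h, which the C sources then test with #ifdef/#ifndef (see the fallback defines added to mlx5.c below). A hypothetical excerpt of the generated header, assuming a recent rdma-core where both mlx5dv flags are present (the exact formatting is up to auto-config-h.sh):

/* Hypothetical mlx5_autoconf.h excerpt; actual content depends on the headers found. */
#define HAVE_IBV_MLX5_MOD_MPW 1
#define HAVE_IBV_MLX5_MOD_CQE_128B_COMP 1
/* HAVE_IBV_DEVICE_VXLAN_SUPPORT is left undefined when the enum is absent. */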
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index b7e50463..0548d17a 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -50,19 +50,13 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_pci.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_common.h>
#include <rte_kvargs.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_utils.h"
@@ -103,6 +97,15 @@
/* Default PMD specific parameter value. */
#define MLX5_ARG_UNSET (-1)
+#ifndef HAVE_IBV_MLX5_MOD_MPW
+#define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2)
+#define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3)
+#endif
+
+#ifndef HAVE_IBV_MLX5_MOD_CQE_128B_COMP
+#define MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP (1 << 4)
+#endif
+
struct mlx5_args {
int cqe_comp;
int txq_inline;
@@ -134,6 +137,52 @@ mlx5_getenv_int(const char *name)
}
/**
+ * Verbs callback to allocate memory. This function should allocate the space
+ * according to the provided size, residing inside a huge page.
+ * Note that all allocations must respect the alignment required by libmlx5
+ * (i.e. currently sysconf(_SC_PAGESIZE)).
+ *
+ * @param[in] size
+ * The size in bytes of the memory to allocate.
+ * @param[in] data
+ * A pointer to the callback data.
+ *
+ * @return
+ * A pointer to the allocated space.
+ */
+static void *
+mlx5_alloc_verbs_buf(size_t size, void *data)
+{
+ struct priv *priv = data;
+ void *ret;
+ size_t alignment = sysconf(_SC_PAGESIZE);
+
+ assert(data != NULL);
+ assert(!mlx5_is_secondary());
+ ret = rte_malloc_socket(__func__, size, alignment,
+ priv->dev->device->numa_node);
+ DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret);
+ return ret;
+}
+
+/**
+ * Verbs callback to free a memory.
+ *
+ * @param[in] ptr
+ * A pointer to the memory to free.
+ * @param[in] data
+ * A pointer to the callback data.
+ */
+static void
+mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
+{
+ assert(data != NULL);
+ assert(!mlx5_is_secondary());
+ DEBUG("Extern free request: %p", ptr);
+ rte_free(ptr);
+}
+
+/**
* DPDK callback to close the device.
*
* Destroy all queues and objects, free memory.
@@ -146,6 +195,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
{
struct priv *priv = mlx5_get_priv(dev);
unsigned int i;
+ int ret;
priv_lock(priv);
DEBUG("%p: closing device \"%s\"",
@@ -153,48 +203,23 @@ mlx5_dev_close(struct rte_eth_dev *dev)
((priv->ctx != NULL) ? priv->ctx->device->name : ""));
/* In case mlx5_dev_stop() has not been called. */
priv_dev_interrupt_handler_uninstall(priv, dev);
- priv_special_flow_disable_all(priv);
- priv_mac_addrs_disable(priv);
- priv_destroy_hash_rxqs(priv);
-
- /* Remove flow director elements. */
- priv_fdir_disable(priv);
- priv_fdir_delete_filters_list(priv);
-
+ priv_dev_traffic_disable(priv, dev);
/* Prevent crashes when queues are still in use. */
dev->rx_pkt_burst = removed_rx_burst;
dev->tx_pkt_burst = removed_tx_burst;
if (priv->rxqs != NULL) {
/* XXX race condition if mlx5_rx_burst() is still running. */
usleep(1000);
- for (i = 0; (i != priv->rxqs_n); ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
- struct rxq_ctrl *rxq_ctrl;
-
- if (rxq == NULL)
- continue;
- rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- (*priv->rxqs)[i] = NULL;
- rxq_cleanup(rxq_ctrl);
- rte_free(rxq_ctrl);
- }
+ for (i = 0; (i != priv->rxqs_n); ++i)
+ mlx5_priv_rxq_release(priv, i);
priv->rxqs_n = 0;
priv->rxqs = NULL;
}
if (priv->txqs != NULL) {
/* XXX race condition if mlx5_tx_burst() is still running. */
usleep(1000);
- for (i = 0; (i != priv->txqs_n); ++i) {
- struct txq *txq = (*priv->txqs)[i];
- struct txq_ctrl *txq_ctrl;
-
- if (txq == NULL)
- continue;
- txq_ctrl = container_of(txq, struct txq_ctrl, txq);
- (*priv->txqs)[i] = NULL;
- txq_cleanup(txq_ctrl);
- rte_free(txq_ctrl);
- }
+ for (i = 0; (i != priv->txqs_n); ++i)
+ mlx5_priv_txq_release(priv, i);
priv->txqs_n = 0;
priv->txqs = NULL;
}
@@ -204,18 +229,40 @@ mlx5_dev_close(struct rte_eth_dev *dev)
claim_zero(ibv_close_device(priv->ctx));
} else
assert(priv->ctx == NULL);
- if (priv->rss_conf != NULL) {
- for (i = 0; (i != hash_rxq_init_n); ++i)
- rte_free((*priv->rss_conf)[i]);
- rte_free(priv->rss_conf);
- }
+ if (priv->rss_conf.rss_key != NULL)
+ rte_free(priv->rss_conf.rss_key);
if (priv->reta_idx != NULL)
rte_free(priv->reta_idx);
+ priv_socket_uninit(priv);
+ ret = mlx5_priv_hrxq_ibv_verify(priv);
+ if (ret)
+ WARN("%p: some Hash Rx queue still remain", (void *)priv);
+ ret = mlx5_priv_ind_table_ibv_verify(priv);
+ if (ret)
+ WARN("%p: some Indirection table still remain", (void *)priv);
+ ret = mlx5_priv_rxq_ibv_verify(priv);
+ if (ret)
+ WARN("%p: some Verbs Rx queue still remain", (void *)priv);
+ ret = mlx5_priv_rxq_verify(priv);
+ if (ret)
+ WARN("%p: some Rx Queues still remain", (void *)priv);
+ ret = mlx5_priv_txq_ibv_verify(priv);
+ if (ret)
+ WARN("%p: some Verbs Tx queue still remain", (void *)priv);
+ ret = mlx5_priv_txq_verify(priv);
+ if (ret)
+ WARN("%p: some Tx Queues still remain", (void *)priv);
+ ret = priv_flow_verify(priv);
+ if (ret)
+ WARN("%p: some flows still remain", (void *)priv);
+ ret = priv_mr_verify(priv);
+ if (ret)
+ WARN("%p: some Memory Region still remain", (void *)priv);
priv_unlock(priv);
memset(priv, 0, sizeof(*priv));
}
-static const struct eth_dev_ops mlx5_dev_ops = {
+const struct eth_dev_ops mlx5_dev_ops = {
.dev_configure = mlx5_dev_configure,
.dev_start = mlx5_dev_start,
.dev_stop = mlx5_dev_stop,
@@ -254,10 +301,55 @@ static const struct eth_dev_ops mlx5_dev_ops = {
.filter_ctrl = mlx5_dev_filter_ctrl,
.rx_descriptor_status = mlx5_rx_descriptor_status,
.tx_descriptor_status = mlx5_tx_descriptor_status,
-#ifdef HAVE_UPDATE_CQ_CI
.rx_queue_intr_enable = mlx5_rx_intr_enable,
.rx_queue_intr_disable = mlx5_rx_intr_disable,
-#endif
+};
+
+static const struct eth_dev_ops mlx5_dev_sec_ops = {
+ .stats_get = mlx5_stats_get,
+ .stats_reset = mlx5_stats_reset,
+ .xstats_get = mlx5_xstats_get,
+ .xstats_reset = mlx5_xstats_reset,
+ .xstats_get_names = mlx5_xstats_get_names,
+ .dev_infos_get = mlx5_dev_infos_get,
+ .rx_descriptor_status = mlx5_rx_descriptor_status,
+ .tx_descriptor_status = mlx5_tx_descriptor_status,
+};
+
+/* Available operations in flow isolated mode. */
+const struct eth_dev_ops mlx5_dev_ops_isolate = {
+ .dev_configure = mlx5_dev_configure,
+ .dev_start = mlx5_dev_start,
+ .dev_stop = mlx5_dev_stop,
+ .dev_set_link_down = mlx5_set_link_down,
+ .dev_set_link_up = mlx5_set_link_up,
+ .dev_close = mlx5_dev_close,
+ .link_update = mlx5_link_update,
+ .stats_get = mlx5_stats_get,
+ .stats_reset = mlx5_stats_reset,
+ .xstats_get = mlx5_xstats_get,
+ .xstats_reset = mlx5_xstats_reset,
+ .xstats_get_names = mlx5_xstats_get_names,
+ .dev_infos_get = mlx5_dev_infos_get,
+ .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
+ .vlan_filter_set = mlx5_vlan_filter_set,
+ .rx_queue_setup = mlx5_rx_queue_setup,
+ .tx_queue_setup = mlx5_tx_queue_setup,
+ .rx_queue_release = mlx5_rx_queue_release,
+ .tx_queue_release = mlx5_tx_queue_release,
+ .flow_ctrl_get = mlx5_dev_get_flow_ctrl,
+ .flow_ctrl_set = mlx5_dev_set_flow_ctrl,
+ .mac_addr_remove = mlx5_mac_addr_remove,
+ .mac_addr_add = mlx5_mac_addr_add,
+ .mac_addr_set = mlx5_mac_addr_set,
+ .mtu_set = mlx5_dev_set_mtu,
+ .vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
+ .vlan_offload_set = mlx5_vlan_offload_set,
+ .filter_ctrl = mlx5_dev_filter_ctrl,
+ .rx_descriptor_status = mlx5_rx_descriptor_status,
+ .tx_descriptor_status = mlx5_tx_descriptor_status,
+ .rx_queue_intr_enable = mlx5_rx_intr_enable,
+ .rx_queue_intr_disable = mlx5_rx_intr_disable,
};
static struct {
@@ -449,12 +541,17 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
struct ibv_device *ibv_dev;
int err = 0;
struct ibv_context *attr_ctx = NULL;
- struct ibv_device_attr device_attr;
+ struct ibv_device_attr_ex device_attr;
unsigned int sriov;
unsigned int mps;
- unsigned int tunnel_en;
+ unsigned int cqe_comp;
+ unsigned int tunnel_en = 0;
int idx;
int i;
+ struct mlx5dv_context attrs_out;
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+ struct ibv_counter_set_description cs_desc;
+#endif
(void)pci_drv;
assert(pci_drv == &mlx5_driver);
@@ -500,34 +597,24 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) ||
(pci_dev->id.device_id ==
PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF));
- /*
- * Multi-packet send is supported by ConnectX-4 Lx PF as well
- * as all ConnectX-5 devices.
- */
switch (pci_dev->id.device_id) {
case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
tunnel_en = 1;
- mps = MLX5_MPW_DISABLED;
break;
case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
- mps = MLX5_MPW;
- break;
case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
tunnel_en = 1;
- mps = MLX5_MPW_ENHANCED;
break;
default:
- mps = MLX5_MPW_DISABLED;
+ break;
}
INFO("PCI information matches, using device \"%s\""
- " (SR-IOV: %s, %sMPS: %s)",
+ " (SR-IOV: %s)",
list[i]->name,
- sriov ? "true" : "false",
- mps == MLX5_MPW_ENHANCED ? "Enhanced " : "",
- mps != MLX5_MPW_DISABLED ? "true" : "false");
+ sriov ? "true" : "false");
attr_ctx = ibv_open_device(list[i]);
err = errno;
break;
@@ -548,11 +635,33 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
ibv_dev = list[i];
DEBUG("device opened");
- if (ibv_query_device(attr_ctx, &device_attr))
+ /*
+ * Multi-packet send is supported by ConnectX-4 Lx PF as well
+ * as all ConnectX-5 devices.
+ */
+ mlx5dv_query_device(attr_ctx, &attrs_out);
+ if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {
+ if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) {
+ DEBUG("Enhanced MPW is supported");
+ mps = MLX5_MPW_ENHANCED;
+ } else {
+ DEBUG("MPW is supported");
+ mps = MLX5_MPW;
+ }
+ } else {
+ DEBUG("MPW isn't supported");
+ mps = MLX5_MPW_DISABLED;
+ }
+ if (RTE_CACHE_LINE_SIZE == 128 &&
+ !(attrs_out.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP))
+ cqe_comp = 0;
+ else
+ cqe_comp = 1;
+ if (ibv_query_device_ex(attr_ctx, NULL, &device_attr))
goto error;
- INFO("%u port(s) detected", device_attr.phys_port_cnt);
+ INFO("%u port(s) detected", device_attr.orig_attr.phys_port_cnt);
- for (i = 0; i < device_attr.phys_port_cnt; i++) {
+ for (i = 0; i < device_attr.orig_attr.phys_port_cnt; i++) {
uint32_t port = i + 1; /* ports are indexed from one */
uint32_t test = (1 << i);
struct ibv_context *ctx = NULL;
@@ -560,9 +669,10 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
struct ibv_pd *pd = NULL;
struct priv *priv = NULL;
struct rte_eth_dev *eth_dev;
- struct ibv_exp_device_attr exp_device_attr;
+ struct ibv_device_attr_ex device_attr_ex;
struct ether_addr mac;
uint16_t num_vfs = 0;
+ struct ibv_device_attr_ex device_attr;
struct mlx5_args args = {
.cqe_comp = MLX5_ARG_UNSET,
.txq_inline = MLX5_ARG_UNSET,
@@ -575,20 +685,49 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
.rx_vec_en = MLX5_ARG_UNSET,
};
- exp_device_attr.comp_mask =
- IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
- IBV_EXP_DEVICE_ATTR_RX_HASH |
- IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS |
- IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN |
- IBV_EXP_DEVICE_ATTR_TSO_CAPS |
- 0;
+ mlx5_dev[idx].ports |= test;
+
+ if (mlx5_is_secondary()) {
+ /* from rte_ethdev.c */
+ char name[RTE_ETH_NAME_MAX_LEN];
+
+ snprintf(name, sizeof(name), "%s port %u",
+ ibv_get_device_name(ibv_dev), port);
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ err = ENOMEM;
+ goto error;
+ }
+ eth_dev->device = &pci_dev->device;
+ eth_dev->dev_ops = &mlx5_dev_sec_ops;
+ priv = eth_dev->data->dev_private;
+ /* Receive command fd from primary process */
+ err = priv_socket_connect(priv);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = priv_tx_uar_remap(priv, err);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ priv_dev_select_rx_function(priv, eth_dev);
+ priv_dev_select_tx_function(priv, eth_dev);
+ continue;
+ }
DEBUG("using port %u (%08" PRIx32 ")", port, test);
ctx = ibv_open_device(ibv_dev);
- if (ctx == NULL)
+ if (ctx == NULL) {
+ err = ENODEV;
goto port_error;
+ }
+ ibv_query_device_ex(ctx, NULL, &device_attr);
/* Check port status. */
err = ibv_query_port(ctx, port, &port_attr);
if (err) {
@@ -599,6 +738,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
ERROR("port %d is not configured in Ethernet mode",
port);
+ err = EINVAL;
goto port_error;
}
@@ -628,12 +768,14 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
}
priv->ctx = ctx;
+ strncpy(priv->ibdev_path, priv->ctx->device->ibdev_path,
+ sizeof(priv->ibdev_path));
priv->device_attr = device_attr;
priv->port = port;
priv->pd = pd;
priv->mtu = ETHER_MTU;
priv->mps = mps; /* Enable MPW by default if supported. */
- priv->cqe_comp = 1; /* Enable compression by default. */
+ priv->cqe_comp = cqe_comp;
priv->tunnel_en = tunnel_en;
/* Enable vector by default if supported. */
priv->tx_vec_en = 1;
@@ -645,25 +787,33 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
goto port_error;
}
mlx5_args_assign(priv, &args);
- if (ibv_exp_query_device(ctx, &exp_device_attr)) {
- ERROR("ibv_exp_query_device() failed");
+ if (ibv_query_device_ex(ctx, NULL, &device_attr_ex)) {
+ ERROR("ibv_query_device_ex() failed");
goto port_error;
}
priv->hw_csum =
- ((exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) &&
- (exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_RX_CSUM_IP_PKT));
+ !!(device_attr_ex.device_cap_flags_ex &
+ IBV_DEVICE_RAW_IP_CSUM);
DEBUG("checksum offloading is %ssupported",
(priv->hw_csum ? "" : "not "));
+#ifdef HAVE_IBV_DEVICE_VXLAN_SUPPORT
priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_VXLAN_SUPPORT);
+ IBV_DEVICE_VXLAN_SUPPORT);
+#endif
DEBUG("L2 tunnel checksum offloads are %ssupported",
(priv->hw_csum_l2tun ? "" : "not "));
- priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size;
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+ priv->counter_set_supported = !!(device_attr.max_counter_sets);
+ ibv_describe_counter_set(ctx, 0, &cs_desc);
+ DEBUG("counter type = %d, num of cs = %ld, attributes = %d",
+ cs_desc.counter_type, cs_desc.num_of_cs,
+ cs_desc.attributes);
+#endif
+ priv->ind_table_max_size =
+ device_attr_ex.rss_caps.max_rwq_indirection_table_size;
/* Remove this check once DPDK supports larger/variable
* indirection tables. */
if (priv->ind_table_max_size >
@@ -671,29 +821,32 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
priv->ind_table_max_size = ETH_RSS_RETA_SIZE_512;
DEBUG("maximum RX indirection table size is %u",
priv->ind_table_max_size);
- priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap &
- IBV_EXP_RECEIVE_WQ_CVLAN_STRIP);
+ priv->hw_vlan_strip = !!(device_attr_ex.raw_packet_caps &
+ IBV_RAW_PACKET_CAP_CVLAN_STRIPPING);
DEBUG("VLAN stripping is %ssupported",
(priv->hw_vlan_strip ? "" : "not "));
- priv->hw_fcs_strip = !!(exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_SCATTER_FCS);
+ priv->hw_fcs_strip =
+ !!(device_attr_ex.orig_attr.device_cap_flags &
+ IBV_WQ_FLAGS_SCATTER_FCS);
DEBUG("FCS stripping configuration is %ssupported",
(priv->hw_fcs_strip ? "" : "not "));
- priv->hw_padding = !!exp_device_attr.rx_pad_end_addr_align;
+#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
+ priv->hw_padding = !!device_attr_ex.rx_pad_end_addr_align;
+#endif
DEBUG("hardware RX end alignment padding is %ssupported",
(priv->hw_padding ? "" : "not "));
priv_get_num_vfs(priv, &num_vfs);
priv->sriov = (num_vfs || sriov);
priv->tso = ((priv->tso) &&
- (exp_device_attr.tso_caps.max_tso > 0) &&
- (exp_device_attr.tso_caps.supported_qpts &
- (1 << IBV_QPT_RAW_ETH)));
+ (device_attr_ex.tso_caps.max_tso > 0) &&
+ (device_attr_ex.tso_caps.supported_qpts &
+ (1 << IBV_QPT_RAW_PACKET)));
if (priv->tso)
priv->max_tso_payload_sz =
- exp_device_attr.tso_caps.max_tso;
+ device_attr_ex.tso_caps.max_tso;
if (priv->mps && !mps) {
ERROR("multi-packet send not supported on this device"
" (" MLX5_TXQ_MPW_EN ")");
@@ -718,23 +871,15 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
priv->txq_inline = MLX5_WQE_SIZE_MAX -
MLX5_WQE_SIZE;
}
- /* Allocate and register default RSS hash keys. */
- priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
- sizeof((*priv->rss_conf)[0]), 0);
- if (priv->rss_conf == NULL) {
- err = ENOMEM;
- goto port_error;
+ if (priv->cqe_comp && !cqe_comp) {
+ WARN("Rx CQE compression isn't supported");
+ priv->cqe_comp = 0;
}
- err = rss_hash_rss_conf_new_key(priv,
- rss_hash_default_key,
- rss_hash_default_key_len,
- ETH_RSS_PROTO_MASK);
- if (err)
- goto port_error;
/* Configure the first MAC address by default. */
if (priv_get_mac(priv, &mac.addr_bytes)) {
ERROR("cannot get MAC address, is mlx5_en loaded?"
" (errno: %s)", strerror(errno));
+ err = ENODEV;
goto port_error;
}
INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
@@ -742,14 +887,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
mac.addr_bytes[0], mac.addr_bytes[1],
mac.addr_bytes[2], mac.addr_bytes[3],
mac.addr_bytes[4], mac.addr_bytes[5]);
- /* Register MAC address. */
- claim_zero(priv_mac_addr_add(priv, 0,
- (const uint8_t (*)[ETHER_ADDR_LEN])
- mac.addr_bytes));
- /* Initialize FD filters list. */
- err = fdir_init_filters_list(priv);
- if (err)
- goto port_error;
#ifndef NDEBUG
{
char ifname[IF_NAMESIZE];
@@ -778,44 +915,26 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
err = ENOMEM;
goto port_error;
}
-
- /* Secondary processes have to use local storage for their
- * private data as well as a copy of eth_dev->data, but this
- * pointer must not be modified before burst functions are
- * actually called. */
- if (mlx5_is_secondary()) {
- struct mlx5_secondary_data *sd =
- &mlx5_secondary_data[eth_dev->data->port_id];
- sd->primary_priv = eth_dev->data->dev_private;
- if (sd->primary_priv == NULL) {
- ERROR("no private data for port %u",
- eth_dev->data->port_id);
- err = EINVAL;
- goto port_error;
- }
- sd->shared_dev_data = eth_dev->data;
- rte_spinlock_init(&sd->lock);
- memcpy(sd->data.name, sd->shared_dev_data->name,
- sizeof(sd->data.name));
- sd->data.dev_private = priv;
- sd->data.rx_mbuf_alloc_failed = 0;
- sd->data.mtu = ETHER_MTU;
- sd->data.port_id = sd->shared_dev_data->port_id;
- sd->data.mac_addrs = priv->mac;
- eth_dev->tx_pkt_burst = mlx5_tx_burst_secondary_setup;
- eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup;
- } else {
- eth_dev->data->dev_private = priv;
- eth_dev->data->mac_addrs = priv->mac;
- }
-
+ eth_dev->data->dev_private = priv;
+ eth_dev->data->mac_addrs = priv->mac;
eth_dev->device = &pci_dev->device;
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
eth_dev->device->driver = &mlx5_driver.driver;
priv->dev = eth_dev;
eth_dev->dev_ops = &mlx5_dev_ops;
+ /* Register MAC address. */
+ claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
TAILQ_INIT(&priv->flows);
+ TAILQ_INIT(&priv->ctrl_flows);
+
+ /* Hint libmlx5 to use PMD allocator for data plane resources */
+ struct mlx5dv_ctx_allocators alctr = {
+ .alloc = &mlx5_alloc_verbs_buf,
+ .free = &mlx5_free_verbs_buf,
+ .data = priv,
+ };
+ mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
+ (void *)((uintptr_t)&alctr));
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
@@ -824,10 +943,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
continue;
port_error:
- if (priv) {
- rte_free(priv->rss_conf);
+ if (priv)
rte_free(priv);
- }
if (pd)
claim_zero(ibv_dealloc_pd(pd));
if (ctx)
@@ -901,7 +1018,7 @@ static struct rte_pci_driver mlx5_driver = {
},
.id_table = mlx5_pci_id_map,
.probe = mlx5_pci_probe,
- .drv_flags = RTE_PCI_DRV_INTR_LSC,
+ .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
};
/**
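Advertising RTE_PCI_DRV_INTR_RMV lets applications be notified when the device disappears, in addition to link-state changes. A hypothetical application-side sketch, assuming the 17.11 ethdev callback signature and intr_conf.rmv set before starting the port (example_* names are illustrative):

#include <stdio.h>
#include <rte_ethdev.h>

static int
example_dev_event_cb(uint16_t port_id, enum rte_eth_event_type event,
		     void *cb_arg, void *ret_param)
{
	(void)cb_arg;
	(void)ret_param;
	if (event == RTE_ETH_EVENT_INTR_RMV)
		printf("port %u: device removed, detach it\n",
		       (unsigned)port_id);
	else if (event == RTE_ETH_EVENT_INTR_LSC)
		printf("port %u: link state changed\n", (unsigned)port_id);
	return 0;
}

static void
example_register_dev_events(uint16_t port_id)
{
	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_RMV,
				      example_dev_event_cb, NULL);
	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
				      example_dev_event_cb, NULL);
}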
@@ -920,6 +1037,9 @@ rte_mlx5_pmd_init(void)
* using this PMD, which is not supported in forked processes.
*/
setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
+ /* Match the size of Rx completion entry to the size of a cacheline. */
+ if (RTE_CACHE_LINE_SIZE == 128)
+ setenv("MLX5_CQE_SIZE", "128", 0);
ibv_fork_init();
rte_pci_register(&mlx5_driver);
}
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 43c53841..e6a69b82 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -39,6 +39,7 @@
#include <limits.h>
#include <net/if.h>
#include <netinet/in.h>
+#include <sys/queue.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -50,10 +51,6 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
@@ -61,20 +58,12 @@
#include <rte_interrupts.h>
#include <rte_errno.h>
#include <rte_flow.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
-#if !defined(HAVE_VERBS_IBV_EXP_CQ_COMPRESSED_CQE) || \
- !defined(HAVE_VERBS_MLX5_ETH_VLAN_INLINE_HEADER_SIZE)
-#error Mellanox OFED >= 3.3 is required, please refer to the documentation.
-#endif
-
enum {
PCI_VENDOR_ID_MELLANOX = 0x15b3,
};
@@ -98,26 +87,21 @@ struct mlx5_xstats_ctrl {
uint64_t base[MLX5_MAX_XSTATS];
};
+/* Flow list. */
+TAILQ_HEAD(mlx5_flows, rte_flow);
+
struct priv {
- struct rte_eth_dev *dev; /* Ethernet device. */
+ struct rte_eth_dev *dev; /* Ethernet device of master process. */
struct ibv_context *ctx; /* Verbs context. */
- struct ibv_device_attr device_attr; /* Device properties. */
+ struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
- /*
- * MAC addresses array and configuration bit-field.
- * An extra entry that cannot be modified by the DPDK is reserved
- * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
- */
- struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES];
- BITFIELD_DECLARE(mac_configured, uint32_t, MLX5_MAX_MAC_ADDRESSES);
+ char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
+ struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES]; /* MAC addresses. */
uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
/* Device properties. */
uint16_t mtu; /* Configured MTU. */
uint8_t port; /* Physical port number. */
- unsigned int started:1; /* Device started, flows enabled. */
- unsigned int promisc_req:1; /* Promiscuous mode requested. */
- unsigned int allmulti_req:1; /* All multicast mode requested. */
unsigned int hw_csum:1; /* Checksum offload is supported. */
unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
unsigned int hw_vlan_strip:1; /* VLAN stripping is supported. */
@@ -133,6 +117,7 @@ struct priv {
unsigned int isolated:1; /* Whether isolated mode is enabled. */
unsigned int tx_vec_en:1; /* Whether Tx vector is enabled. */
unsigned int rx_vec_en:1; /* Whether Rx vector is enabled. */
+ unsigned int counter_set_supported:1; /* Counter set is supported. */
/* Whether Tx offloads for tunneled packets are supported. */
unsigned int max_tso_payload_sz; /* Maximum TCP payload for TSO. */
unsigned int txq_inline; /* Maximum packet size for inlining. */
@@ -141,38 +126,31 @@ struct priv {
/* RX/TX queues. */
unsigned int rxqs_n; /* RX queues array size. */
unsigned int txqs_n; /* TX queues array size. */
- struct rxq *(*rxqs)[]; /* RX queues. */
- struct txq *(*txqs)[]; /* TX queues. */
- /* Indirection tables referencing all RX WQs. */
- struct ibv_exp_rwq_ind_table *(*ind_tables)[];
- unsigned int ind_tables_n; /* Number of indirection tables. */
+ struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
+ struct mlx5_txq_data *(*txqs)[]; /* TX queues. */
unsigned int ind_table_max_size; /* Maximum indirection table size. */
- /* Hash RX QPs feeding the indirection table. */
- struct hash_rxq (*hash_rxqs)[];
- unsigned int hash_rxqs_n; /* Hash RX QPs array size. */
- /* RSS configuration array indexed by hash RX queue type. */
- struct rte_eth_rss_conf *(*rss_conf)[];
- uint64_t rss_hf; /* RSS DPDK bit field of active RSS. */
+ struct rte_eth_rss_conf rss_conf; /* RSS configuration. */
struct rte_intr_handle intr_handle; /* Interrupt handler. */
unsigned int (*reta_idx)[]; /* RETA index table. */
unsigned int reta_idx_n; /* RETA index size. */
- struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
- struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
- struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
- TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
+ struct mlx5_hrxq_drop *flow_drop_queue; /* Flow drop queue. */
+ struct mlx5_flows flows; /* RTE Flow rules. */
+ struct mlx5_flows ctrl_flows; /* Control flow rules. */
+ LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
+ LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
+ LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
+ LIST_HEAD(hrxq, mlx5_hrxq) hrxqs; /* Verbs Hash Rx queues. */
+ LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
+ LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
+ /* Verbs Indirection tables. */
+ LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls;
uint32_t link_speed_capa; /* Link speed capabilities. */
struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
rte_spinlock_t lock; /* Lock for control functions. */
+ int primary_socket; /* Unix socket for primary process. */
+ struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
};
-/* Local storage for secondary process data. */
-struct mlx5_secondary_data {
- struct rte_eth_dev_data data; /* Local device data. */
- struct priv *primary_priv; /* Private structure from primary. */
- struct rte_eth_dev_data *shared_dev_data; /* Shared device data. */
- rte_spinlock_t lock; /* Port configuration lock. */
-} mlx5_secondary_data[RTE_MAX_ETHPORTS];
-
/**
* Lock private structure to protect it from concurrent access in the
* control path.
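struct priv now tracks flows and Verbs/DPDK queue objects with the BSD <sys/queue.h> macros, which is why the header gains the new include above. A generic sketch of the pattern with a hypothetical element type (not a driver structure): the *_HEAD macro declares the list head, elements embed a *_ENTRY, and *_FOREACH iterates.

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct example_item {
	int id;
	TAILQ_ENTRY(example_item) next; /* Linkage inside the list. */
};

TAILQ_HEAD(example_list, example_item); /* Same idea as mlx5_flows. */

static void
example_list_demo(void)
{
	struct example_list list = TAILQ_HEAD_INITIALIZER(list);
	struct example_item *item = calloc(1, sizeof(*item));
	struct example_item *it;

	if (item == NULL)
		return;
	item->id = 42;
	TAILQ_INSERT_TAIL(&list, item, next);
	TAILQ_FOREACH(it, &list, next)
		printf("item %d\n", it->id);
	TAILQ_REMOVE(&list, item, next);
	free(item);
}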
@@ -228,28 +206,19 @@ void priv_dev_interrupt_handler_uninstall(struct priv *, struct rte_eth_dev *);
void priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
int mlx5_set_link_down(struct rte_eth_dev *dev);
int mlx5_set_link_up(struct rte_eth_dev *dev);
-struct priv *mlx5_secondary_data_setup(struct priv *priv);
-void priv_select_tx_function(struct priv *);
-void priv_select_rx_function(struct priv *);
+void priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev);
+void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev);
/* mlx5_mac.c */
int priv_get_mac(struct priv *, uint8_t (*)[ETHER_ADDR_LEN]);
-void hash_rxq_mac_addrs_del(struct hash_rxq *);
-void priv_mac_addrs_disable(struct priv *);
void mlx5_mac_addr_remove(struct rte_eth_dev *, uint32_t);
-int hash_rxq_mac_addrs_add(struct hash_rxq *);
-int priv_mac_addr_add(struct priv *, unsigned int,
- const uint8_t (*)[ETHER_ADDR_LEN]);
-int priv_mac_addrs_enable(struct priv *);
int mlx5_mac_addr_add(struct rte_eth_dev *, struct ether_addr *, uint32_t,
uint32_t);
void mlx5_mac_addr_set(struct rte_eth_dev *, struct ether_addr *);
/* mlx5_rss.c */
-int rss_hash_rss_conf_new_key(struct priv *, const uint8_t *, unsigned int,
- uint64_t);
int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
int priv_rss_reta_index_resize(struct priv *, unsigned int);
@@ -260,10 +229,6 @@ int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
/* mlx5_rxmode.c */
-int priv_special_flow_enable(struct priv *, enum hash_rxq_flow_type);
-void priv_special_flow_disable(struct priv *, enum hash_rxq_flow_type);
-int priv_special_flow_enable_all(struct priv *);
-void priv_special_flow_disable_all(struct priv *);
void mlx5_promiscuous_enable(struct rte_eth_dev *);
void mlx5_promiscuous_disable(struct rte_eth_dev *);
void mlx5_allmulticast_enable(struct rte_eth_dev *);
@@ -272,7 +237,7 @@ void mlx5_allmulticast_disable(struct rte_eth_dev *);
/* mlx5_stats.c */
void priv_xstats_init(struct priv *);
-void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *);
+int mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *);
void mlx5_stats_reset(struct rte_eth_dev *);
int mlx5_xstats_get(struct rte_eth_dev *,
struct rte_eth_xstat *, unsigned int);
@@ -283,26 +248,22 @@ int mlx5_xstats_get_names(struct rte_eth_dev *,
/* mlx5_vlan.c */
int mlx5_vlan_filter_set(struct rte_eth_dev *, uint16_t, int);
-void mlx5_vlan_offload_set(struct rte_eth_dev *, int);
+int mlx5_vlan_offload_set(struct rte_eth_dev *, int);
void mlx5_vlan_strip_queue_set(struct rte_eth_dev *, uint16_t, int);
/* mlx5_trigger.c */
int mlx5_dev_start(struct rte_eth_dev *);
void mlx5_dev_stop(struct rte_eth_dev *);
+int priv_dev_traffic_enable(struct priv *, struct rte_eth_dev *);
+int priv_dev_traffic_disable(struct priv *, struct rte_eth_dev *);
+int priv_dev_traffic_restart(struct priv *, struct rte_eth_dev *);
+int mlx5_traffic_restart(struct rte_eth_dev *);
-/* mlx5_fdir.c */
+/* mlx5_flow.c */
-void priv_fdir_queue_destroy(struct priv *, struct fdir_queue *);
-int fdir_init_filters_list(struct priv *);
-void priv_fdir_delete_filters_list(struct priv *);
-void priv_fdir_disable(struct priv *);
-void priv_fdir_enable(struct priv *);
int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type,
enum rte_filter_op, void *);
-
-/* mlx5_flow.c */
-
int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *,
const struct rte_flow_item [],
const struct rte_flow_action [],
@@ -314,10 +275,35 @@ struct rte_flow *mlx5_flow_create(struct rte_eth_dev *,
struct rte_flow_error *);
int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
struct rte_flow_error *);
+void priv_flow_flush(struct priv *, struct mlx5_flows *);
int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
+int mlx5_flow_query(struct rte_eth_dev *, struct rte_flow *,
+ enum rte_flow_action_type, void *,
+ struct rte_flow_error *);
int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
-int priv_flow_start(struct priv *);
-void priv_flow_stop(struct priv *);
-int priv_flow_rxq_in_use(struct priv *, struct rxq *);
+int priv_flow_start(struct priv *, struct mlx5_flows *);
+void priv_flow_stop(struct priv *, struct mlx5_flows *);
+int priv_flow_verify(struct priv *);
+int mlx5_ctrl_flow_vlan(struct rte_eth_dev *, struct rte_flow_item_eth *,
+ struct rte_flow_item_eth *, struct rte_flow_item_vlan *,
+ struct rte_flow_item_vlan *);
+int mlx5_ctrl_flow(struct rte_eth_dev *, struct rte_flow_item_eth *,
+ struct rte_flow_item_eth *);
+int priv_flow_create_drop_queue(struct priv *);
+void priv_flow_delete_drop_queue(struct priv *);
+
+/* mlx5_socket.c */
+
+int priv_socket_init(struct priv *priv);
+int priv_socket_uninit(struct priv *priv);
+void priv_socket_handle(struct priv *priv);
+int priv_socket_connect(struct priv *priv);
+
+/* mlx5_mr.c */
+
+struct mlx5_mr *priv_mr_new(struct priv *, struct rte_mempool *);
+struct mlx5_mr *priv_mr_get(struct priv *, struct rte_mempool *);
+int priv_mr_release(struct priv *, struct mlx5_mr *);
+int priv_mr_verify(struct priv *);
#endif /* RTE_PMD_MLX5_H_ */
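The flow director entry points removed above are superseded by the generic rte_flow interface implemented in mlx5_flow.c (mlx5_fdir.c itself is deleted later in this patch). As a rough, hypothetical equivalent of a perfect-match "UDPv4 destination port to Rx queue" rule, assuming the 17.11-era rte_flow structures (steer_udp_dst_port() is illustrative, not part of the driver):

#include <stdint.h>
#include <rte_flow.h>
#include <rte_byteorder.h>

static struct rte_flow *
steer_udp_dst_port(uint16_t port_id, uint16_t udp_dst, uint16_t queue_idx,
		   struct rte_flow_error *err)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_udp udp_spec = {
		.hdr.dst_port = rte_cpu_to_be_16(udp_dst),
	};
	struct rte_flow_item_udp udp_mask = {
		.hdr.dst_port = rte_cpu_to_be_16(0xffff),
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP,
		  .spec = &udp_spec, .mask = &udp_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = queue_idx };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, err);
}

Drop behaviour (RTE_ETH_FDIR_REJECT) maps the same way, with RTE_FLOW_ACTION_TYPE_DROP instead of the queue action.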
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index a76bc6f6..3a7706cf 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -45,9 +45,6 @@
/* Maximum number of simultaneous VLAN filters. */
#define MLX5_MAX_VLAN_IDS 128
-/* Maximum number of special flows. */
-#define MLX5_MAX_SPECIAL_FLOWS 4
-
/*
* Request TX completion every time descriptors reach this threshold since
* the previous request. Must be a power of two for performance reasons.
@@ -100,7 +97,8 @@
/*
* Maximum size of burst for vectorized Tx. This is related to the maximum size
- * of Enhaned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * Be careful when changing it; a large value can cause the WQE DS to overlap.
*/
#define MLX5_VPMD_TX_MAX_BURST 32U
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index b0eb3cdf..c31ea4b6 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -31,6 +31,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#define _GNU_SOURCE
+
#include <stddef.h>
#include <assert.h>
#include <unistd.h>
@@ -49,21 +51,17 @@
#include <linux/sockios.h>
#include <linux/version.h>
#include <fcntl.h>
+#include <stdalign.h>
+#include <sys/un.h>
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_atomic.h>
#include <rte_ethdev.h>
+#include <rte_bus_pci.h>
#include <rte_mbuf.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_malloc.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_rxtx.h"
@@ -119,7 +117,6 @@ struct ethtool_link_settings {
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif
-#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32 (SCHAR_MAX)
/**
* Return private structure associated with an Ethernet device.
@@ -133,12 +130,7 @@ struct ethtool_link_settings {
struct priv *
mlx5_get_priv(struct rte_eth_dev *dev)
{
- struct mlx5_secondary_data *sd;
-
- if (!mlx5_is_secondary())
- return dev->data->dev_private;
- sd = &mlx5_secondary_data[dev->data->port_id];
- return sd->data.dev_private;
+ return dev->data->dev_private;
}
/**
@@ -150,7 +142,7 @@ mlx5_get_priv(struct rte_eth_dev *dev)
inline int
mlx5_is_secondary(void)
{
- return rte_eal_process_type() != RTE_PROC_PRIMARY;
+ return rte_eal_process_type() == RTE_PROC_SECONDARY;
}
/**
@@ -174,7 +166,7 @@ priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
char match[IF_NAMESIZE] = "";
{
- MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);
+ MKSTR(path, "%s/device/net", priv->ibdev_path);
dir = opendir(path);
if (dir == NULL)
@@ -192,7 +184,7 @@ priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
continue;
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, name,
+ priv->ibdev_path, name,
(dev_type ? "dev_id" : "dev_port"));
file = fopen(path, "rb");
@@ -280,11 +272,11 @@ priv_sysfs_read(const struct priv *priv, const char *entry,
if (priv_is_ib_cntr(entry)) {
MKSTR(path, "%s/ports/1/hw_counters/%s",
- priv->ctx->device->ibdev_path, entry);
+ priv->ibdev_path, entry);
file = fopen(path, "rb");
} else {
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, ifname, entry);
+ priv->ibdev_path, ifname, entry);
file = fopen(path, "rb");
}
if (file == NULL)
@@ -327,8 +319,7 @@ priv_sysfs_write(const struct priv *priv, const char *entry,
if (priv_get_ifname(priv, &ifname))
return -1;
- MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
- ifname, entry);
+ MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);
file = fopen(path, "wb");
if (file == NULL)
@@ -585,8 +576,29 @@ dev_configure(struct rte_eth_dev *dev)
unsigned int i;
unsigned int j;
unsigned int reta_idx_n;
+ const uint8_t use_app_rss_key =
+ !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
- priv->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+ if (use_app_rss_key &&
+ (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
+ rss_hash_default_key_len)) {
+ /* MLX5 RSS only supports a 40-byte key. */
+ return EINVAL;
+ }
+ priv->rss_conf.rss_key =
+ rte_realloc(priv->rss_conf.rss_key,
+ rss_hash_default_key_len, 0);
+ if (!priv->rss_conf.rss_key) {
+ ERROR("cannot allocate RSS hash key memory (%u)", rxqs_n);
+ return ENOMEM;
+ }
+ memcpy(priv->rss_conf.rss_key,
+ use_app_rss_key ?
+ dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
+ rss_hash_default_key,
+ rss_hash_default_key_len);
+ priv->rss_conf.rss_key_len = rss_hash_default_key_len;
+ priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
priv->rxqs = (void *)dev->data->rx_queues;
priv->txqs = (void *)dev->data->tx_queues;
if (txqs_n != priv->txqs_n) {
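dev_configure() now keeps a private copy of the RSS configuration and rejects any application-supplied key whose length is not rss_hash_default_key_len (40 bytes). A hypothetical application-side configuration that satisfies the constraint (example_* names are illustrative):

#include <stdint.h>
#include <rte_ethdev.h>

/* Must be exactly 40 bytes, or mlx5 dev_configure() returns EINVAL.
 * Fill with a real Toeplitz key; zeroes are only a placeholder. */
static uint8_t example_rss_key[40] = { 0 };

static int
example_configure_port(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
{
	struct rte_eth_conf conf = {
		.rxmode = { .mq_mode = ETH_MQ_RX_RSS },
		.rx_adv_conf.rss_conf = {
			.rss_key = example_rss_key,
			.rss_key_len = sizeof(example_rss_key),
			.rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP,
		},
	};

	return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
}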
@@ -672,8 +684,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
* Since we need one CQ per QP, the limit is the minimum number
* between the two values.
*/
- max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ?
- priv->device_attr.max_qp : priv->device_attr.max_cq);
+ max = RTE_MIN(priv->device_attr.orig_attr.max_cq,
+ priv->device_attr.orig_attr.max_qp);
/* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */
if (max >= 65535)
max = 65535;
@@ -686,7 +698,9 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
DEV_RX_OFFLOAD_UDP_CKSUM |
DEV_RX_OFFLOAD_TCP_CKSUM) :
0) |
- (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0);
+ (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0) |
+ DEV_RX_OFFLOAD_TIMESTAMP;
+
if (!priv->mps)
info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
if (priv->hw_csum)
@@ -704,9 +718,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
info->if_index = if_nametoindex(ifname);
info->reta_size = priv->reta_idx_n ?
priv->reta_idx_n : priv->ind_table_max_size;
- info->hash_key_size = ((*priv->rss_conf) ?
- (*priv->rss_conf)[0]->rss_key_len :
- 0);
+ info->hash_key_size = priv->rss_conf.rss_key_len;
info->speed_capa = priv->link_speed_capa;
priv_unlock(priv);
}
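Rx timestamping is now always advertised by the PMD. A hypothetical application-side check and per-packet read, assuming the 17.11 mbuf layout (PKT_RX_TIMESTAMP flag and the mbuf timestamp field):

#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static int
example_port_has_rx_timestamp(uint16_t port_id)
{
	struct rte_eth_dev_info info;

	rte_eth_dev_info_get(port_id, &info);
	return !!(info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP);
}

static uint64_t
example_mbuf_timestamp(const struct rte_mbuf *m)
{
	/* Raw device clock units; 0 if the PMD did not stamp the packet. */
	return (m->ol_flags & PKT_RX_TIMESTAMP) ? m->timestamp : 0;
}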
@@ -816,12 +828,7 @@ static int
mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
{
struct priv *priv = mlx5_get_priv(dev);
- __extension__ struct {
- struct ethtool_link_settings edata;
- uint32_t link_mode_data[3 *
- ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
- } ecmd;
-
+ struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
struct ifreq ifr;
struct rte_eth_link dev_link;
uint64_t sc;
@@ -834,23 +841,29 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
memset(&dev_link, 0, sizeof(dev_link));
dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
(ifr.ifr_flags & IFF_RUNNING));
- memset(&ecmd, 0, sizeof(ecmd));
- ecmd.edata.cmd = ETHTOOL_GLINKSETTINGS;
- ifr.ifr_data = (void *)&ecmd;
+ ifr.ifr_data = (void *)&gcmd;
if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
strerror(errno));
return -1;
}
- ecmd.edata.link_mode_masks_nwords = -ecmd.edata.link_mode_masks_nwords;
+ gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;
+
+ alignas(struct ethtool_link_settings)
+ uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
+ sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
+ struct ethtool_link_settings *ecmd = (void *)data;
+
+ *ecmd = gcmd;
+ ifr.ifr_data = (void *)ecmd;
if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
strerror(errno));
return -1;
}
- dev_link.link_speed = ecmd.edata.speed;
- sc = ecmd.edata.link_mode_masks[0] |
- ((uint64_t)ecmd.edata.link_mode_masks[1] << 32);
+ dev_link.link_speed = ecmd->speed;
+ sc = ecmd->link_mode_masks[0] |
+ ((uint64_t)ecmd->link_mode_masks[1] << 32);
priv->link_speed_capa = 0;
if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT)
priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
@@ -886,7 +899,7 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT |
ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))
priv->link_speed_capa |= ETH_LINK_SPEED_100G;
- dev_link.link_duplex = ((ecmd.edata.duplex == DUPLEX_HALF) ?
+ dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
ETH_LINK_SPEED_FIXED);
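For reference, the two-pass ETHTOOL_GLINKSETTINGS handshake performed above can be shown in isolation: the first ioctl reports the required number of link-mode mask words as a negative value, and the second ioctl is issued with a buffer sized accordingly. A hedged sketch, assuming a kernel that defines ETHTOOL_GLINKSETTINGS (example_get_link_speed() and its parameters are illustrative, not driver code):

#include <stdint.h>
#include <stdalign.h>
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

/* sock is an ordinary AF_INET SOCK_DGRAM socket, ifname e.g. "eth0". */
static int
example_get_link_speed(int sock, const char *ifname, uint32_t *speed)
{
	struct ethtool_link_settings probe = { .cmd = ETHTOOL_GLINKSETTINGS };
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name) - 1);
	ifr.ifr_data = (void *)&probe;
	/* Pass 1: the kernel rejects nwords == 0 and returns the required
	 * number of 32-bit mask words as a negative value. */
	if (ioctl(sock, SIOCETHTOOL, &ifr) < 0 ||
	    probe.link_mode_masks_nwords >= 0)
		return -1;
	probe.link_mode_masks_nwords = -probe.link_mode_masks_nwords;
	/* Pass 2: provide room for the supported/advertising/partner masks. */
	alignas(struct ethtool_link_settings)
	uint8_t buf[sizeof(probe) +
		    3 * sizeof(uint32_t) * probe.link_mode_masks_nwords];
	struct ethtool_link_settings *req = (void *)buf;

	*req = probe;
	ifr.ifr_data = (void *)req;
	if (ioctl(sock, SIOCETHTOOL, &ifr) < 0)
		return -1;
	*speed = req->speed;
	return 0;
}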
@@ -1124,47 +1137,77 @@ mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
}
/**
- * Link status handler.
+ * Update the link status.
*
* @param priv
* Pointer to private structure.
- * @param dev
- * Pointer to the rte_eth_dev structure.
*
* @return
- * Nonzero if the callback process can be called immediately.
+ * Zero if the callback process can be called immediately.
*/
static int
-priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
+priv_link_status_update(struct priv *priv)
+{
+ struct rte_eth_link *link = &priv->dev->data->dev_link;
+
+ mlx5_link_update(priv->dev, 0);
+ if (((link->link_speed == 0) && link->link_status) ||
+ ((link->link_speed != 0) && !link->link_status)) {
+ /*
+ * Inconsistent status. The event likely occurred before
+ * the kernel netdevice exposed the new status.
+ */
+ if (!priv->pending_alarm) {
+ priv->pending_alarm = 1;
+ rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
+ mlx5_dev_link_status_handler,
+ priv->dev);
+ }
+ return 1;
+ } else if (unlikely(priv->pending_alarm)) {
+ /* Link interrupt occurred while alarm is already scheduled. */
+ priv->pending_alarm = 0;
+ rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev);
+ }
+ return 0;
+}
+
+/**
+ * Device status handler.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * Bitmap of events whose callbacks can be invoked immediately.
+ */
+static uint32_t
+priv_dev_status_handler(struct priv *priv)
{
struct ibv_async_event event;
- struct rte_eth_link *link = &dev->data->dev_link;
- int ret = 0;
+ uint32_t ret = 0;
/* Read all message and acknowledge them. */
for (;;) {
if (ibv_get_async_event(priv->ctx, &event))
break;
-
- if (event.event_type != IBV_EVENT_PORT_ACTIVE &&
- event.event_type != IBV_EVENT_PORT_ERR)
+ if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
+ event.event_type == IBV_EVENT_PORT_ERR) &&
+ (priv->dev->data->dev_conf.intr_conf.lsc == 1))
+ ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
+ else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
+ priv->dev->data->dev_conf.intr_conf.rmv == 1)
+ ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
+ else
DEBUG("event type %d on port %d not handled",
event.event_type, event.element.port_num);
ibv_ack_async_event(&event);
}
- mlx5_link_update(dev, 0);
- if (((link->link_speed == 0) && link->link_status) ||
- ((link->link_speed != 0) && !link->link_status)) {
- if (!priv->pending_alarm) {
- /* Inconsistent status, check again later. */
- priv->pending_alarm = 1;
- rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
- mlx5_dev_link_status_handler,
- dev);
- }
- } else {
- ret = 1;
- }
+ if (ret & (1 << RTE_ETH_EVENT_INTR_LSC))
+ if (priv_link_status_update(priv))
+ ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC);
return ret;
}
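The deferred link re-check above relies on one-shot EAL alarms. A hedged application-level sketch of the same schedule/cancel pattern (example_* names and the timeout value are illustrative, not the driver's MLX5_ALARM_TIMEOUT_US):

#include <stdio.h>
#include <stdint.h>
#include <rte_alarm.h>
#include <rte_ethdev.h>

#define EXAMPLE_RECHECK_US (100 * 1000)

static void
example_link_recheck(void *arg)
{
	uint16_t port_id = (uint16_t)(uintptr_t)arg;
	struct rte_eth_link link;

	/* By now the kernel netdevice should expose a consistent state. */
	rte_eth_link_get_nowait(port_id, &link);
	printf("port %u link is %s\n", (unsigned)port_id,
	       link.link_status ? "up" : "down");
}

static void
example_schedule_link_recheck(uint16_t port_id)
{
	/* One-shot callback, executed from the EAL interrupt thread. */
	rte_eal_alarm_set(EXAMPLE_RECHECK_US, example_link_recheck,
			  (void *)(uintptr_t)port_id);
}

static void
example_cancel_link_recheck(uint16_t port_id)
{
	/* Removes any pending (callback, argument) pair. */
	rte_eal_alarm_cancel(example_link_recheck, (void *)(uintptr_t)port_id);
}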
@@ -1184,9 +1227,9 @@ mlx5_dev_link_status_handler(void *arg)
priv_lock(priv);
assert(priv->pending_alarm == 1);
priv->pending_alarm = 0;
- ret = priv_dev_link_status_handler(priv, dev);
+ ret = priv_link_status_update(priv);
priv_unlock(priv);
- if (ret)
+ if (!ret)
_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
NULL);
}
@@ -1204,14 +1247,34 @@ mlx5_dev_interrupt_handler(void *cb_arg)
{
struct rte_eth_dev *dev = cb_arg;
struct priv *priv = dev->data->dev_private;
- int ret;
+ uint32_t events;
priv_lock(priv);
- ret = priv_dev_link_status_handler(priv, dev);
+ events = priv_dev_status_handler(priv);
priv_unlock(priv);
- if (ret)
+ if (events & (1 << RTE_ETH_EVENT_INTR_LSC))
_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
NULL);
+ if (events & (1 << RTE_ETH_EVENT_INTR_RMV))
+ _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL,
+ NULL);
+}
+
+/**
+ * Handle interrupts from the socket.
+ *
+ * @param cb_arg
+ * Callback argument.
+ */
+static void
+mlx5_dev_handler_socket(void *cb_arg)
+{
+ struct rte_eth_dev *dev = cb_arg;
+ struct priv *priv = dev->data->dev_private;
+
+ priv_lock(priv);
+ priv_socket_handle(priv);
+ priv_unlock(priv);
}
/**
@@ -1225,16 +1288,20 @@ mlx5_dev_interrupt_handler(void *cb_arg)
void
priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
{
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
- rte_intr_callback_unregister(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ if (dev->data->dev_conf.intr_conf.lsc ||
+ dev->data->dev_conf.intr_conf.rmv)
+ rte_intr_callback_unregister(&priv->intr_handle,
+ mlx5_dev_interrupt_handler, dev);
+ if (priv->primary_socket)
+ rte_intr_callback_unregister(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
if (priv->pending_alarm)
rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev);
priv->pending_alarm = 0;
priv->intr_handle.fd = 0;
priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ priv->intr_handle_socket.fd = 0;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN;
}
/**
@@ -1250,20 +1317,29 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
{
int rc, flags;
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
+ assert(!mlx5_is_secondary());
assert(priv->ctx->async_fd > 0);
flags = fcntl(priv->ctx->async_fd, F_GETFL);
rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
if (rc < 0) {
INFO("failed to change file descriptor async event queue");
dev->data->dev_conf.intr_conf.lsc = 0;
- } else {
+ dev->data->dev_conf.intr_conf.rmv = 0;
+ }
+ if (dev->data->dev_conf.intr_conf.lsc ||
+ dev->data->dev_conf.intr_conf.rmv) {
priv->intr_handle.fd = priv->ctx->async_fd;
priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
rte_intr_callback_register(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ mlx5_dev_interrupt_handler, dev);
+ }
+
+ rc = priv_socket_init(priv);
+ if (!rc && priv->primary_socket) {
+ priv->intr_handle_socket.fd = priv->primary_socket;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
+ rte_intr_callback_register(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
}
}
@@ -1271,7 +1347,9 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
* Change the link state (UP / DOWN).
*
* @param priv
- * Pointer to Ethernet device structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
* @param up
* Nonzero for link up, otherwise link down.
*
@@ -1279,17 +1357,16 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
* 0 on success, errno value on failure.
*/
static int
-priv_set_link(struct priv *priv, int up)
+priv_dev_set_link(struct priv *priv, struct rte_eth_dev *dev, int up)
{
- struct rte_eth_dev *dev = priv->dev;
int err;
if (up) {
err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
if (err)
return err;
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ priv_dev_select_tx_function(priv, dev);
+ priv_dev_select_rx_function(priv, dev);
} else {
err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP);
if (err)
@@ -1316,7 +1393,7 @@ mlx5_set_link_down(struct rte_eth_dev *dev)
int err;
priv_lock(priv);
- err = priv_set_link(priv, 0);
+ err = priv_dev_set_link(priv, dev, 0);
priv_unlock(priv);
return err;
}
@@ -1337,195 +1414,42 @@ mlx5_set_link_up(struct rte_eth_dev *dev)
int err;
priv_lock(priv);
- err = priv_set_link(priv, 1);
+ err = priv_dev_set_link(priv, dev, 1);
priv_unlock(priv);
return err;
}
/**
- * Configure secondary process queues from a private data pointer (primary
- * or secondary) and update burst callbacks. Can take place only once.
- *
- * All queues must have been previously created by the primary process to
- * avoid undefined behavior.
- *
- * @param priv
- * Private data pointer from either primary or secondary process.
- *
- * @return
- * Private data pointer from secondary process, NULL in case of error.
- */
-struct priv *
-mlx5_secondary_data_setup(struct priv *priv)
-{
- unsigned int port_id = 0;
- struct mlx5_secondary_data *sd;
- void **tx_queues;
- void **rx_queues;
- unsigned int nb_tx_queues;
- unsigned int nb_rx_queues;
- unsigned int i;
-
- /* priv must be valid at this point. */
- assert(priv != NULL);
- /* priv->dev must also be valid but may point to local memory from
- * another process, possibly with the same address and must not
- * be dereferenced yet. */
- assert(priv->dev != NULL);
- /* Determine port ID by finding out where priv comes from. */
- while (1) {
- sd = &mlx5_secondary_data[port_id];
- rte_spinlock_lock(&sd->lock);
- /* Primary process? */
- if (sd->primary_priv == priv)
- break;
- /* Secondary process? */
- if (sd->data.dev_private == priv)
- break;
- rte_spinlock_unlock(&sd->lock);
- if (++port_id == RTE_DIM(mlx5_secondary_data))
- port_id = 0;
- }
- /* Switch to secondary private structure. If private data has already
- * been updated by another thread, there is nothing else to do. */
- priv = sd->data.dev_private;
- if (priv->dev->data == &sd->data)
- goto end;
- /* Sanity checks. Secondary private structure is supposed to point
- * to local eth_dev, itself still pointing to the shared device data
- * structure allocated by the primary process. */
- assert(sd->shared_dev_data != &sd->data);
- assert(sd->data.nb_tx_queues == 0);
- assert(sd->data.tx_queues == NULL);
- assert(sd->data.nb_rx_queues == 0);
- assert(sd->data.rx_queues == NULL);
- assert(priv != sd->primary_priv);
- assert(priv->dev->data == sd->shared_dev_data);
- assert(priv->txqs_n == 0);
- assert(priv->txqs == NULL);
- assert(priv->rxqs_n == 0);
- assert(priv->rxqs == NULL);
- nb_tx_queues = sd->shared_dev_data->nb_tx_queues;
- nb_rx_queues = sd->shared_dev_data->nb_rx_queues;
- /* Allocate local storage for queues. */
- tx_queues = rte_zmalloc("secondary ethdev->tx_queues",
- sizeof(sd->data.tx_queues[0]) * nb_tx_queues,
- RTE_CACHE_LINE_SIZE);
- rx_queues = rte_zmalloc("secondary ethdev->rx_queues",
- sizeof(sd->data.rx_queues[0]) * nb_rx_queues,
- RTE_CACHE_LINE_SIZE);
- if (tx_queues == NULL || rx_queues == NULL)
- goto error;
- /* Lock to prevent control operations during setup. */
- priv_lock(priv);
- /* TX queues. */
- for (i = 0; i != nb_tx_queues; ++i) {
- struct txq *primary_txq = (*sd->primary_priv->txqs)[i];
- struct txq_ctrl *primary_txq_ctrl;
- struct txq_ctrl *txq_ctrl;
-
- if (primary_txq == NULL)
- continue;
- primary_txq_ctrl = container_of(primary_txq,
- struct txq_ctrl, txq);
- txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl) +
- (1 << primary_txq->elts_n) *
- sizeof(struct rte_mbuf *), 0,
- primary_txq_ctrl->socket);
- if (txq_ctrl != NULL) {
- if (txq_ctrl_setup(priv->dev,
- txq_ctrl,
- 1 << primary_txq->elts_n,
- primary_txq_ctrl->socket,
- NULL) == 0) {
- txq_ctrl->txq.stats.idx =
- primary_txq->stats.idx;
- tx_queues[i] = &txq_ctrl->txq;
- continue;
- }
- rte_free(txq_ctrl);
- }
- while (i) {
- txq_ctrl = tx_queues[--i];
- txq_cleanup(txq_ctrl);
- rte_free(txq_ctrl);
- }
- goto error;
- }
- /* RX queues. */
- for (i = 0; i != nb_rx_queues; ++i) {
- struct rxq_ctrl *primary_rxq =
- container_of((*sd->primary_priv->rxqs)[i],
- struct rxq_ctrl, rxq);
-
- if (primary_rxq == NULL)
- continue;
- /* Not supported yet. */
- rx_queues[i] = NULL;
- }
- /* Update everything. */
- priv->txqs = (void *)tx_queues;
- priv->txqs_n = nb_tx_queues;
- priv->rxqs = (void *)rx_queues;
- priv->rxqs_n = nb_rx_queues;
- sd->data.rx_queues = rx_queues;
- sd->data.tx_queues = tx_queues;
- sd->data.nb_rx_queues = nb_rx_queues;
- sd->data.nb_tx_queues = nb_tx_queues;
- sd->data.dev_link = sd->shared_dev_data->dev_link;
- sd->data.mtu = sd->shared_dev_data->mtu;
- memcpy(sd->data.rx_queue_state, sd->shared_dev_data->rx_queue_state,
- sizeof(sd->data.rx_queue_state));
- memcpy(sd->data.tx_queue_state, sd->shared_dev_data->tx_queue_state,
- sizeof(sd->data.tx_queue_state));
- sd->data.dev_flags = sd->shared_dev_data->dev_flags;
- /* Use local data from now on. */
- rte_mb();
- priv->dev->data = &sd->data;
- rte_mb();
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
- priv_unlock(priv);
-end:
- /* More sanity checks. */
- assert(priv->dev->data == &sd->data);
- rte_spinlock_unlock(&sd->lock);
- return priv;
-error:
- priv_unlock(priv);
- rte_free(tx_queues);
- rte_free(rx_queues);
- rte_spinlock_unlock(&sd->lock);
- return NULL;
-}
-
-/**
* Configure the TX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_tx_function(struct priv *priv)
+priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev)
{
- priv->dev->tx_pkt_burst = mlx5_tx_burst;
+ assert(priv != NULL);
+ assert(dev != NULL);
+ dev->tx_pkt_burst = mlx5_tx_burst;
/* Select appropriate TX function. */
if (priv->mps == MLX5_MPW_ENHANCED) {
if (priv_check_vec_tx_support(priv) > 0) {
if (priv_check_raw_vec_tx_support(priv) > 0)
- priv->dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
else
- priv->dev->tx_pkt_burst = mlx5_tx_burst_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_vec;
DEBUG("selected Enhanced MPW TX vectorized function");
} else {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_empw;
+ dev->tx_pkt_burst = mlx5_tx_burst_empw;
DEBUG("selected Enhanced MPW TX function");
}
} else if (priv->mps && priv->txq_inline) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
DEBUG("selected MPW inline TX function");
} else if (priv->mps) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw;
DEBUG("selected MPW TX function");
}
}
@@ -1534,16 +1458,19 @@ priv_select_tx_function(struct priv *priv)
* Configure the RX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_rx_function(struct priv *priv)
+priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev)
{
+ assert(priv != NULL);
+ assert(dev != NULL);
if (priv_check_vec_rx_support(priv) > 0) {
- priv_prep_vec_rx_function(priv);
- priv->dev->rx_pkt_burst = mlx5_rx_burst_vec;
+ dev->rx_pkt_burst = mlx5_rx_burst_vec;
DEBUG("selected RX vectorized function");
} else {
- priv->dev->rx_pkt_burst = mlx5_rx_burst;
+ dev->rx_pkt_burst = mlx5_rx_burst;
}
}
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
deleted file mode 100644
index 34a7e69f..00000000
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ /dev/null
@@ -1,1101 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright 2015 6WIND S.A.
- * Copyright 2015 Mellanox.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of 6WIND S.A. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stddef.h>
-#include <assert.h>
-#include <stdint.h>
-#include <string.h>
-#include <errno.h>
-
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <rte_ether.h>
-#include <rte_malloc.h>
-#include <rte_ethdev.h>
-#include <rte_common.h>
-#include <rte_flow.h>
-#include <rte_flow_driver.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
-#include "mlx5.h"
-#include "mlx5_rxtx.h"
-
-struct fdir_flow_desc {
- uint16_t dst_port;
- uint16_t src_port;
- uint32_t src_ip[4];
- uint32_t dst_ip[4];
- uint8_t mac[6];
- uint16_t vlan_tag;
- enum hash_rxq_type type;
-};
-
-struct mlx5_fdir_filter {
- LIST_ENTRY(mlx5_fdir_filter) next;
- uint16_t queue; /* Queue assigned to if FDIR match. */
- enum rte_eth_fdir_behavior behavior;
- struct fdir_flow_desc desc;
- struct ibv_exp_flow *flow;
-};
-
-LIST_HEAD(fdir_filter_list, mlx5_fdir_filter);
-
-/**
- * Convert struct rte_eth_fdir_filter to mlx5 filter descriptor.
- *
- * @param[in] fdir_filter
- * DPDK filter structure to convert.
- * @param[out] desc
- * Resulting mlx5 filter descriptor.
- * @param mode
- * Flow director mode.
- */
-static void
-fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
- struct fdir_flow_desc *desc, enum rte_fdir_mode mode)
-{
- /* Initialize descriptor. */
- memset(desc, 0, sizeof(*desc));
-
- /* Set VLAN ID. */
- desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci;
-
- /* Set MAC address. */
- if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
- rte_memcpy(desc->mac,
- fdir_filter->input.flow.mac_vlan_flow.mac_addr.
- addr_bytes,
- sizeof(desc->mac));
- desc->type = HASH_RXQ_ETH;
- return;
- }
-
- /* Set mode */
- switch (fdir_filter->input.flow_type) {
- case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
- desc->type = HASH_RXQ_UDPV4;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
- desc->type = HASH_RXQ_TCPV4;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
- desc->type = HASH_RXQ_IPV4;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
- desc->type = HASH_RXQ_UDPV6;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
- desc->type = HASH_RXQ_TCPV6;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
- desc->type = HASH_RXQ_IPV6;
- break;
- default:
- break;
- }
-
- /* Set flow values */
- switch (fdir_filter->input.flow_type) {
- case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
- case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
- desc->src_port = fdir_filter->input.flow.udp4_flow.src_port;
- desc->dst_port = fdir_filter->input.flow.udp4_flow.dst_port;
- /* fallthrough */
- case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
- desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip;
- desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
- case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
- desc->src_port = fdir_filter->input.flow.udp6_flow.src_port;
- desc->dst_port = fdir_filter->input.flow.udp6_flow.dst_port;
- /* Fall through. */
- case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
- rte_memcpy(desc->src_ip,
- fdir_filter->input.flow.ipv6_flow.src_ip,
- sizeof(desc->src_ip));
- rte_memcpy(desc->dst_ip,
- fdir_filter->input.flow.ipv6_flow.dst_ip,
- sizeof(desc->dst_ip));
- break;
- default:
- break;
- }
-}
-
-/**
- * Check if two flow descriptors overlap according to configured mask.
- *
- * @param priv
- * Private structure that provides flow director mask.
- * @param desc1
- * First flow descriptor to compare.
- * @param desc2
- * Second flow descriptor to compare.
- *
- * @return
- * Nonzero if descriptors overlap.
- */
-static int
-priv_fdir_overlap(const struct priv *priv,
- const struct fdir_flow_desc *desc1,
- const struct fdir_flow_desc *desc2)
-{
- const struct rte_eth_fdir_masks *mask =
- &priv->dev->data->dev_conf.fdir_conf.mask;
- unsigned int i;
-
- if (desc1->type != desc2->type)
- return 0;
- /* Ignore non masked bits. */
- for (i = 0; i != RTE_DIM(desc1->mac); ++i)
- if ((desc1->mac[i] & mask->mac_addr_byte_mask) !=
- (desc2->mac[i] & mask->mac_addr_byte_mask))
- return 0;
- if (((desc1->src_port & mask->src_port_mask) !=
- (desc2->src_port & mask->src_port_mask)) ||
- ((desc1->dst_port & mask->dst_port_mask) !=
- (desc2->dst_port & mask->dst_port_mask)))
- return 0;
- switch (desc1->type) {
- case HASH_RXQ_IPV4:
- case HASH_RXQ_UDPV4:
- case HASH_RXQ_TCPV4:
- if (((desc1->src_ip[0] & mask->ipv4_mask.src_ip) !=
- (desc2->src_ip[0] & mask->ipv4_mask.src_ip)) ||
- ((desc1->dst_ip[0] & mask->ipv4_mask.dst_ip) !=
- (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip)))
- return 0;
- break;
- case HASH_RXQ_IPV6:
- case HASH_RXQ_UDPV6:
- case HASH_RXQ_TCPV6:
- for (i = 0; i != RTE_DIM(desc1->src_ip); ++i)
- if (((desc1->src_ip[i] & mask->ipv6_mask.src_ip[i]) !=
- (desc2->src_ip[i] & mask->ipv6_mask.src_ip[i])) ||
- ((desc1->dst_ip[i] & mask->ipv6_mask.dst_ip[i]) !=
- (desc2->dst_ip[i] & mask->ipv6_mask.dst_ip[i])))
- return 0;
- break;
- default:
- break;
- }
- return 1;
-}
-
-/**
- * Create flow director steering rule for a specific filter.
- *
- * @param priv
- * Private structure.
- * @param mlx5_fdir_filter
- * Filter to create a steering rule for.
- * @param fdir_queue
- * Flow director queue for matching packets.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_flow_add(struct priv *priv,
- struct mlx5_fdir_filter *mlx5_fdir_filter,
- struct fdir_queue *fdir_queue)
-{
- struct ibv_exp_flow *flow;
- struct fdir_flow_desc *desc = &mlx5_fdir_filter->desc;
- enum rte_fdir_mode fdir_mode =
- priv->dev->data->dev_conf.fdir_conf.mode;
- struct rte_eth_fdir_masks *mask =
- &priv->dev->data->dev_conf.fdir_conf.mask;
- FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, desc->type));
- struct ibv_exp_flow_attr *attr = &data->attr;
- uintptr_t spec_offset = (uintptr_t)&data->spec;
- struct ibv_exp_flow_spec_eth *spec_eth;
- struct ibv_exp_flow_spec_ipv4 *spec_ipv4;
- struct ibv_exp_flow_spec_ipv6 *spec_ipv6;
- struct ibv_exp_flow_spec_tcp_udp *spec_tcp_udp;
- struct mlx5_fdir_filter *iter_fdir_filter;
- unsigned int i;
-
- /* Abort if an existing flow overlaps this one to avoid packet
- * duplication, even if it targets another queue. */
- LIST_FOREACH(iter_fdir_filter, priv->fdir_filter_list, next)
- if ((iter_fdir_filter != mlx5_fdir_filter) &&
- (iter_fdir_filter->flow != NULL) &&
- (priv_fdir_overlap(priv,
- &mlx5_fdir_filter->desc,
- &iter_fdir_filter->desc)))
- return EEXIST;
-
- /*
- * No padding must be inserted by the compiler between attr and spec.
- * This layout is expected by libibverbs.
- */
- assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec_offset);
- priv_flow_attr(priv, attr, sizeof(data), desc->type);
-
- /* Set Ethernet spec */
- spec_eth = (struct ibv_exp_flow_spec_eth *)spec_offset;
-
- /* The first specification must be Ethernet. */
- assert(spec_eth->type == IBV_EXP_FLOW_SPEC_ETH);
- assert(spec_eth->size == sizeof(*spec_eth));
-
- /* VLAN ID */
- spec_eth->val.vlan_tag = desc->vlan_tag & mask->vlan_tci_mask;
- spec_eth->mask.vlan_tag = mask->vlan_tci_mask;
-
- /* Update priority */
- attr->priority = 2;
-
- if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
- /* MAC Address */
- for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) {
- spec_eth->val.dst_mac[i] =
- desc->mac[i] & mask->mac_addr_byte_mask;
- spec_eth->mask.dst_mac[i] = mask->mac_addr_byte_mask;
- }
- goto create_flow;
- }
-
- switch (desc->type) {
- case HASH_RXQ_IPV4:
- case HASH_RXQ_UDPV4:
- case HASH_RXQ_TCPV4:
- spec_offset += spec_eth->size;
-
- /* Set IP spec */
- spec_ipv4 = (struct ibv_exp_flow_spec_ipv4 *)spec_offset;
-
- /* The second specification must be IP. */
- assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4);
- assert(spec_ipv4->size == sizeof(*spec_ipv4));
-
- spec_ipv4->val.src_ip =
- desc->src_ip[0] & mask->ipv4_mask.src_ip;
- spec_ipv4->val.dst_ip =
- desc->dst_ip[0] & mask->ipv4_mask.dst_ip;
- spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip;
- spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip;
-
- /* Update priority */
- attr->priority = 1;
-
- if (desc->type == HASH_RXQ_IPV4)
- goto create_flow;
-
- spec_offset += spec_ipv4->size;
- break;
- case HASH_RXQ_IPV6:
- case HASH_RXQ_UDPV6:
- case HASH_RXQ_TCPV6:
- spec_offset += spec_eth->size;
-
- /* Set IP spec */
- spec_ipv6 = (struct ibv_exp_flow_spec_ipv6 *)spec_offset;
-
- /* The second specification must be IP. */
- assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6);
- assert(spec_ipv6->size == sizeof(*spec_ipv6));
-
- for (i = 0; i != RTE_DIM(desc->src_ip); ++i) {
- ((uint32_t *)spec_ipv6->val.src_ip)[i] =
- desc->src_ip[i] & mask->ipv6_mask.src_ip[i];
- ((uint32_t *)spec_ipv6->val.dst_ip)[i] =
- desc->dst_ip[i] & mask->ipv6_mask.dst_ip[i];
- }
- rte_memcpy(spec_ipv6->mask.src_ip,
- mask->ipv6_mask.src_ip,
- sizeof(spec_ipv6->mask.src_ip));
- rte_memcpy(spec_ipv6->mask.dst_ip,
- mask->ipv6_mask.dst_ip,
- sizeof(spec_ipv6->mask.dst_ip));
-
- /* Update priority */
- attr->priority = 1;
-
- if (desc->type == HASH_RXQ_IPV6)
- goto create_flow;
-
- spec_offset += spec_ipv6->size;
- break;
- default:
- ERROR("invalid flow attribute type");
- return EINVAL;
- }
-
- /* Set TCP/UDP flow specification. */
- spec_tcp_udp = (struct ibv_exp_flow_spec_tcp_udp *)spec_offset;
-
- /* The third specification must be TCP/UDP. */
- assert(spec_tcp_udp->type == IBV_EXP_FLOW_SPEC_TCP ||
- spec_tcp_udp->type == IBV_EXP_FLOW_SPEC_UDP);
- assert(spec_tcp_udp->size == sizeof(*spec_tcp_udp));
-
- spec_tcp_udp->val.src_port = desc->src_port & mask->src_port_mask;
- spec_tcp_udp->val.dst_port = desc->dst_port & mask->dst_port_mask;
- spec_tcp_udp->mask.src_port = mask->src_port_mask;
- spec_tcp_udp->mask.dst_port = mask->dst_port_mask;
-
- /* Update priority */
- attr->priority = 0;
-
-create_flow:
-
- errno = 0;
- flow = ibv_exp_create_flow(fdir_queue->qp, attr);
- if (flow == NULL) {
- /* It's not clear whether errno is always set in this case. */
- ERROR("%p: flow director configuration failed, errno=%d: %s",
- (void *)priv, errno,
- (errno ? strerror(errno) : "Unknown error"));
- if (errno)
- return errno;
- return EINVAL;
- }
-
- DEBUG("%p: added flow director rule (%p)", (void *)priv, (void *)flow);
- mlx5_fdir_filter->flow = flow;
- return 0;
-}
-
-/**
- * Destroy a flow director queue.
- *
- * @param fdir_queue
- * Flow director queue to be destroyed.
- */
-void
-priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue)
-{
- struct mlx5_fdir_filter *fdir_filter;
-
- /* Disable filter flows still applying to this queue. */
- LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) {
- unsigned int idx = fdir_filter->queue;
- struct rxq_ctrl *rxq_ctrl =
- container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
-
- assert(idx < priv->rxqs_n);
- if (fdir_queue == rxq_ctrl->fdir_queue &&
- fdir_filter->flow != NULL) {
- claim_zero(ibv_exp_destroy_flow(fdir_filter->flow));
- fdir_filter->flow = NULL;
- }
- }
- assert(fdir_queue->qp);
- claim_zero(ibv_destroy_qp(fdir_queue->qp));
- assert(fdir_queue->ind_table);
- claim_zero(ibv_exp_destroy_rwq_ind_table(fdir_queue->ind_table));
- if (fdir_queue->wq)
- claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
- if (fdir_queue->cq)
- claim_zero(ibv_destroy_cq(fdir_queue->cq));
-#ifndef NDEBUG
- memset(fdir_queue, 0x2a, sizeof(*fdir_queue));
-#endif
- rte_free(fdir_queue);
-}
-
-/**
- * Create a flow director queue.
- *
- * @param priv
- * Private structure.
- * @param wq
- * Work queue to route matched packets to, NULL if one needs to
- * be created.
- *
- * @return
- * Related flow director queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_fdir_queue_create(struct priv *priv, struct ibv_exp_wq *wq,
- unsigned int socket)
-{
- struct fdir_queue *fdir_queue;
-
- fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue),
- 0, socket);
- if (!fdir_queue) {
- ERROR("cannot allocate flow director queue");
- return NULL;
- }
- assert(priv->pd);
- assert(priv->ctx);
- if (!wq) {
- fdir_queue->cq = ibv_exp_create_cq(
- priv->ctx, 1, NULL, NULL, 0,
- &(struct ibv_exp_cq_init_attr){
- .comp_mask = 0,
- });
- if (!fdir_queue->cq) {
- ERROR("cannot create flow director CQ");
- goto error;
- }
- fdir_queue->wq = ibv_exp_create_wq(
- priv->ctx,
- &(struct ibv_exp_wq_init_attr){
- .wq_type = IBV_EXP_WQT_RQ,
- .max_recv_wr = 1,
- .max_recv_sge = 1,
- .pd = priv->pd,
- .cq = fdir_queue->cq,
- });
- if (!fdir_queue->wq) {
- ERROR("cannot create flow director WQ");
- goto error;
- }
- wq = fdir_queue->wq;
- }
- fdir_queue->ind_table = ibv_exp_create_rwq_ind_table(
- priv->ctx,
- &(struct ibv_exp_rwq_ind_table_init_attr){
- .pd = priv->pd,
- .log_ind_tbl_size = 0,
- .ind_tbl = &wq,
- .comp_mask = 0,
- });
- if (!fdir_queue->ind_table) {
- ERROR("cannot create flow director indirection table");
- goto error;
- }
- fdir_queue->qp = ibv_exp_create_qp(
- priv->ctx,
- &(struct ibv_exp_qp_init_attr){
- .qp_type = IBV_QPT_RAW_PACKET,
- .comp_mask =
- IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_PORT |
- IBV_EXP_QP_INIT_ATTR_RX_HASH,
- .pd = priv->pd,
- .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
- .rx_hash_function =
- IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
- .rx_hash_key_len = rss_hash_default_key_len,
- .rx_hash_key = rss_hash_default_key,
- .rx_hash_fields_mask = 0,
- .rwq_ind_tbl = fdir_queue->ind_table,
- },
- .port_num = priv->port,
- });
- if (!fdir_queue->qp) {
- ERROR("cannot create flow director hash RX QP");
- goto error;
- }
- return fdir_queue;
-error:
- assert(fdir_queue);
- assert(!fdir_queue->qp);
- if (fdir_queue->ind_table)
- claim_zero(ibv_exp_destroy_rwq_ind_table
- (fdir_queue->ind_table));
- if (fdir_queue->wq)
- claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
- if (fdir_queue->cq)
- claim_zero(ibv_destroy_cq(fdir_queue->cq));
- rte_free(fdir_queue);
- return NULL;
-}
-
-/**
- * Get flow director queue for a specific RX queue, create it in case
- * it does not exist.
- *
- * @param priv
- * Private structure.
- * @param idx
- * RX queue index.
- *
- * @return
- * Related flow director queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_get_fdir_queue(struct priv *priv, uint16_t idx)
-{
- struct rxq_ctrl *rxq_ctrl =
- container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
- struct fdir_queue *fdir_queue = rxq_ctrl->fdir_queue;
-
- assert(rxq_ctrl->wq);
- if (fdir_queue == NULL) {
- fdir_queue = priv_fdir_queue_create(priv, rxq_ctrl->wq,
- rxq_ctrl->socket);
- rxq_ctrl->fdir_queue = fdir_queue;
- }
- return fdir_queue;
-}
-
-/**
- * Get or flow director drop queue. Create it if it does not exist.
- *
- * @param priv
- * Private structure.
- *
- * @return
- * Flow director drop queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_get_fdir_drop_queue(struct priv *priv)
-{
- struct fdir_queue *fdir_queue = priv->fdir_drop_queue;
-
- if (fdir_queue == NULL) {
- unsigned int socket = SOCKET_ID_ANY;
-
- /* Select a known NUMA socket if possible. */
- if (priv->rxqs_n && (*priv->rxqs)[0])
- socket = container_of((*priv->rxqs)[0],
- struct rxq_ctrl, rxq)->socket;
- fdir_queue = priv_fdir_queue_create(priv, NULL, socket);
- priv->fdir_drop_queue = fdir_queue;
- }
- return fdir_queue;
-}
-
-/**
- * Enable flow director filter and create steering rules.
- *
- * @param priv
- * Private structure.
- * @param mlx5_fdir_filter
- * Filter to create steering rule for.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_enable(struct priv *priv,
- struct mlx5_fdir_filter *mlx5_fdir_filter)
-{
- struct fdir_queue *fdir_queue;
-
- /* Check if flow already exists. */
- if (mlx5_fdir_filter->flow != NULL)
- return 0;
-
- /* Get fdir_queue for specific queue. */
- if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT)
- fdir_queue = priv_get_fdir_drop_queue(priv);
- else
- fdir_queue = priv_get_fdir_queue(priv,
- mlx5_fdir_filter->queue);
-
- if (fdir_queue == NULL) {
- ERROR("failed to create flow director rxq for queue %d",
- mlx5_fdir_filter->queue);
- return EINVAL;
- }
-
- /* Create flow */
- return priv_fdir_flow_add(priv, mlx5_fdir_filter, fdir_queue);
-}
-
-/**
- * Initialize flow director filters list.
- *
- * @param priv
- * Private structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-fdir_init_filters_list(struct priv *priv)
-{
- /* Filter list initialization should be done only once. */
- if (priv->fdir_filter_list)
- return 0;
-
- /* Create filters list. */
- priv->fdir_filter_list =
- rte_calloc(__func__, 1, sizeof(*priv->fdir_filter_list), 0);
-
- if (priv->fdir_filter_list == NULL) {
- int err = ENOMEM;
-
- ERROR("cannot allocate flow director filter list: %s",
- strerror(err));
- return err;
- }
-
- LIST_INIT(priv->fdir_filter_list);
-
- return 0;
-}
-
-/**
- * Flush all filters.
- *
- * @param priv
- * Private structure.
- */
-static void
-priv_fdir_filter_flush(struct priv *priv)
-{
- struct mlx5_fdir_filter *mlx5_fdir_filter;
-
- while ((mlx5_fdir_filter = LIST_FIRST(priv->fdir_filter_list))) {
- struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;
-
- DEBUG("%p: flushing flow director filter %p",
- (void *)priv, (void *)mlx5_fdir_filter);
- LIST_REMOVE(mlx5_fdir_filter, next);
- if (flow != NULL)
- claim_zero(ibv_exp_destroy_flow(flow));
- rte_free(mlx5_fdir_filter);
- }
-}
-
-/**
- * Remove all flow director filters and delete list.
- *
- * @param priv
- * Private structure.
- */
-void
-priv_fdir_delete_filters_list(struct priv *priv)
-{
- priv_fdir_filter_flush(priv);
- rte_free(priv->fdir_filter_list);
- priv->fdir_filter_list = NULL;
-}
-
-/**
- * Disable flow director, remove all steering rules.
- *
- * @param priv
- * Private structure.
- */
-void
-priv_fdir_disable(struct priv *priv)
-{
- unsigned int i;
- struct mlx5_fdir_filter *mlx5_fdir_filter;
-
- /* Run on every flow director filter and destroy flow handle. */
- LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
- struct ibv_exp_flow *flow;
-
- /* Only valid elements should be in the list */
- assert(mlx5_fdir_filter != NULL);
- flow = mlx5_fdir_filter->flow;
-
- /* Destroy flow handle */
- if (flow != NULL) {
- claim_zero(ibv_exp_destroy_flow(flow));
- mlx5_fdir_filter->flow = NULL;
- }
- }
-
- /* Destroy flow director context in each RX queue. */
- for (i = 0; (i != priv->rxqs_n); i++) {
- struct rxq_ctrl *rxq_ctrl;
-
- if (!(*priv->rxqs)[i])
- continue;
- rxq_ctrl = container_of((*priv->rxqs)[i], struct rxq_ctrl, rxq);
- if (!rxq_ctrl->fdir_queue)
- continue;
- priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue);
- rxq_ctrl->fdir_queue = NULL;
- }
- if (priv->fdir_drop_queue) {
- priv_fdir_queue_destroy(priv, priv->fdir_drop_queue);
- priv->fdir_drop_queue = NULL;
- }
-}
-
-/**
- * Enable flow director, create steering rules.
- *
- * @param priv
- * Private structure.
- */
-void
-priv_fdir_enable(struct priv *priv)
-{
- struct mlx5_fdir_filter *mlx5_fdir_filter;
-
- /* Run on every fdir filter and create flow handle */
- LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
- /* Only valid elements should be in the list */
- assert(mlx5_fdir_filter != NULL);
-
- priv_fdir_filter_enable(priv, mlx5_fdir_filter);
- }
-}
-
-/**
- * Find specific filter in list.
- *
- * @param priv
- * Private structure.
- * @param fdir_filter
- * Flow director filter to find.
- *
- * @return
- * Filter element if found, otherwise NULL.
- */
-static struct mlx5_fdir_filter *
-priv_find_filter_in_list(struct priv *priv,
- const struct rte_eth_fdir_filter *fdir_filter)
-{
- struct fdir_flow_desc desc;
- struct mlx5_fdir_filter *mlx5_fdir_filter;
- enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
-
- /* Get flow director filter to look for. */
- fdir_filter_to_flow_desc(fdir_filter, &desc, fdir_mode);
-
- /* Look for the requested element. */
- LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
- /* Only valid elements should be in the list. */
- assert(mlx5_fdir_filter != NULL);
-
- /* Return matching filter. */
- if (!memcmp(&desc, &mlx5_fdir_filter->desc, sizeof(desc)))
- return mlx5_fdir_filter;
- }
-
- /* Filter not found */
- return NULL;
-}
-
-/**
- * Add new flow director filter and store it in list.
- *
- * @param priv
- * Private structure.
- * @param fdir_filter
- * Flow director filter to add.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_add(struct priv *priv,
- const struct rte_eth_fdir_filter *fdir_filter)
-{
- struct mlx5_fdir_filter *mlx5_fdir_filter;
- enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
- int err = 0;
-
- /* Validate queue number. */
- if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
- ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
- return EINVAL;
- }
-
- /* Duplicate filters are currently unsupported. */
- mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
- if (mlx5_fdir_filter != NULL) {
- ERROR("filter already exists");
- return EINVAL;
- }
-
- /* Create new flow director filter. */
- mlx5_fdir_filter =
- rte_calloc(__func__, 1, sizeof(*mlx5_fdir_filter), 0);
- if (mlx5_fdir_filter == NULL) {
- err = ENOMEM;
- ERROR("cannot allocate flow director filter: %s",
- strerror(err));
- return err;
- }
-
- /* Set action parameters. */
- mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
- mlx5_fdir_filter->behavior = fdir_filter->action.behavior;
-
- /* Convert to mlx5 filter descriptor. */
- fdir_filter_to_flow_desc(fdir_filter,
- &mlx5_fdir_filter->desc, fdir_mode);
-
- /* Insert new filter into list. */
- LIST_INSERT_HEAD(priv->fdir_filter_list, mlx5_fdir_filter, next);
-
- DEBUG("%p: flow director filter %p added",
- (void *)priv, (void *)mlx5_fdir_filter);
-
- /* Enable filter immediately if device is started. */
- if (priv->started)
- err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-
- return err;
-}
-
-/**
- * Update queue for specific filter.
- *
- * @param priv
- * Private structure.
- * @param fdir_filter
- * Filter to be updated.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_update(struct priv *priv,
- const struct rte_eth_fdir_filter *fdir_filter)
-{
- struct mlx5_fdir_filter *mlx5_fdir_filter;
-
- /* Validate queue number. */
- if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
- ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
- return EINVAL;
- }
-
- mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
- if (mlx5_fdir_filter != NULL) {
- struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;
- int err = 0;
-
- /* Update queue number. */
- mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
-
- /* Destroy flow handle. */
- if (flow != NULL) {
- claim_zero(ibv_exp_destroy_flow(flow));
- mlx5_fdir_filter->flow = NULL;
- }
- DEBUG("%p: flow director filter %p updated",
- (void *)priv, (void *)mlx5_fdir_filter);
-
- /* Enable filter if device is started. */
- if (priv->started)
- err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-
- return err;
- }
-
- /* Filter not found, create it. */
- DEBUG("%p: filter not found for update, creating new filter",
- (void *)priv);
- return priv_fdir_filter_add(priv, fdir_filter);
-}
-
-/**
- * Delete specific filter.
- *
- * @param priv
- * Private structure.
- * @param fdir_filter
- * Filter to be deleted.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_delete(struct priv *priv,
- const struct rte_eth_fdir_filter *fdir_filter)
-{
- struct mlx5_fdir_filter *mlx5_fdir_filter;
-
- mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
- if (mlx5_fdir_filter != NULL) {
- struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;
-
- /* Remove element from list. */
- LIST_REMOVE(mlx5_fdir_filter, next);
-
- /* Destroy flow handle. */
- if (flow != NULL) {
- claim_zero(ibv_exp_destroy_flow(flow));
- mlx5_fdir_filter->flow = NULL;
- }
-
- DEBUG("%p: flow director filter %p deleted",
- (void *)priv, (void *)mlx5_fdir_filter);
-
- /* Delete filter. */
- rte_free(mlx5_fdir_filter);
-
- return 0;
- }
-
- ERROR("%p: flow director delete failed, cannot find filter",
- (void *)priv);
- return EINVAL;
-}
-
-/**
- * Get flow director information.
- *
- * @param priv
- * Private structure.
- * @param[out] fdir_info
- * Resulting flow director information.
- */
-static void
-priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
-{
- struct rte_eth_fdir_masks *mask =
- &priv->dev->data->dev_conf.fdir_conf.mask;
-
- fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
- fdir_info->guarant_spc = 0;
-
- rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
-
- fdir_info->max_flexpayload = 0;
- fdir_info->flow_types_mask[0] = 0;
-
- fdir_info->flex_payload_unit = 0;
- fdir_info->max_flex_payload_segment_num = 0;
- fdir_info->flex_payload_limit = 0;
- memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
-}
-
-/**
- * Deal with flow director operations.
- *
- * @param priv
- * Pointer to private structure.
- * @param filter_op
- * Operation to perform.
- * @param arg
- * Pointer to operation-specific structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
-{
- enum rte_fdir_mode fdir_mode =
- priv->dev->data->dev_conf.fdir_conf.mode;
- int ret = 0;
-
- if (filter_op == RTE_ETH_FILTER_NOP)
- return 0;
-
- if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
- fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
- ERROR("%p: flow director mode %d not supported",
- (void *)priv, fdir_mode);
- return EINVAL;
- }
-
- switch (filter_op) {
- case RTE_ETH_FILTER_ADD:
- ret = priv_fdir_filter_add(priv, arg);
- break;
- case RTE_ETH_FILTER_UPDATE:
- ret = priv_fdir_filter_update(priv, arg);
- break;
- case RTE_ETH_FILTER_DELETE:
- ret = priv_fdir_filter_delete(priv, arg);
- break;
- case RTE_ETH_FILTER_FLUSH:
- priv_fdir_filter_flush(priv);
- break;
- case RTE_ETH_FILTER_INFO:
- priv_fdir_info_get(priv, arg);
- break;
- default:
- DEBUG("%p: unknown operation %u", (void *)priv, filter_op);
- ret = EINVAL;
- break;
- }
- return ret;
-}
-
-static const struct rte_flow_ops mlx5_flow_ops = {
- .validate = mlx5_flow_validate,
- .create = mlx5_flow_create,
- .destroy = mlx5_flow_destroy,
- .flush = mlx5_flow_flush,
- .query = NULL,
- .isolate = mlx5_flow_isolate,
-};
-
-/**
- * Manage filter operations.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param filter_type
- * Filter type.
- * @param filter_op
- * Operation to perform.
- * @param arg
- * Pointer to operation-specific structure.
- *
- * @return
- * 0 on success, negative errno value on failure.
- */
-int
-mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
- enum rte_filter_type filter_type,
- enum rte_filter_op filter_op,
- void *arg)
-{
- int ret = EINVAL;
- struct priv *priv = dev->data->dev_private;
-
- switch (filter_type) {
- case RTE_ETH_FILTER_GENERIC:
- if (filter_op != RTE_ETH_FILTER_GET)
- return -EINVAL;
- *(const void **)arg = &mlx5_flow_ops;
- return 0;
- case RTE_ETH_FILTER_FDIR:
- priv_lock(priv);
- ret = priv_fdir_ctrl_func(priv, filter_op, arg);
- priv_unlock(priv);
- break;
- default:
- ERROR("%p: filter type (%d) not supported",
- (void *)dev, filter_type);
- break;
- }
-
- return -ret;
-}
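
The removed flow-director path above keeps positive errno values inside the driver and negates them exactly once at the ethdev boundary, as mlx5_dev_filter_ctrl() does with "return -ret". A minimal standalone sketch of that convention follows; the function names (do_filter_op, filter_ctrl) are hypothetical and not part of mlx5.

/*
 * Standalone sketch (not part of this patch): internal helpers return
 * positive errno values, the public entry point negates them once.
 */
#include <errno.h>
#include <stdio.h>

/* Internal helper: 0 on success, positive errno value on failure. */
static int
do_filter_op(int op)
{
	if (op < 0)
		return EINVAL;
	return 0;
}

/* Public entry point: converts to the negative errno convention. */
static int
filter_ctrl(int op)
{
	int ret = do_filter_op(op);

	return -ret;	/* 0 stays 0, EINVAL becomes -EINVAL */
}

int
main(void)
{
	printf("ok=%d err=%d\n", filter_ctrl(1), filter_ctrl(-1));
	return 0;
}
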
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 86be9291..cd99cb07 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -52,13 +52,36 @@
#include "mlx5.h"
#include "mlx5_prm.h"
-/* Number of Work Queue necessary for the DROP queue. */
-#ifndef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
-#define MLX5_DROP_WQ_N 4
-#else
-#define MLX5_DROP_WQ_N 1
+/* Define minimal priority for control plane flows. */
+#define MLX5_CTRL_FLOW_PRIORITY 4
+
+/* Internet Protocol versions. */
+#define MLX5_IPV4 4
+#define MLX5_IPV6 6
+
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+struct ibv_counter_set_init_attr {
+ int dummy;
+};
+struct ibv_flow_spec_counter_action {
+ int dummy;
+};
+struct ibv_counter_set {
+ int dummy;
+};
+
+static inline int
+ibv_destroy_counter_set(struct ibv_counter_set *cs)
+{
+ (void)cs;
+ return -ENOTSUP;
+}
#endif
+/* Dev ops structure defined in mlx5.c */
+extern const struct eth_dev_ops mlx5_dev_ops;
+extern const struct eth_dev_ops mlx5_dev_ops_isolate;
+
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
const void *default_mask,
@@ -94,19 +117,144 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
const void *default_mask,
void *data);
-struct rte_flow {
- TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
- struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
- struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
+struct mlx5_flow_parse;
+
+static void
+mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
+ unsigned int size);
+
+static int
+mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
+
+static int
+mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
+
+/* Hash RX queue types. */
+enum hash_rxq_type {
+ HASH_RXQ_TCPV4,
+ HASH_RXQ_UDPV4,
+ HASH_RXQ_IPV4,
+ HASH_RXQ_TCPV6,
+ HASH_RXQ_UDPV6,
+ HASH_RXQ_IPV6,
+ HASH_RXQ_ETH,
+};
+
+/* Initialization data for hash RX queue. */
+struct hash_rxq_init {
+ uint64_t hash_fields; /* Fields that participate in the hash. */
+ uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
+ unsigned int flow_priority; /* Flow priority to use. */
+ unsigned int ip_version; /* Internet protocol. */
+};
+
+/* Initialization data for hash RX queues. */
+const struct hash_rxq_init hash_rxq_init[] = {
+ [HASH_RXQ_TCPV4] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+ IBV_RX_HASH_DST_IPV4 |
+ IBV_RX_HASH_SRC_PORT_TCP |
+ IBV_RX_HASH_DST_PORT_TCP),
+ .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
+ .flow_priority = 0,
+ .ip_version = MLX5_IPV4,
+ },
+ [HASH_RXQ_UDPV4] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+ IBV_RX_HASH_DST_IPV4 |
+ IBV_RX_HASH_SRC_PORT_UDP |
+ IBV_RX_HASH_DST_PORT_UDP),
+ .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
+ .flow_priority = 0,
+ .ip_version = MLX5_IPV4,
+ },
+ [HASH_RXQ_IPV4] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+ IBV_RX_HASH_DST_IPV4),
+ .dpdk_rss_hf = (ETH_RSS_IPV4 |
+ ETH_RSS_FRAG_IPV4),
+ .flow_priority = 1,
+ .ip_version = MLX5_IPV4,
+ },
+ [HASH_RXQ_TCPV6] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+ IBV_RX_HASH_DST_IPV6 |
+ IBV_RX_HASH_SRC_PORT_TCP |
+ IBV_RX_HASH_DST_PORT_TCP),
+ .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
+ .flow_priority = 0,
+ .ip_version = MLX5_IPV6,
+ },
+ [HASH_RXQ_UDPV6] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+ IBV_RX_HASH_DST_IPV6 |
+ IBV_RX_HASH_SRC_PORT_UDP |
+ IBV_RX_HASH_DST_PORT_UDP),
+ .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
+ .flow_priority = 0,
+ .ip_version = MLX5_IPV6,
+ },
+ [HASH_RXQ_IPV6] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+ IBV_RX_HASH_DST_IPV6),
+ .dpdk_rss_hf = (ETH_RSS_IPV6 |
+ ETH_RSS_FRAG_IPV6),
+ .flow_priority = 1,
+ .ip_version = MLX5_IPV6,
+ },
+ [HASH_RXQ_ETH] = {
+ .hash_fields = 0,
+ .dpdk_rss_hf = 0,
+ .flow_priority = 2,
+ },
+};
+
+/* Number of entries in hash_rxq_init[]. */
+const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
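
The hash_rxq_init[] table ties each hash Rx queue type to the DPDK RSS hash flags it serves and to a flow priority, where more specific layers get lower values. A standalone sketch of how such a table can be filtered against a user rss_hf while always keeping the catch-all Ethernet entry is shown below; the flag values and names are stand-ins, not the real ETH_RSS_* or driver symbols.

/*
 * Standalone sketch (not part of this patch): keep only the hash Rx
 * queue types whose RSS flags overlap the user configuration.
 */
#include <stdint.h>
#include <stdio.h>

#define RSS_IPV4 (1ULL << 0)	/* stand-in for ETH_RSS_IPV4 */
#define RSS_TCP4 (1ULL << 1)	/* stand-in for ETH_RSS_NONFRAG_IPV4_TCP */
#define RSS_UDP4 (1ULL << 2)	/* stand-in for ETH_RSS_NONFRAG_IPV4_UDP */

struct hash_init {
	const char *name;
	uint64_t dpdk_rss_hf;		/* matching RSS hash flags */
	unsigned int flow_priority;	/* more specific => lower value */
};

static const struct hash_init table[] = {
	{ "TCPv4", RSS_TCP4, 0 },
	{ "UDPv4", RSS_UDP4, 0 },
	{ "IPv4",  RSS_IPV4, 1 },
	{ "ETH",   0,        2 },	/* catch-all, always kept */
};

int
main(void)
{
	uint64_t rss_hf = RSS_TCP4 | RSS_IPV4;	/* user configuration */
	unsigned int i;

	for (i = 0; i != sizeof(table) / sizeof(table[0]); ++i) {
		if (!(table[i].dpdk_rss_hf & rss_hf) && table[i].dpdk_rss_hf)
			continue;	/* skipped, no attribute allocated */
		printf("keep %s (priority %u)\n",
		       table[i].name, table[i].flow_priority);
	}
	return 0;
}
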
+
+/** Structure for holding counter stats. */
+struct mlx5_flow_counter_stats {
+ uint64_t hits; /**< Number of packets matched by the rule. */
+ uint64_t bytes; /**< Number of bytes matched by the rule. */
+};
+
+/** Structure for Drop queue. */
+struct mlx5_hrxq_drop {
+ struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
struct ibv_qp *qp; /**< Verbs queue pair. */
- struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
- struct ibv_exp_wq *wq; /**< Verbs work queue. */
+ struct ibv_wq *wq; /**< Verbs work queue. */
struct ibv_cq *cq; /**< Verbs completion queue. */
- uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
+};
+
+/* Flow structures. */
+struct mlx5_flow {
+ uint64_t hash_fields; /**< Fields that participate in the hash. */
+ struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+ struct ibv_flow *ibv_flow; /**< Verbs flow. */
+ struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
+};
+
+/* Drop flow structures. */
+struct mlx5_flow_drop {
+ struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+ struct ibv_flow *ibv_flow; /**< Verbs flow. */
+};
+
+struct rte_flow {
+ TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
uint32_t mark:1; /**< Set if the flow is marked. */
uint32_t drop:1; /**< Drop queue. */
- uint64_t hash_fields; /**< Fields that participate in the hash. */
- struct rxq *rxqs[]; /**< Pointer to the queues array. */
+ uint16_t queues_n; /**< Number of entries in queue[]. */
+ uint16_t (*queues)[]; /**< Queues indexes to use. */
+ struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
+ uint8_t rss_key[40]; /**< copy of the RSS key. */
+ struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
+ struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
+ union {
+ struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
+ /**< Flow with Rx queue. */
+ struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
+ };
};
/** Static initializer for items. */
@@ -157,6 +305,9 @@ static const enum rte_flow_action_type valid_actions[] = {
RTE_FLOW_ACTION_TYPE_QUEUE,
RTE_FLOW_ACTION_TYPE_MARK,
RTE_FLOW_ACTION_TYPE_FLAG,
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+ RTE_FLOW_ACTION_TYPE_COUNT,
+#endif
RTE_FLOW_ACTION_TYPE_END,
};
@@ -179,7 +330,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_eth_mask,
.mask_sz = sizeof(struct rte_flow_item_eth),
.convert = mlx5_flow_create_eth,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
+ .dst_sz = sizeof(struct ibv_flow_spec_eth),
},
[RTE_FLOW_ITEM_TYPE_VLAN] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
@@ -208,7 +359,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_ipv4_mask,
.mask_sz = sizeof(struct rte_flow_item_ipv4),
.convert = mlx5_flow_create_ipv4,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
+ .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
},
[RTE_FLOW_ITEM_TYPE_IPV6] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
@@ -236,7 +387,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_ipv6_mask,
.mask_sz = sizeof(struct rte_flow_item_ipv6),
.convert = mlx5_flow_create_ipv6,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
+ .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
},
[RTE_FLOW_ITEM_TYPE_UDP] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
@@ -250,7 +401,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_udp_mask,
.mask_sz = sizeof(struct rte_flow_item_udp),
.convert = mlx5_flow_create_udp,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
+ .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
},
[RTE_FLOW_ITEM_TYPE_TCP] = {
.actions = valid_actions,
@@ -263,7 +414,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_tcp_mask,
.mask_sz = sizeof(struct rte_flow_item_tcp),
.convert = mlx5_flow_create_tcp,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
+ .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
},
[RTE_FLOW_ITEM_TYPE_VXLAN] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
@@ -274,33 +425,76 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_vxlan_mask,
.mask_sz = sizeof(struct rte_flow_item_vxlan),
.convert = mlx5_flow_create_vxlan,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
+ .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
},
};
/** Structure to pass to the conversion function. */
-struct mlx5_flow {
- struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
- unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
+struct mlx5_flow_parse {
uint32_t inner; /**< Set once VXLAN is encountered. */
- uint64_t hash_fields; /**< Fields that participate in the hash. */
-};
-
-/** Structure for Drop queue. */
-struct rte_flow_drop {
- struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
- struct ibv_qp *qp; /**< Verbs queue pair. */
- struct ibv_exp_wq *wqs[MLX5_DROP_WQ_N]; /**< Verbs work queue. */
- struct ibv_cq *cq; /**< Verbs completion queue. */
-};
-
-struct mlx5_flow_action {
- uint32_t queue:1; /**< Target is a receive queue. */
+ uint32_t create:1;
+ /**< Whether resources should remain after a validate. */
uint32_t drop:1; /**< Target is a drop queue. */
uint32_t mark:1; /**< Mark is present in the flow. */
+ uint32_t count:1; /**< Count is present in the flow. */
uint32_t mark_id; /**< Mark identifier. */
uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
uint16_t queues_n; /**< Number of entries in queue[]. */
+ struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
+ uint8_t rss_key[40]; /**< copy of the RSS key. */
+ enum hash_rxq_type layer; /**< Last pattern layer detected. */
+ struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
+ union {
+ struct {
+ struct ibv_flow_attr *ibv_attr;
+ /**< Pointer to Verbs attributes. */
+ unsigned int offset;
+ /**< Current position or total size of the attribute. */
+ } queue[RTE_DIM(hash_rxq_init)];
+ struct {
+ struct ibv_flow_attr *ibv_attr;
+ /**< Pointer to Verbs attributes. */
+ unsigned int offset;
+ /**< Current position or total size of the attribute. */
+ } drop_q;
+ };
+};
+
+static const struct rte_flow_ops mlx5_flow_ops = {
+ .validate = mlx5_flow_validate,
+ .create = mlx5_flow_create,
+ .destroy = mlx5_flow_destroy,
+ .flush = mlx5_flow_flush,
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+ .query = mlx5_flow_query,
+#else
+ .query = NULL,
+#endif
+ .isolate = mlx5_flow_isolate,
+};
+
+/* Convert FDIR request to Generic flow. */
+struct mlx5_fdir {
+ struct rte_flow_attr attr;
+ struct rte_flow_action actions[2];
+ struct rte_flow_item items[4];
+ struct rte_flow_item_eth l2;
+ struct rte_flow_item_eth l2_mask;
+ union {
+ struct rte_flow_item_ipv4 ipv4;
+ struct rte_flow_item_ipv6 ipv6;
+ } l3;
+ union {
+ struct rte_flow_item_udp udp;
+ struct rte_flow_item_tcp tcp;
+ } l4;
+ struct rte_flow_action_queue queue;
+};
+
+/* Verbs specification header. */
+struct ibv_spec_header {
+ enum ibv_flow_spec_type type;
+ uint16_t size;
};
/**
@@ -367,38 +561,58 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
}
/**
- * Validate a flow supported by the NIC.
+ * Copy the RSS configuration provided by the user into the parser.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param parser
+ * Internal parser structure.
+ * @param rss_conf
+ * User RSS configuration to save.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_flow_convert_rss_conf(struct priv *priv,
+ struct mlx5_flow_parse *parser,
+ const struct rte_eth_rss_conf *rss_conf)
+{
+ const struct rte_eth_rss_conf *rss =
+ rss_conf ? rss_conf : &priv->rss_conf;
+
+ if (rss->rss_key_len > 40)
+ return EINVAL;
+ parser->rss_conf.rss_key_len = rss->rss_key_len;
+ parser->rss_conf.rss_hf = rss->rss_hf;
+ memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
+ parser->rss_conf.rss_key = parser->rss_key;
+ return 0;
+}
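
priv_flow_convert_rss_conf() copies the user key into the parser's fixed 40-byte buffer and repoints rss_conf.rss_key at that copy, so later steps never depend on the caller's memory. A minimal sketch of the same idea with simplified stand-in types follows; rss_conf and parser below are not the DPDK structures.

/*
 * Standalone sketch (not part of this patch): keep a private copy of
 * the RSS key and repoint the configuration at it.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for struct rte_eth_rss_conf. */
struct rss_conf {
	uint8_t *rss_key;
	uint8_t rss_key_len;
	uint64_t rss_hf;
};

/* The parser keeps a private 40-byte key, as mlx5_flow_parse does. */
struct parser {
	struct rss_conf rss_conf;
	uint8_t rss_key[40];
};

static int
copy_rss_conf(struct parser *p, const struct rss_conf *user)
{
	if (user->rss_key_len > sizeof(p->rss_key))
		return EINVAL;			/* key too long */
	p->rss_conf.rss_key_len = user->rss_key_len;
	p->rss_conf.rss_hf = user->rss_hf;
	memcpy(p->rss_key, user->rss_key, user->rss_key_len);
	p->rss_conf.rss_key = p->rss_key;	/* repoint at the copy */
	return 0;
}

int
main(void)
{
	uint8_t key[4] = { 1, 2, 3, 4 };
	struct rss_conf user = { .rss_key = key, .rss_key_len = 4, .rss_hf = 1 };
	struct parser p;

	memset(&p, 0, sizeof(p));
	printf("ret=%d key[2]=%u\n", copy_rss_conf(&p, &user), p.rss_key[2]);
	return 0;
}
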
+
+/**
+ * Extract flow rule attributes into the parser.
*
* @param priv
* Pointer to private structure.
* @param[in] attr
* Flow rule attributes.
- * @param[in] pattern
- * Pattern specification (list terminated by the END pattern item).
- * @param[in] actions
- * Associated actions (list terminated by the END action).
* @param[out] error
* Perform verbose error reporting if not NULL.
- * @param[in, out] flow
- * Flow structure to update.
- * @param[in, out] action
- * Action structure to update.
+ * @param[in, out] parser
+ * Internal parser structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-priv_flow_validate(struct priv *priv,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item items[],
- const struct rte_flow_action actions[],
- struct rte_flow_error *error,
- struct mlx5_flow *flow,
- struct mlx5_flow_action *action)
+priv_flow_convert_attributes(struct priv *priv,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error,
+ struct mlx5_flow_parse *parser)
{
- const struct mlx5_flow_items *cur_item = mlx5_flow_items;
-
(void)priv;
+ (void)parser;
if (attr->group) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
@@ -406,7 +620,7 @@ priv_flow_validate(struct priv *priv,
"groups are not supported");
return -rte_errno;
}
- if (attr->priority) {
+ if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
NULL,
@@ -427,56 +641,42 @@ priv_flow_validate(struct priv *priv,
"only ingress is supported");
return -rte_errno;
}
- for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
- const struct mlx5_flow_items *token = NULL;
- unsigned int i;
- int err;
+ return 0;
+}
- if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
- continue;
- for (i = 0;
- cur_item->items &&
- cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
- ++i) {
- if (cur_item->items[i] == items->type) {
- token = &mlx5_flow_items[items->type];
- break;
- }
- }
- if (!token)
- goto exit_item_not_supported;
- cur_item = token;
- err = mlx5_flow_item_validate(items,
- (const uint8_t *)cur_item->mask,
- cur_item->mask_sz);
- if (err)
- goto exit_item_not_supported;
- if (flow->ibv_attr && cur_item->convert) {
- err = cur_item->convert(items,
- (cur_item->default_mask ?
- cur_item->default_mask :
- cur_item->mask),
- flow);
- if (err)
- goto exit_item_not_supported;
- } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
- if (flow->inner) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- items,
- "cannot recognize multiple"
- " VXLAN encapsulations");
- return -rte_errno;
- }
- flow->inner = 1;
- }
- flow->offset += cur_item->dst_sz;
- }
+/**
+ * Extract the requested actions into the parser.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ * Internal parser structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_convert_actions(struct priv *priv,
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error,
+ struct mlx5_flow_parse *parser)
+{
+ /*
+ * Add the default RSS configuration, which Verbs needs to create the
+ * QP even when no RSS is requested.
+ */
+ priv_flow_convert_rss_conf(priv, parser,
+ (const struct rte_eth_rss_conf *)
+ &priv->rss_conf);
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
continue;
} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
- action->drop = 1;
+ parser->drop = 1;
} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
const struct rte_flow_action_queue *queue =
(const struct rte_flow_action_queue *)
@@ -486,13 +686,13 @@ priv_flow_validate(struct priv *priv,
if (!queue || (queue->index > (priv->rxqs_n - 1)))
goto exit_action_not_supported;
- for (n = 0; n < action->queues_n; ++n) {
- if (action->queues[n] == queue->index) {
+ for (n = 0; n < parser->queues_n; ++n) {
+ if (parser->queues[n] == queue->index) {
found = 1;
break;
}
}
- if (action->queues_n > 1 && !found) {
+ if (parser->queues_n > 1 && !found) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
@@ -500,9 +700,8 @@ priv_flow_validate(struct priv *priv,
return -rte_errno;
}
if (!found) {
- action->queue = 1;
- action->queues_n = 1;
- action->queues[0] = queue->index;
+ parser->queues_n = 1;
+ parser->queues[0] = queue->index;
}
} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
const struct rte_flow_action_rss *rss =
@@ -517,12 +716,12 @@ priv_flow_validate(struct priv *priv,
"no valid queues");
return -rte_errno;
}
- if (action->queues_n == 1) {
+ if (parser->queues_n == 1) {
uint16_t found = 0;
- assert(action->queues_n);
+ assert(parser->queues_n);
for (n = 0; n < rss->num; ++n) {
- if (action->queues[0] ==
+ if (parser->queues[0] ==
rss->queue[n]) {
found = 1;
break;
@@ -547,10 +746,17 @@ priv_flow_validate(struct priv *priv,
return -rte_errno;
}
}
- action->queue = 1;
for (n = 0; n < rss->num; ++n)
- action->queues[n] = rss->queue[n];
- action->queues_n = rss->num;
+ parser->queues[n] = rss->queue[n];
+ parser->queues_n = rss->num;
+ if (priv_flow_convert_rss_conf(priv, parser,
+ rss->rss_conf)) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "wrong RSS configuration");
+ return -rte_errno;
+ }
} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
const struct rte_flow_action_mark *mark =
(const struct rte_flow_action_mark *)
@@ -570,30 +776,25 @@ priv_flow_validate(struct priv *priv,
" and 16777199");
return -rte_errno;
}
- action->mark = 1;
- action->mark_id = mark->id;
+ parser->mark = 1;
+ parser->mark_id = mark->id;
} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
- action->mark = 1;
+ parser->mark = 1;
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
+ priv->counter_set_supported) {
+ parser->count = 1;
} else {
goto exit_action_not_supported;
}
}
- if (action->mark && !flow->ibv_attr && !action->drop)
- flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
-#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
- if (!flow->ibv_attr && action->drop)
- flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
-#endif
- if (!action->queue && !action->drop) {
+ if (parser->drop && parser->mark)
+ parser->mark = 0;
+ if (!parser->queues_n && !parser->drop) {
rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
NULL, "no valid action");
return -rte_errno;
}
return 0;
-exit_item_not_supported:
- rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
- items, "item not supported");
- return -rte_errno;
exit_action_not_supported:
rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
actions, "action not supported");
@@ -601,34 +802,467 @@ exit_action_not_supported:
}
/**
- * Validate a flow supported by the NIC.
+ * Validate items.
*
- * @see rte_flow_validate()
- * @see rte_flow_ops
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ * Internal parser structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-int
-mlx5_flow_validate(struct rte_eth_dev *dev,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item items[],
- const struct rte_flow_action actions[],
- struct rte_flow_error *error)
+static int
+priv_flow_convert_items_validate(struct priv *priv,
+ const struct rte_flow_item items[],
+ struct rte_flow_error *error,
+ struct mlx5_flow_parse *parser)
{
- struct priv *priv = dev->data->dev_private;
+ const struct mlx5_flow_items *cur_item = mlx5_flow_items;
+ unsigned int i;
+
+ (void)priv;
+ /* Initialise the offsets to start after the Verbs attribute. */
+ if (parser->drop) {
+ parser->drop_q.offset = sizeof(struct ibv_flow_attr);
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i)
+ parser->queue[i].offset = sizeof(struct ibv_flow_attr);
+ }
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+ const struct mlx5_flow_items *token = NULL;
+ unsigned int n;
+ int err;
+
+ if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+ continue;
+ for (i = 0;
+ cur_item->items &&
+ cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+ ++i) {
+ if (cur_item->items[i] == items->type) {
+ token = &mlx5_flow_items[items->type];
+ break;
+ }
+ }
+ if (!token)
+ goto exit_item_not_supported;
+ cur_item = token;
+ err = mlx5_flow_item_validate(items,
+ (const uint8_t *)cur_item->mask,
+ cur_item->mask_sz);
+ if (err)
+ goto exit_item_not_supported;
+ if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
+ if (parser->inner) {
+ rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ items,
+ "cannot recognize multiple"
+ " VXLAN encapsulations");
+ return -rte_errno;
+ }
+ parser->inner = IBV_FLOW_SPEC_INNER;
+ }
+ if (parser->drop) {
+ parser->drop_q.offset += cur_item->dst_sz;
+ } else if (parser->queues_n == 1) {
+ parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
+ } else {
+ for (n = 0; n != hash_rxq_init_n; ++n)
+ parser->queue[n].offset += cur_item->dst_sz;
+ }
+ }
+ if (parser->mark) {
+ for (i = 0; i != hash_rxq_init_n; ++i)
+ parser->queue[i].offset +=
+ sizeof(struct ibv_flow_spec_action_tag);
+ }
+ if (parser->count) {
+ unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
+
+ if (parser->drop) {
+ parser->drop_q.offset += size;
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i)
+ parser->queue[i].offset += size;
+ }
+ }
+ return 0;
+exit_item_not_supported:
+ rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+ items, "item not supported");
+ return -rte_errno;
+}
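
Item validation doubles as the sizing pass: each item adds its dst_sz to the per-hash-type offsets, which start at sizeof(struct ibv_flow_attr), and the conversion pass later rewinds the offset and fills the allocated buffer. A standalone sketch of this two-pass size-then-fill scheme follows; the attribute header type and the spec sizes are simplified stand-ins.

/*
 * Standalone sketch (not part of this patch): pass 1 accumulates the
 * total size, pass 2 allocates once, rewinds and fills.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct attr_hdr { uint32_t num_of_specs; };	/* stand-in for ibv_flow_attr */

int
main(void)
{
	const size_t item_sz[] = { 16, 40, 24 };	/* example spec sizes */
	size_t offset = sizeof(struct attr_hdr);	/* pass 1: sizing */
	size_t i;
	uint8_t *buf;
	struct attr_hdr *attr;

	for (i = 0; i != sizeof(item_sz) / sizeof(item_sz[0]); ++i)
		offset += item_sz[i];
	buf = calloc(1, offset);		/* header + all specs */
	if (!buf)
		return 1;
	attr = (struct attr_hdr *)buf;
	offset = sizeof(*attr);			/* pass 2: rewind and fill */
	for (i = 0; i != sizeof(item_sz) / sizeof(item_sz[0]); ++i) {
		memset(buf + offset, 0, item_sz[i]);	/* would copy a spec */
		++attr->num_of_specs;
		offset += item_sz[i];
	}
	printf("%u specs in %zu bytes\n", attr->num_of_specs, offset);
	free(buf);
	return 0;
}
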
+
+/**
+ * Allocate memory space to store verbs flow attributes.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] priority
+ * Flow priority.
+ * @param[in] size
+ *   Number of bytes to allocate.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * A verbs flow attribute on success, NULL otherwise.
+ */
+static struct ibv_flow_attr*
+priv_flow_convert_allocate(struct priv *priv,
+ unsigned int priority,
+ unsigned int size,
+ struct rte_flow_error *error)
+{
+ struct ibv_flow_attr *ibv_attr;
+
+ (void)priv;
+ ibv_attr = rte_calloc(__func__, 1, size, 0);
+ if (!ibv_attr) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "cannot allocate verbs spec attributes.");
+ return NULL;
+ }
+ ibv_attr->priority = priority;
+ return ibv_attr;
+}
+
+/**
+ * Finalise verbs flow attributes.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in, out] parser
+ * Internal parser structure.
+ */
+static void
+priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
+{
+ const unsigned int ipv4 =
+ hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
+ const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
+ const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+ const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
+ const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
+ const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+ unsigned int i;
+
+ (void)priv;
+ if (parser->layer == HASH_RXQ_ETH) {
+ goto fill;
+ } else {
+ /*
+ * This layer becomes useless as the pattern defines deeper
+ * layers.
+ */
+ rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
+ parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
+ }
+ /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
+ for (i = ohmin; i != (ohmax + 1); ++i) {
+ if (!parser->queue[i].ibv_attr)
+ continue;
+ rte_free(parser->queue[i].ibv_attr);
+ parser->queue[i].ibv_attr = NULL;
+ }
+ /* Remove impossible flow according to the RSS configuration. */
+ if (hash_rxq_init[parser->layer].dpdk_rss_hf &
+ parser->rss_conf.rss_hf) {
+ /* Remove any other flow. */
+ for (i = hmin; i != (hmax + 1); ++i) {
+ if ((i == parser->layer) ||
+ (!parser->queue[i].ibv_attr))
+ continue;
+ rte_free(parser->queue[i].ibv_attr);
+ parser->queue[i].ibv_attr = NULL;
+ }
+ } else if (!parser->queue[ip].ibv_attr) {
+ /* No RSS is possible with the current configuration. */
+ parser->queues_n = 1;
+ return;
+ }
+fill:
+ /*
+ * Fill missing layers in verbs specifications, or compute the correct
+ * offset to allocate the memory space for the attributes and
+ * specifications.
+ */
+ for (i = 0; i != hash_rxq_init_n - 1; ++i) {
+ union {
+ struct ibv_flow_spec_ipv4_ext ipv4;
+ struct ibv_flow_spec_ipv6 ipv6;
+ struct ibv_flow_spec_tcp_udp udp_tcp;
+ } specs;
+ void *dst;
+ uint16_t size;
+
+ if (i == parser->layer)
+ continue;
+ if (parser->layer == HASH_RXQ_ETH) {
+ if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
+ size = sizeof(struct ibv_flow_spec_ipv4_ext);
+ specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
+ .type = IBV_FLOW_SPEC_IPV4_EXT,
+ .size = size,
+ };
+ } else {
+ size = sizeof(struct ibv_flow_spec_ipv6);
+ specs.ipv6 = (struct ibv_flow_spec_ipv6){
+ .type = IBV_FLOW_SPEC_IPV6,
+ .size = size,
+ };
+ }
+ if (parser->queue[i].ibv_attr) {
+ dst = (void *)((uintptr_t)
+ parser->queue[i].ibv_attr +
+ parser->queue[i].offset);
+ memcpy(dst, &specs, size);
+ ++parser->queue[i].ibv_attr->num_of_specs;
+ }
+ parser->queue[i].offset += size;
+ }
+ if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
+ (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
+ size = sizeof(struct ibv_flow_spec_tcp_udp);
+ specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
+ .type = ((i == HASH_RXQ_UDPV4 ||
+ i == HASH_RXQ_UDPV6) ?
+ IBV_FLOW_SPEC_UDP :
+ IBV_FLOW_SPEC_TCP),
+ .size = size,
+ };
+ if (parser->queue[i].ibv_attr) {
+ dst = (void *)((uintptr_t)
+ parser->queue[i].ibv_attr +
+ parser->queue[i].offset);
+ memcpy(dst, &specs, size);
+ ++parser->queue[i].ibv_attr->num_of_specs;
+ }
+ parser->queue[i].offset += size;
+ }
+ }
+}
+
+/**
+ * Validate and convert a flow supported by the NIC.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] pattern
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ * Internal parser structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_convert(struct priv *priv,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error,
+ struct mlx5_flow_parse *parser)
+{
+ const struct mlx5_flow_items *cur_item = mlx5_flow_items;
+ unsigned int i;
int ret;
- struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
- struct mlx5_flow_action action = {
- .queue = 0,
- .drop = 0,
- .mark = 0,
+
+ /* First step. Validate the attributes, items and actions. */
+ *parser = (struct mlx5_flow_parse){
+ .create = parser->create,
+ .layer = HASH_RXQ_ETH,
.mark_id = MLX5_FLOW_MARK_DEFAULT,
- .queues_n = 0,
};
-
- priv_lock(priv);
- ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
- &action);
- priv_unlock(priv);
+ ret = priv_flow_convert_attributes(priv, attr, error, parser);
+ if (ret)
+ return ret;
+ ret = priv_flow_convert_actions(priv, actions, error, parser);
+ if (ret)
+ return ret;
+ ret = priv_flow_convert_items_validate(priv, items, error, parser);
+ if (ret)
+ return ret;
+ priv_flow_convert_finalise(priv, parser);
+ /*
+ * Second step.
+ * Allocate the memory space to store verbs specifications.
+ */
+ if (parser->drop) {
+ parser->drop_q.ibv_attr =
+ priv_flow_convert_allocate(priv, attr->priority,
+ parser->drop_q.offset,
+ error);
+ if (!parser->drop_q.ibv_attr)
+ return ENOMEM;
+ parser->drop_q.offset = sizeof(struct ibv_flow_attr);
+ } else if (parser->queues_n == 1) {
+ unsigned int priority =
+ attr->priority +
+ hash_rxq_init[HASH_RXQ_ETH].flow_priority;
+ unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
+
+ parser->queue[HASH_RXQ_ETH].ibv_attr =
+ priv_flow_convert_allocate(priv, priority,
+ offset, error);
+ if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
+ return ENOMEM;
+ parser->queue[HASH_RXQ_ETH].offset =
+ sizeof(struct ibv_flow_attr);
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ unsigned int priority =
+ attr->priority +
+ hash_rxq_init[i].flow_priority;
+ unsigned int offset;
+
+ if (!(parser->rss_conf.rss_hf &
+ hash_rxq_init[i].dpdk_rss_hf) &&
+ (i != HASH_RXQ_ETH))
+ continue;
+ offset = parser->queue[i].offset;
+ parser->queue[i].ibv_attr =
+ priv_flow_convert_allocate(priv, priority,
+ offset, error);
+ if (!parser->queue[i].ibv_attr)
+ goto exit_enomem;
+ parser->queue[i].offset = sizeof(struct ibv_flow_attr);
+ }
+ }
+ /* Third step. Parse the pattern and fill the specifications. */
+ parser->inner = 0;
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+ if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+ continue;
+ cur_item = &mlx5_flow_items[items->type];
+ ret = cur_item->convert(items,
+ (cur_item->default_mask ?
+ cur_item->default_mask :
+ cur_item->mask),
+ parser);
+ if (ret) {
+ rte_flow_error_set(error, ret,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ items, "item not supported");
+ goto exit_free;
+ }
+ }
+ if (parser->mark)
+ mlx5_flow_create_flag_mark(parser, parser->mark_id);
+ if (parser->count && parser->create) {
+ mlx5_flow_create_count(priv, parser);
+ if (!parser->cs)
+ goto exit_count_error;
+ }
+ /*
+ * Last step. Complete missing specification to reach the RSS
+ * Last step. Complete the missing specifications to reach the RSS
+ */
+ if (parser->drop) {
+ /*
+ * The drop queue priority needs to be adjusted to the most
+ * specific layer priority.
+ */
+ parser->drop_q.ibv_attr->priority =
+ attr->priority +
+ hash_rxq_init[parser->layer].flow_priority;
+ } else if (parser->queues_n > 1) {
+ priv_flow_convert_finalise(priv, parser);
+ } else {
+ /*
+ * The action queue's priority is overridden with the Ethernet
+ * priority; it needs to be adjusted to the most specific layer
+ * priority.
+ */
+ parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
+ attr->priority +
+ hash_rxq_init[parser->layer].flow_priority;
+ }
+exit_free:
+ /* Only verification was requested; all resources must be released. */
+ if (!parser->create) {
+ if (parser->drop) {
+ rte_free(parser->drop_q.ibv_attr);
+ parser->drop_q.ibv_attr = NULL;
+ }
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (parser->queue[i].ibv_attr) {
+ rte_free(parser->queue[i].ibv_attr);
+ parser->queue[i].ibv_attr = NULL;
+ }
+ }
+ }
return ret;
+exit_enomem:
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (parser->queue[i].ibv_attr) {
+ rte_free(parser->queue[i].ibv_attr);
+ parser->queue[i].ibv_attr = NULL;
+ }
+ }
+ rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "cannot allocate verbs spec attributes.");
+ return ret;
+exit_count_error:
+ rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "cannot create counter.");
+ return rte_errno;
+}
+
+/**
+ * Copy the created specification into the flow.
+ *
+ * @param parser
+ * Internal parser structure.
+ * @param src
+ * Create specification.
+ * @param size
+ * Size in bytes of the specification to copy.
+ */
+static void
+mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
+ unsigned int size)
+{
+ unsigned int i;
+ void *dst;
+
+ if (parser->drop) {
+ dst = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+ parser->drop_q.offset);
+ memcpy(dst, src, size);
+ ++parser->drop_q.ibv_attr->num_of_specs;
+ parser->drop_q.offset += size;
+ return;
+ }
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!parser->queue[i].ibv_attr)
+ continue;
+ /* Specification must be the same l3 type or none. */
+ if (parser->layer == HASH_RXQ_ETH ||
+ (hash_rxq_init[parser->layer].ip_version ==
+ hash_rxq_init[i].ip_version) ||
+ (hash_rxq_init[i].ip_version == 0)) {
+ dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+ parser->queue[i].offset);
+ memcpy(dst, src, size);
+ ++parser->queue[i].ibv_attr->num_of_specs;
+ parser->queue[i].offset += size;
+ }
+ }
}
/**
@@ -648,35 +1282,35 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
{
const struct rte_flow_item_eth *spec = item->spec;
const struct rte_flow_item_eth *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_eth *eth;
- const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
- unsigned int i;
-
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 2;
- flow->hash_fields = 0;
- eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *eth = (struct ibv_exp_flow_spec_eth) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
+ struct ibv_flow_spec_eth eth = {
+ .type = parser->inner | IBV_FLOW_SPEC_ETH,
.size = eth_size,
};
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
- memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
- eth->val.ether_type = spec->type;
- memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
- memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
- eth->mask.ether_type = mask->type;
- /* Remove unwanted bits from values. */
- for (i = 0; i < ETHER_ADDR_LEN; ++i) {
- eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
- eth->val.src_mac[i] &= eth->mask.src_mac[i];
- }
- eth->val.ether_type &= eth->mask.ether_type;
+
+ /* Don't update layer for the inner pattern. */
+ if (!parser->inner)
+ parser->layer = HASH_RXQ_ETH;
+ if (spec) {
+ unsigned int i;
+
+ if (!mask)
+ mask = default_mask;
+ memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+ memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+ eth.val.ether_type = spec->type;
+ memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+ memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+ eth.mask.ether_type = mask->type;
+ /* Remove unwanted bits from values. */
+ for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+ eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
+ eth.val.src_mac[i] &= eth.mask.src_mac[i];
+ }
+ eth.val.ether_type &= eth.mask.ether_type;
+ }
+ mlx5_flow_create_copy(parser, &eth, eth_size);
return 0;
}
@@ -697,18 +1331,34 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
{
const struct rte_flow_item_vlan *spec = item->spec;
const struct rte_flow_item_vlan *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_eth *eth;
- const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ struct ibv_flow_spec_eth *eth;
+ const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
- eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- eth->val.vlan_tag = spec->tci;
- eth->mask.vlan_tag = mask->tci;
- eth->val.vlan_tag &= eth->mask.vlan_tag;
+ if (spec) {
+ unsigned int i;
+ if (!mask)
+ mask = default_mask;
+
+ if (parser->drop) {
+ eth = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+ parser->drop_q.offset - eth_size);
+ eth->val.vlan_tag = spec->tci;
+ eth->mask.vlan_tag = mask->tci;
+ eth->val.vlan_tag &= eth->mask.vlan_tag;
+ return 0;
+ }
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!parser->queue[i].ibv_attr)
+ continue;
+
+ eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+ parser->queue[i].offset - eth_size);
+ eth->val.vlan_tag = spec->tci;
+ eth->mask.vlan_tag = mask->tci;
+ eth->val.vlan_tag &= eth->mask.vlan_tag;
+ }
+ }
return 0;
}
@@ -729,40 +1379,38 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
{
const struct rte_flow_item_ipv4 *spec = item->spec;
const struct rte_flow_item_ipv4 *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_ipv4_ext *ipv4;
- unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
-
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 1;
- flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
- IBV_EXP_RX_HASH_DST_IPV4);
- ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
+ struct ibv_flow_spec_ipv4_ext ipv4 = {
+ .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
.size = ipv4_size,
};
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
- .src_ip = spec->hdr.src_addr,
- .dst_ip = spec->hdr.dst_addr,
- .proto = spec->hdr.next_proto_id,
- .tos = spec->hdr.type_of_service,
- };
- ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
- .src_ip = mask->hdr.src_addr,
- .dst_ip = mask->hdr.dst_addr,
- .proto = mask->hdr.next_proto_id,
- .tos = mask->hdr.type_of_service,
- };
- /* Remove unwanted bits from values. */
- ipv4->val.src_ip &= ipv4->mask.src_ip;
- ipv4->val.dst_ip &= ipv4->mask.dst_ip;
- ipv4->val.proto &= ipv4->mask.proto;
- ipv4->val.tos &= ipv4->mask.tos;
+
+ /* Don't update layer for the inner pattern. */
+ if (!parser->inner)
+ parser->layer = HASH_RXQ_IPV4;
+ if (spec) {
+ if (!mask)
+ mask = default_mask;
+ ipv4.val = (struct ibv_flow_ipv4_ext_filter){
+ .src_ip = spec->hdr.src_addr,
+ .dst_ip = spec->hdr.dst_addr,
+ .proto = spec->hdr.next_proto_id,
+ .tos = spec->hdr.type_of_service,
+ };
+ ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
+ .src_ip = mask->hdr.src_addr,
+ .dst_ip = mask->hdr.dst_addr,
+ .proto = mask->hdr.next_proto_id,
+ .tos = mask->hdr.type_of_service,
+ };
+ /* Remove unwanted bits from values. */
+ ipv4.val.src_ip &= ipv4.mask.src_ip;
+ ipv4.val.dst_ip &= ipv4.mask.dst_ip;
+ ipv4.val.proto &= ipv4.mask.proto;
+ ipv4.val.tos &= ipv4.mask.tos;
+ }
+ mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
return 0;
}
@@ -783,43 +1431,42 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
{
const struct rte_flow_item_ipv6 *spec = item->spec;
const struct rte_flow_item_ipv6 *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_ipv6_ext *ipv6;
- unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
- unsigned int i;
-
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 1;
- flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
- IBV_EXP_RX_HASH_DST_IPV6);
- ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
+ struct ibv_flow_spec_ipv6 ipv6 = {
+ .type = parser->inner | IBV_FLOW_SPEC_IPV6,
.size = ipv6_size,
};
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
- RTE_DIM(ipv6->val.src_ip));
- memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
- RTE_DIM(ipv6->val.dst_ip));
- memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
- RTE_DIM(ipv6->mask.src_ip));
- memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
- RTE_DIM(ipv6->mask.dst_ip));
- ipv6->mask.flow_label = mask->hdr.vtc_flow;
- ipv6->mask.next_hdr = mask->hdr.proto;
- ipv6->mask.hop_limit = mask->hdr.hop_limits;
- /* Remove unwanted bits from values. */
- for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
- ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
- ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
- }
- ipv6->val.flow_label &= ipv6->mask.flow_label;
- ipv6->val.next_hdr &= ipv6->mask.next_hdr;
- ipv6->val.hop_limit &= ipv6->mask.hop_limit;
+
+ /* Don't update layer for the inner pattern. */
+ if (!parser->inner)
+ parser->layer = HASH_RXQ_IPV6;
+ if (spec) {
+ unsigned int i;
+
+ if (!mask)
+ mask = default_mask;
+ memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
+ RTE_DIM(ipv6.val.src_ip));
+ memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
+ RTE_DIM(ipv6.val.dst_ip));
+ memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
+ RTE_DIM(ipv6.mask.src_ip));
+ memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
+ RTE_DIM(ipv6.mask.dst_ip));
+ ipv6.mask.flow_label = mask->hdr.vtc_flow;
+ ipv6.mask.next_hdr = mask->hdr.proto;
+ ipv6.mask.hop_limit = mask->hdr.hop_limits;
+ /* Remove unwanted bits from values. */
+ for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
+ ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
+ ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
+ }
+ ipv6.val.flow_label &= ipv6.mask.flow_label;
+ ipv6.val.next_hdr &= ipv6.mask.next_hdr;
+ ipv6.val.hop_limit &= ipv6.mask.hop_limit;
+ }
+ mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
return 0;
}
@@ -840,30 +1487,32 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
{
const struct rte_flow_item_udp *spec = item->spec;
const struct rte_flow_item_udp *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_tcp_udp *udp;
- unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
-
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 0;
- flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
- IBV_EXP_RX_HASH_DST_PORT_UDP);
- udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *udp = (struct ibv_exp_flow_spec_tcp_udp) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
+ struct ibv_flow_spec_tcp_udp udp = {
+ .type = parser->inner | IBV_FLOW_SPEC_UDP,
.size = udp_size,
};
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- udp->val.dst_port = spec->hdr.dst_port;
- udp->val.src_port = spec->hdr.src_port;
- udp->mask.dst_port = mask->hdr.dst_port;
- udp->mask.src_port = mask->hdr.src_port;
- /* Remove unwanted bits from values. */
- udp->val.src_port &= udp->mask.src_port;
- udp->val.dst_port &= udp->mask.dst_port;
+
+ /* Don't update layer for the inner pattern. */
+ if (!parser->inner) {
+ if (parser->layer == HASH_RXQ_IPV4)
+ parser->layer = HASH_RXQ_UDPV4;
+ else
+ parser->layer = HASH_RXQ_UDPV6;
+ }
+ if (spec) {
+ if (!mask)
+ mask = default_mask;
+ udp.val.dst_port = spec->hdr.dst_port;
+ udp.val.src_port = spec->hdr.src_port;
+ udp.mask.dst_port = mask->hdr.dst_port;
+ udp.mask.src_port = mask->hdr.src_port;
+ /* Remove unwanted bits from values. */
+ udp.val.src_port &= udp.mask.src_port;
+ udp.val.dst_port &= udp.mask.dst_port;
+ }
+ mlx5_flow_create_copy(parser, &udp, udp_size);
return 0;
}
@@ -884,30 +1533,32 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
{
const struct rte_flow_item_tcp *spec = item->spec;
const struct rte_flow_item_tcp *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_tcp_udp *tcp;
- unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
-
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 0;
- flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
- IBV_EXP_RX_HASH_DST_PORT_TCP);
- tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
+ struct ibv_flow_spec_tcp_udp tcp = {
+ .type = parser->inner | IBV_FLOW_SPEC_TCP,
.size = tcp_size,
};
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- tcp->val.dst_port = spec->hdr.dst_port;
- tcp->val.src_port = spec->hdr.src_port;
- tcp->mask.dst_port = mask->hdr.dst_port;
- tcp->mask.src_port = mask->hdr.src_port;
- /* Remove unwanted bits from values. */
- tcp->val.src_port &= tcp->mask.src_port;
- tcp->val.dst_port &= tcp->mask.dst_port;
+
+ /* Don't update layer for the inner pattern. */
+ if (!parser->inner) {
+ if (parser->layer == HASH_RXQ_IPV4)
+ parser->layer = HASH_RXQ_TCPV4;
+ else
+ parser->layer = HASH_RXQ_TCPV6;
+ }
+ if (spec) {
+ if (!mask)
+ mask = default_mask;
+ tcp.val.dst_port = spec->hdr.dst_port;
+ tcp.val.src_port = spec->hdr.src_port;
+ tcp.mask.dst_port = mask->hdr.dst_port;
+ tcp.mask.src_port = mask->hdr.src_port;
+ /* Remove unwanted bits from values. */
+ tcp.val.src_port &= tcp.mask.src_port;
+ tcp.val.dst_port &= tcp.mask.dst_port;
+ }
+ mlx5_flow_create_copy(parser, &tcp, tcp_size);
return 0;
}
@@ -928,57 +1579,97 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
{
const struct rte_flow_item_vxlan *spec = item->spec;
const struct rte_flow_item_vxlan *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_tunnel *vxlan;
- unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+ struct ibv_flow_spec_tunnel vxlan = {
+ .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
+ .size = size,
+ };
union vni {
uint32_t vlan_id;
uint8_t vni[4];
} id;
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 0;
id.vni[0] = 0;
- vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *vxlan = (struct ibv_exp_flow_spec_tunnel) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
- .size = size,
- };
- flow->inner = IBV_EXP_FLOW_SPEC_INNER;
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- memcpy(&id.vni[1], spec->vni, 3);
- vxlan->val.tunnel_id = id.vlan_id;
- memcpy(&id.vni[1], mask->vni, 3);
- vxlan->mask.tunnel_id = id.vlan_id;
- /* Remove unwanted bits from values. */
- vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
+ parser->inner = IBV_FLOW_SPEC_INNER;
+ if (spec) {
+ if (!mask)
+ mask = default_mask;
+ memcpy(&id.vni[1], spec->vni, 3);
+ vxlan.val.tunnel_id = id.vlan_id;
+ memcpy(&id.vni[1], mask->vni, 3);
+ vxlan.mask.tunnel_id = id.vlan_id;
+ /* Remove unwanted bits from values. */
+ vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
+ }
+ /*
+ * Tunnel id 0 is equivalent to not adding a VXLAN layer: if this is
+ * the only layer defined in the Verbs specification, it is
+ * interpreted as a wildcard and all packets will match the rule;
+ * if it follows a full stack of layers (e.g. eth / ipv4 / udp), all
+ * packets matching the preceding layers will also match the rule.
+ * To avoid such a situation, VNI 0 is currently refused.
+ */
+ if (!vxlan.val.tunnel_id)
+ return EINVAL;
+ mlx5_flow_create_copy(parser, &vxlan, size);
return 0;
}
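
mlx5_flow_create_vxlan() packs the 24-bit VNI into a 32-bit tunnel id by writing the three VNI bytes into bytes 1..3 of a union and leaving byte 0 zero. A standalone sketch of that packing follows; note that the resulting 32-bit value depends on host byte order.

/*
 * Standalone sketch (not part of this patch): pack a 24-bit VNI into a
 * 32-bit tunnel id through a union.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const uint8_t vni[3] = { 0x12, 0x34, 0x56 };	/* as in item->vni */
	union {
		uint32_t vlan_id;	/* 32-bit tunnel id handed to Verbs */
		uint8_t vni[4];
	} id;

	id.vni[0] = 0;			/* byte 0 stays zero */
	memcpy(&id.vni[1], vni, 3);	/* 24-bit VNI into bytes 1..3 */
	printf("tunnel_id=0x%08" PRIx32 "\n", id.vlan_id);
	return 0;
}
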
/**
* Convert mark/flag action to Verbs specification.
*
- * @param flow
- * Pointer to MLX5 flow structure.
+ * @param parser
+ * Internal parser structure.
* @param mark_id
* Mark identifier.
*/
static int
-mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
+mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
- struct ibv_exp_flow_spec_action_tag *tag;
- unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
-
- tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *tag = (struct ibv_exp_flow_spec_action_tag){
- .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
+ unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
+ struct ibv_flow_spec_action_tag tag = {
+ .type = IBV_FLOW_SPEC_ACTION_TAG,
.size = size,
.tag_id = mlx5_flow_mark_set(mark_id),
};
- ++flow->ibv_attr->num_of_specs;
+
+ assert(parser->mark);
+ mlx5_flow_create_copy(parser, &tag, size);
+ return 0;
+}
+
+/**
+ * Convert count action to Verbs specification.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param parser
+ * Pointer to MLX5 flow parser structure.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+mlx5_flow_create_count(struct priv *priv __rte_unused,
+ struct mlx5_flow_parse *parser __rte_unused)
+{
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+ unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
+ struct ibv_counter_set_init_attr init_attr = {0};
+ struct ibv_flow_spec_counter_action counter = {
+ .type = IBV_FLOW_SPEC_ACTION_COUNT,
+ .size = size,
+ .counter_set_handle = 0,
+ };
+
+ init_attr.counter_set_id = 0;
+ parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
+ if (!parser->cs)
+ return EINVAL;
+ counter.counter_set_handle = parser->cs->handle;
+ mlx5_flow_create_copy(parser, &counter, size);
+#endif
return 0;
}
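
mlx5_flow_create_count() is only effective when HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT is defined; otherwise the dummy types and the ibv_destroy_counter_set() stub defined earlier keep the code compiling while the count action is effectively disabled. A standalone sketch of that conditional-feature pattern follows; HAVE_FEATURE_X and the types below are hypothetical.

/*
 * Standalone sketch (not part of this patch): stub out a feature at
 * compile time so callers build either way.
 */
#include <errno.h>
#include <stdio.h>

#ifndef HAVE_FEATURE_X
struct feature_handle {
	int dummy;	/* placeholder so pointers still type-check */
};

static inline int
feature_destroy(struct feature_handle *h)
{
	(void)h;
	return -ENOTSUP;	/* feature compiled out */
}
#endif

int
main(void)
{
#ifdef HAVE_FEATURE_X
	printf("feature enabled\n");
#else
	printf("feature disabled: %d\n", feature_destroy(NULL));
#endif
	return 0;
}
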
@@ -987,59 +1678,127 @@ mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
*
* @param priv
* Pointer to private structure.
+ * @param parser
+ * Internal parser structure.
* @param flow
- * MLX5 flow attributes (filled by mlx5_flow_validate()).
+ * Pointer to the rte_flow.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
- * A flow if the rule could be created.
+ * 0 on success, errno value on failure.
*/
-static struct rte_flow *
+static int
priv_flow_create_action_queue_drop(struct priv *priv,
- struct mlx5_flow *flow,
+ struct mlx5_flow_parse *parser,
+ struct rte_flow *flow,
struct rte_flow_error *error)
{
- struct rte_flow *rte_flow;
-#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
- struct ibv_exp_flow_spec_action_drop *drop;
- unsigned int size = sizeof(struct ibv_exp_flow_spec_action_drop);
-#endif
+ struct ibv_flow_spec_action_drop *drop;
+ unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
+ int err = 0;
assert(priv->pd);
assert(priv->ctx);
- rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
- if (!rte_flow) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate flow memory");
- return NULL;
- }
- rte_flow->drop = 1;
-#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
- drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *drop = (struct ibv_exp_flow_spec_action_drop){
- .type = IBV_EXP_FLOW_SPEC_ACTION_DROP,
+ flow->drop = 1;
+ drop = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+ parser->drop_q.offset);
+ *drop = (struct ibv_flow_spec_action_drop){
+ .type = IBV_FLOW_SPEC_ACTION_DROP,
.size = size,
};
- ++flow->ibv_attr->num_of_specs;
- flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
-#endif
- rte_flow->ibv_attr = flow->ibv_attr;
- if (!priv->started)
- return rte_flow;
- rte_flow->qp = priv->flow_drop_queue->qp;
- rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
- rte_flow->ibv_attr);
- if (!rte_flow->ibv_flow) {
+ ++parser->drop_q.ibv_attr->num_of_specs;
+ parser->drop_q.offset += size;
+ flow->drxq.ibv_attr = parser->drop_q.ibv_attr;
+ if (parser->count)
+ flow->cs = parser->cs;
+ if (!priv->dev->data->dev_started)
+ return 0;
+ parser->drop_q.ibv_attr = NULL;
+ flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp,
+ flow->drxq.ibv_attr);
+ if (!flow->drxq.ibv_flow) {
rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
NULL, "flow rule creation failure");
+ err = ENOMEM;
goto error;
}
- return rte_flow;
+ return 0;
error:
- assert(rte_flow);
- rte_free(rte_flow);
- return NULL;
+ assert(flow);
+ if (flow->drxq.ibv_flow) {
+ claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+ flow->drxq.ibv_flow = NULL;
+ }
+ if (flow->drxq.ibv_attr) {
+ rte_free(flow->drxq.ibv_attr);
+ flow->drxq.ibv_attr = NULL;
+ }
+ if (flow->cs) {
+ claim_zero(ibv_destroy_counter_set(flow->cs));
+ flow->cs = NULL;
+ parser->cs = NULL;
+ }
+ return err;
+}
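Since the drop specification is appended to the attribute block owned by parser->drop_q, a drop rule never instantiates hash Rx queues. From the application side such a rule reduces to the following illustrative sketch (the source address is an arbitrary assumption):

#include <rte_byteorder.h>
#include <rte_flow.h>

/* Illustrative sketch: silently drop all IPv4 traffic from 192.0.2.1. */
static struct rte_flow *
example_drop_rule(uint16_t port_id, struct rte_flow_error *error)
{
    struct rte_flow_attr attr = { .ingress = 1 };
    struct rte_flow_item_ipv4 spec = {
        .hdr.src_addr = rte_cpu_to_be_32(0xc0000201), /* 192.0.2.1 */
    };
    struct rte_flow_item_ipv4 mask = {
        .hdr.src_addr = rte_cpu_to_be_32(0xffffffff),
    };
    struct rte_flow_item pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &spec, .mask = &mask },
        { .type = RTE_FLOW_ITEM_TYPE_END },
    };
    struct rte_flow_action actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_DROP },
        { .type = RTE_FLOW_ACTION_TYPE_END },
    };

    return rte_flow_create(port_id, &attr, pattern, actions, error);
}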
+
+/**
+ * Create hash Rx queues when RSS is enabled.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param parser
+ * Internal parser structure.
+ * @param flow
+ * Pointer to the rte_flow.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, an errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_create_action_queue_rss(struct priv *priv,
+ struct mlx5_flow_parse *parser,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ unsigned int i;
+
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ uint64_t hash_fields;
+
+ if (!parser->queue[i].ibv_attr)
+ continue;
+ flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
+ parser->queue[i].ibv_attr = NULL;
+ hash_fields = hash_rxq_init[i].hash_fields;
+ if (!priv->dev->data->dev_started)
+ continue;
+ flow->frxq[i].hrxq =
+ mlx5_priv_hrxq_get(priv,
+ parser->rss_conf.rss_key,
+ parser->rss_conf.rss_key_len,
+ hash_fields,
+ parser->queues,
+ parser->queues_n);
+ if (flow->frxq[i].hrxq)
+ continue;
+ flow->frxq[i].hrxq =
+ mlx5_priv_hrxq_new(priv,
+ parser->rss_conf.rss_key,
+ parser->rss_conf.rss_key_len,
+ hash_fields,
+ parser->queues,
+ parser->queues_n);
+ if (!flow->frxq[i].hrxq) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_HANDLE,
+ NULL, "cannot create hash rxq");
+ return ENOMEM;
+ }
+ }
+ return 0;
}
/**
@@ -1047,112 +1806,79 @@ error:
*
* @param priv
* Pointer to private structure.
+ * @param parser
+ * Internal parser structure.
* @param flow
- * MLX5 flow attributes (filled by mlx5_flow_validate()).
- * @param action
- * Target action structure.
+ * Pointer to the rte_flow.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
- * A flow if the rule could be created.
+ * 0 on success, an errno value otherwise and rte_errno is set.
*/
-static struct rte_flow *
+static int
priv_flow_create_action_queue(struct priv *priv,
- struct mlx5_flow *flow,
- struct mlx5_flow_action *action,
+ struct mlx5_flow_parse *parser,
+ struct rte_flow *flow,
struct rte_flow_error *error)
{
- struct rte_flow *rte_flow;
+ int err = 0;
unsigned int i;
- unsigned int j;
- const unsigned int wqs_n = 1 << log2above(action->queues_n);
- struct ibv_exp_wq *wqs[wqs_n];
assert(priv->pd);
assert(priv->ctx);
- assert(!action->drop);
- rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
- sizeof(*rte_flow->rxqs) * action->queues_n, 0);
- if (!rte_flow) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate flow memory");
- return NULL;
- }
- for (i = 0; i < action->queues_n; ++i) {
- struct rxq_ctrl *rxq;
-
- rxq = container_of((*priv->rxqs)[action->queues[i]],
- struct rxq_ctrl, rxq);
- wqs[i] = rxq->wq;
- rte_flow->rxqs[i] = &rxq->rxq;
- ++rte_flow->rxqs_n;
- rxq->rxq.mark |= action->mark;
- }
- /* finalise indirection table. */
- for (j = 0; i < wqs_n; ++i, ++j) {
- wqs[i] = wqs[j];
- if (j == action->queues_n)
- j = 0;
- }
- rte_flow->mark = action->mark;
- rte_flow->ibv_attr = flow->ibv_attr;
- rte_flow->hash_fields = flow->hash_fields;
- rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
- priv->ctx,
- &(struct ibv_exp_rwq_ind_table_init_attr){
- .pd = priv->pd,
- .log_ind_tbl_size = log2above(action->queues_n),
- .ind_tbl = wqs,
- .comp_mask = 0,
- });
- if (!rte_flow->ind_table) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate indirection table");
- goto error;
- }
- rte_flow->qp = ibv_exp_create_qp(
- priv->ctx,
- &(struct ibv_exp_qp_init_attr){
- .qp_type = IBV_QPT_RAW_PACKET,
- .comp_mask =
- IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_PORT |
- IBV_EXP_QP_INIT_ATTR_RX_HASH,
- .pd = priv->pd,
- .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
- .rx_hash_function =
- IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
- .rx_hash_key_len = rss_hash_default_key_len,
- .rx_hash_key = rss_hash_default_key,
- .rx_hash_fields_mask = rte_flow->hash_fields,
- .rwq_ind_tbl = rte_flow->ind_table,
- },
- .port_num = priv->port,
- });
- if (!rte_flow->qp) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate QP");
+ assert(!parser->drop);
+ err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
+ if (err)
goto error;
+ if (parser->count)
+ flow->cs = parser->cs;
+ if (!priv->dev->data->dev_started)
+ return 0;
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!flow->frxq[i].hrxq)
+ continue;
+ flow->frxq[i].ibv_flow =
+ ibv_create_flow(flow->frxq[i].hrxq->qp,
+ flow->frxq[i].ibv_attr);
+ if (!flow->frxq[i].ibv_flow) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_HANDLE,
+ NULL, "flow rule creation failure");
+ err = ENOMEM;
+ goto error;
+ }
+ DEBUG("%p type %d QP %p ibv_flow %p",
+ (void *)flow, i,
+ (void *)flow->frxq[i].hrxq,
+ (void *)flow->frxq[i].ibv_flow);
}
- if (!priv->started)
- return rte_flow;
- rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
- rte_flow->ibv_attr);
- if (!rte_flow->ibv_flow) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "flow rule creation failure");
- goto error;
+ for (i = 0; i != parser->queues_n; ++i) {
+ struct mlx5_rxq_data *q =
+ (*priv->rxqs)[parser->queues[i]];
+
+ q->mark |= parser->mark;
}
- return rte_flow;
+ return 0;
error:
- assert(rte_flow);
- if (rte_flow->qp)
- ibv_destroy_qp(rte_flow->qp);
- if (rte_flow->ind_table)
- ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
- rte_free(rte_flow);
- return NULL;
+ assert(flow);
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (flow->frxq[i].ibv_flow) {
+ struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
+
+ claim_zero(ibv_destroy_flow(ibv_flow));
+ }
+ if (flow->frxq[i].hrxq)
+ mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
+ if (flow->frxq[i].ibv_attr)
+ rte_free(flow->frxq[i].ibv_attr);
+ }
+ if (flow->cs) {
+ claim_zero(ibv_destroy_counter_set(flow->cs));
+ flow->cs = NULL;
+ parser->cs = NULL;
+ }
+ return err;
}
/**
@@ -1160,6 +1886,8 @@ error:
*
* @param priv
* Pointer to private structure.
+ * @param list
+ * Pointer to a TAILQ flow list.
* @param[in] attr
* Flow rule attributes.
* @param[in] pattern
@@ -1174,65 +1902,86 @@ error:
*/
static struct rte_flow *
priv_flow_create(struct priv *priv,
+ struct mlx5_flows *list,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
struct rte_flow_error *error)
{
- struct rte_flow *rte_flow;
- struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
- struct mlx5_flow_action action = {
- .queue = 0,
- .drop = 0,
- .mark = 0,
- .mark_id = MLX5_FLOW_MARK_DEFAULT,
- .queues_n = 0,
- };
+ struct mlx5_flow_parse parser = { .create = 1, };
+ struct rte_flow *flow = NULL;
+ unsigned int i;
int err;
- err = priv_flow_validate(priv, attr, items, actions, error, &flow,
- &action);
+ err = priv_flow_convert(priv, attr, items, actions, error, &parser);
if (err)
goto exit;
- flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
- flow.offset = sizeof(struct ibv_exp_flow_attr);
- if (!flow.ibv_attr) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate ibv_attr memory");
- goto exit;
+ flow = rte_calloc(__func__, 1,
+ sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
+ 0);
+ if (!flow) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "cannot allocate flow memory");
+ return NULL;
}
- *flow.ibv_attr = (struct ibv_exp_flow_attr){
- .type = IBV_EXP_FLOW_ATTR_NORMAL,
- .size = sizeof(struct ibv_exp_flow_attr),
- .priority = attr->priority,
- .num_of_specs = 0,
- .port = 0,
- .flags = 0,
- .reserved = 0,
- };
- flow.inner = 0;
- flow.hash_fields = 0;
- claim_zero(priv_flow_validate(priv, attr, items, actions,
- error, &flow, &action));
- if (action.mark && !action.drop) {
- mlx5_flow_create_flag_mark(&flow, action.mark_id);
- flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
- }
- if (action.drop)
- rte_flow =
- priv_flow_create_action_queue_drop(priv, &flow, error);
- else
- rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
+ /* Copy queues configuration. */
+ flow->queues = (uint16_t (*)[])(flow + 1);
+ memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
+ flow->queues_n = parser.queues_n;
+ /* Copy RSS configuration. */
+ flow->rss_conf = parser.rss_conf;
+ flow->rss_conf.rss_key = flow->rss_key;
+ memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
+ /* finalise the flow. */
+ if (parser.drop)
+ err = priv_flow_create_action_queue_drop(priv, &parser, flow,
error);
- if (!rte_flow)
+ else
+ err = priv_flow_create_action_queue(priv, &parser, flow, error);
+ if (err)
goto exit;
- return rte_flow;
+ TAILQ_INSERT_TAIL(list, flow, next);
+ DEBUG("Flow created %p", (void *)flow);
+ return flow;
exit:
- rte_free(flow.ibv_attr);
+ if (parser.drop) {
+ rte_free(parser.drop_q.ibv_attr);
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (parser.queue[i].ibv_attr)
+ rte_free(parser.queue[i].ibv_attr);
+ }
+ }
+ rte_free(flow);
return NULL;
}
/**
+ * Validate a flow supported by the NIC.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+ int ret;
+ struct mlx5_flow_parse parser = { .create = 0, };
+
+ priv_lock(priv);
+ ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
+ priv_unlock(priv);
+ return ret;
+}
+
+/**
* Create a flow.
*
* @see rte_flow_create()
@@ -1249,11 +1998,8 @@ mlx5_flow_create(struct rte_eth_dev *dev,
struct rte_flow *flow;
priv_lock(priv);
- flow = priv_flow_create(priv, attr, items, actions, error);
- if (flow) {
- TAILQ_INSERT_TAIL(&priv->flows, flow, next);
- DEBUG("Flow created %p", (void *)flow);
- }
+ flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
+ error);
priv_unlock(priv);
return flow;
}
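Because mlx5_flow_validate() runs the same priv_flow_convert() pass as the creation path without allocating resources, an application can cheaply check a rule before committing to it. An illustrative sketch, not part of the patch:

#include <stdio.h>
#include <rte_flow.h>

/* Illustrative sketch: validate a rule first, create it only if accepted. */
static struct rte_flow *
example_checked_create(uint16_t port_id,
                       const struct rte_flow_attr *attr,
                       const struct rte_flow_item pattern[],
                       const struct rte_flow_action actions[])
{
    struct rte_flow_error error;

    if (rte_flow_validate(port_id, attr, pattern, actions, &error)) {
        printf("rule rejected: %s\n",
               error.message ? error.message : "unknown cause");
        return NULL;
    }
    return rte_flow_create(port_id, attr, pattern, actions, &error);
}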
@@ -1263,121 +2009,95 @@ mlx5_flow_create(struct rte_eth_dev *dev,
*
* @param priv
* Pointer to private structure.
+ * @param list
+ * Pointer to a TAILQ flow list.
* @param[in] flow
* Flow to destroy.
*/
static void
priv_flow_destroy(struct priv *priv,
+ struct mlx5_flows *list,
struct rte_flow *flow)
{
- TAILQ_REMOVE(&priv->flows, flow, next);
- if (flow->ibv_flow)
- claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
- if (flow->drop)
+ unsigned int i;
+
+ if (flow->drop || !flow->mark)
goto free;
- if (flow->qp)
- claim_zero(ibv_destroy_qp(flow->qp));
- if (flow->ind_table)
- claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
- if (flow->drop && flow->wq)
- claim_zero(ibv_exp_destroy_wq(flow->wq));
- if (flow->drop && flow->cq)
- claim_zero(ibv_destroy_cq(flow->cq));
- if (flow->mark) {
+ for (i = 0; i != flow->queues_n; ++i) {
struct rte_flow *tmp;
- struct rxq *rxq;
- uint32_t mark_n = 0;
- uint32_t queue_n;
+ int mark = 0;
/*
* To remove the mark from the queue, the queue must not be
* present in any other marked flow (RSS or not).
*/
- for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
- rxq = flow->rxqs[queue_n];
- for (tmp = TAILQ_FIRST(&priv->flows);
- tmp;
- tmp = TAILQ_NEXT(tmp, next)) {
- uint32_t tqueue_n;
-
- if (tmp->drop)
+ TAILQ_FOREACH(tmp, list, next) {
+ unsigned int j;
+ uint16_t *tqs = NULL;
+ uint16_t tq_n = 0;
+
+ if (!tmp->mark)
+ continue;
+ for (j = 0; j != hash_rxq_init_n; ++j) {
+ if (!tmp->frxq[j].hrxq)
continue;
- for (tqueue_n = 0;
- tqueue_n < tmp->rxqs_n;
- ++tqueue_n) {
- struct rxq *trxq;
-
- trxq = tmp->rxqs[tqueue_n];
- if (rxq == trxq)
- ++mark_n;
- }
+ tqs = tmp->frxq[j].hrxq->ind_table->queues;
+ tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
}
- rxq->mark = !!mark_n;
+ if (!tq_n)
+ continue;
+ for (j = 0; (j != tq_n) && !mark; j++)
+ if (tqs[j] == (*flow->queues)[i])
+ mark = 1;
}
+ (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
}
free:
- rte_free(flow->ibv_attr);
+ if (flow->drop) {
+ if (flow->drxq.ibv_flow)
+ claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+ rte_free(flow->drxq.ibv_attr);
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ struct mlx5_flow *frxq = &flow->frxq[i];
+
+ if (frxq->ibv_flow)
+ claim_zero(ibv_destroy_flow(frxq->ibv_flow));
+ if (frxq->hrxq)
+ mlx5_priv_hrxq_release(priv, frxq->hrxq);
+ if (frxq->ibv_attr)
+ rte_free(frxq->ibv_attr);
+ }
+ }
+ if (flow->cs) {
+ claim_zero(ibv_destroy_counter_set(flow->cs));
+ flow->cs = NULL;
+ }
+ TAILQ_REMOVE(list, flow, next);
DEBUG("Flow destroyed %p", (void *)flow);
rte_free(flow);
}
/**
- * Destroy a flow.
- *
- * @see rte_flow_destroy()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_destroy(struct rte_eth_dev *dev,
- struct rte_flow *flow,
- struct rte_flow_error *error)
-{
- struct priv *priv = dev->data->dev_private;
-
- (void)error;
- priv_lock(priv);
- priv_flow_destroy(priv, flow);
- priv_unlock(priv);
- return 0;
-}
-
-/**
* Destroy all flows.
*
* @param priv
* Pointer to private structure.
+ * @param list
+ * Pointer to a TAILQ flow list.
*/
-static void
-priv_flow_flush(struct priv *priv)
+void
+priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
{
- while (!TAILQ_EMPTY(&priv->flows)) {
+ while (!TAILQ_EMPTY(list)) {
struct rte_flow *flow;
- flow = TAILQ_FIRST(&priv->flows);
- priv_flow_destroy(priv, flow);
+ flow = TAILQ_FIRST(list);
+ priv_flow_destroy(priv, list, flow);
}
}
/**
- * Destroy all flows.
- *
- * @see rte_flow_flush()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_flush(struct rte_eth_dev *dev,
- struct rte_flow_error *error)
-{
- struct priv *priv = dev->data->dev_private;
-
- (void)error;
- priv_lock(priv);
- priv_flow_flush(priv);
- priv_unlock(priv);
- return 0;
-}
-
-/**
* Create drop queue.
*
* @param priv
@@ -1386,11 +2106,10 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
* @return
* 0 on success.
*/
-static int
+int
priv_flow_create_drop_queue(struct priv *priv)
{
- struct rte_flow_drop *fdq = NULL;
- unsigned int i;
+ struct mlx5_hrxq_drop *fdq = NULL;
assert(priv->pd);
assert(priv->ctx);
@@ -1399,57 +2118,50 @@ priv_flow_create_drop_queue(struct priv *priv)
WARN("cannot allocate memory for drop queue");
goto error;
}
- fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
- &(struct ibv_exp_cq_init_attr){
- .comp_mask = 0,
- });
+ fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
if (!fdq->cq) {
WARN("cannot allocate CQ for drop queue");
goto error;
}
- for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
- fdq->wqs[i] = ibv_exp_create_wq(priv->ctx,
- &(struct ibv_exp_wq_init_attr){
- .wq_type = IBV_EXP_WQT_RQ,
- .max_recv_wr = 1,
- .max_recv_sge = 1,
- .pd = priv->pd,
- .cq = fdq->cq,
- });
- if (!fdq->wqs[i]) {
- WARN("cannot allocate WQ for drop queue");
- goto error;
- }
- }
- fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
- &(struct ibv_exp_rwq_ind_table_init_attr){
+ fdq->wq = ibv_create_wq(priv->ctx,
+ &(struct ibv_wq_init_attr){
+ .wq_type = IBV_WQT_RQ,
+ .max_wr = 1,
+ .max_sge = 1,
.pd = priv->pd,
+ .cq = fdq->cq,
+ });
+ if (!fdq->wq) {
+ WARN("cannot allocate WQ for drop queue");
+ goto error;
+ }
+ fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
+ &(struct ibv_rwq_ind_table_init_attr){
.log_ind_tbl_size = 0,
- .ind_tbl = fdq->wqs,
+ .ind_tbl = &fdq->wq,
.comp_mask = 0,
});
if (!fdq->ind_table) {
WARN("cannot allocate indirection table for drop queue");
goto error;
}
- fdq->qp = ibv_exp_create_qp(priv->ctx,
- &(struct ibv_exp_qp_init_attr){
+ fdq->qp = ibv_create_qp_ex(priv->ctx,
+ &(struct ibv_qp_init_attr_ex){
.qp_type = IBV_QPT_RAW_PACKET,
.comp_mask =
- IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_PORT |
- IBV_EXP_QP_INIT_ATTR_RX_HASH,
- .pd = priv->pd,
- .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
+ IBV_QP_INIT_ATTR_PD |
+ IBV_QP_INIT_ATTR_IND_TABLE |
+ IBV_QP_INIT_ATTR_RX_HASH,
+ .rx_hash_conf = (struct ibv_rx_hash_conf){
.rx_hash_function =
- IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
+ IBV_RX_HASH_FUNC_TOEPLITZ,
.rx_hash_key_len = rss_hash_default_key_len,
.rx_hash_key = rss_hash_default_key,
.rx_hash_fields_mask = 0,
- .rwq_ind_tbl = fdq->ind_table,
},
- .port_num = priv->port,
- });
+ .rwq_ind_tbl = fdq->ind_table,
+ .pd = priv->pd
+ });
if (!fdq->qp) {
WARN("cannot allocate QP for drop queue");
goto error;
@@ -1460,11 +2172,9 @@ error:
if (fdq->qp)
claim_zero(ibv_destroy_qp(fdq->qp));
if (fdq->ind_table)
- claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
- for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
- if (fdq->wqs[i])
- claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
- }
+ claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
+ if (fdq->wq)
+ claim_zero(ibv_destroy_wq(fdq->wq));
if (fdq->cq)
claim_zero(ibv_destroy_cq(fdq->cq));
if (fdq)
@@ -1479,22 +2189,19 @@ error:
* @param priv
* Pointer to private structure.
*/
-static void
+void
priv_flow_delete_drop_queue(struct priv *priv)
{
- struct rte_flow_drop *fdq = priv->flow_drop_queue;
- unsigned int i;
+ struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
if (!fdq)
return;
if (fdq->qp)
claim_zero(ibv_destroy_qp(fdq->qp));
if (fdq->ind_table)
- claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
- for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
- if (fdq->wqs[i])
- claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
- }
+ claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
+ if (fdq->wq)
+ claim_zero(ibv_destroy_wq(fdq->wq));
if (fdq->cq)
claim_zero(ibv_destroy_cq(fdq->cq));
rte_free(fdq);
@@ -1504,28 +2211,49 @@ priv_flow_delete_drop_queue(struct priv *priv)
/**
* Remove all flows.
*
- * Called by dev_stop() to remove all flows.
- *
* @param priv
* Pointer to private structure.
+ * @param list
+ * Pointer to a TAILQ flow list.
*/
void
-priv_flow_stop(struct priv *priv)
+priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
{
struct rte_flow *flow;
- TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
- claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
- flow->ibv_flow = NULL;
+ TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
+ unsigned int i;
+
+ if (flow->drop) {
+ if (!flow->drxq.ibv_flow)
+ continue;
+ claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+ flow->drxq.ibv_flow = NULL;
+ /* Next flow. */
+ continue;
+ }
if (flow->mark) {
- unsigned int n;
+ struct mlx5_ind_table_ibv *ind_tbl = NULL;
- for (n = 0; n < flow->rxqs_n; ++n)
- flow->rxqs[n]->mark = 0;
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!flow->frxq[i].hrxq)
+ continue;
+ ind_tbl = flow->frxq[i].hrxq->ind_table;
+ }
+ assert(ind_tbl);
+ for (i = 0; i != ind_tbl->queues_n; ++i)
+ (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
+ }
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!flow->frxq[i].ibv_flow)
+ continue;
+ claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
+ flow->frxq[i].ibv_flow = NULL;
+ mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
+ flow->frxq[i].hrxq = NULL;
}
DEBUG("Flow %p removed", (void *)flow);
}
- priv_flow_delete_drop_queue(priv);
}
/**
@@ -1533,75 +2261,321 @@ priv_flow_stop(struct priv *priv)
*
* @param priv
* Pointer to private structure.
+ * @param list
+ * Pointer to a TAILQ flow list.
*
* @return
* 0 on success, a errno value otherwise and rte_errno is set.
*/
int
-priv_flow_start(struct priv *priv)
+priv_flow_start(struct priv *priv, struct mlx5_flows *list)
{
- int ret;
struct rte_flow *flow;
- ret = priv_flow_create_drop_queue(priv);
- if (ret)
- return -1;
- TAILQ_FOREACH(flow, &priv->flows, next) {
- struct ibv_qp *qp;
+ TAILQ_FOREACH(flow, list, next) {
+ unsigned int i;
- if (flow->drop)
- qp = priv->flow_drop_queue->qp;
- else
- qp = flow->qp;
- flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
- if (!flow->ibv_flow) {
- DEBUG("Flow %p cannot be applied", (void *)flow);
- rte_errno = EINVAL;
- return rte_errno;
+ if (flow->drop) {
+ flow->drxq.ibv_flow =
+ ibv_create_flow(priv->flow_drop_queue->qp,
+ flow->drxq.ibv_attr);
+ if (!flow->drxq.ibv_flow) {
+ DEBUG("Flow %p cannot be applied",
+ (void *)flow);
+ rte_errno = EINVAL;
+ return rte_errno;
+ }
+ DEBUG("Flow %p applied", (void *)flow);
+ /* Next flow. */
+ continue;
}
- DEBUG("Flow %p applied", (void *)flow);
- if (flow->mark) {
- unsigned int n;
-
- for (n = 0; n < flow->rxqs_n; ++n)
- flow->rxqs[n]->mark = 1;
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!flow->frxq[i].ibv_attr)
+ continue;
+ flow->frxq[i].hrxq =
+ mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
+ flow->rss_conf.rss_key_len,
+ hash_rxq_init[i].hash_fields,
+ (*flow->queues),
+ flow->queues_n);
+ if (flow->frxq[i].hrxq)
+ goto flow_create;
+ flow->frxq[i].hrxq =
+ mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
+ flow->rss_conf.rss_key_len,
+ hash_rxq_init[i].hash_fields,
+ (*flow->queues),
+ flow->queues_n);
+ if (!flow->frxq[i].hrxq) {
+ DEBUG("Flow %p cannot be applied",
+ (void *)flow);
+ rte_errno = EINVAL;
+ return rte_errno;
+ }
+flow_create:
+ flow->frxq[i].ibv_flow =
+ ibv_create_flow(flow->frxq[i].hrxq->qp,
+ flow->frxq[i].ibv_attr);
+ if (!flow->frxq[i].ibv_flow) {
+ DEBUG("Flow %p cannot be applied",
+ (void *)flow);
+ rte_errno = EINVAL;
+ return rte_errno;
+ }
+ DEBUG("Flow %p applied", (void *)flow);
}
+ if (!flow->mark)
+ continue;
+ for (i = 0; i != flow->queues_n; ++i)
+ (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
}
return 0;
}
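As the loops above show, rules kept in the list while the port is stopped hold only their Verbs attributes; the hash Rx queues and ibv_flow objects are instantiated by priv_flow_start() when the port starts. In application terms, with this PMD a rule may be created on a stopped port and becomes active at start time, as in this illustrative sketch:

#include <rte_ethdev.h>
#include <rte_flow.h>

/* Illustrative sketch: a rule created on a stopped port is programmed
 * into the NIC only when rte_eth_dev_start() triggers priv_flow_start(). */
static int
example_create_before_start(uint16_t port_id,
                            const struct rte_flow_attr *attr,
                            const struct rte_flow_item pattern[],
                            const struct rte_flow_action actions[])
{
    struct rte_flow_error error;
    struct rte_flow *flow;

    flow = rte_flow_create(port_id, attr, pattern, actions, &error);
    if (flow == NULL)
        return -1;
    /* The rule only reaches the hardware here. */
    return rte_eth_dev_start(port_id);
}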
/**
- * Verify if the Rx queue is used in a flow.
+ * Verify the flow list is empty.
*
* @param priv
- * Pointer to private structure.
- * @param rxq
- * Pointer to the queue to search.
+ * Pointer to private structure.
+ *
+ * @return
+ * The number of flows not released.
+ */
+int
+priv_flow_verify(struct priv *priv)
+{
+ struct rte_flow *flow;
+ int ret = 0;
+
+ TAILQ_FOREACH(flow, &priv->flows, next) {
+ DEBUG("%p: flow %p still referenced", (void *)priv,
+ (void *)flow);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Enable a control flow configured from the control plane.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param eth_spec
+ * An Ethernet flow spec to apply.
+ * @param eth_mask
+ * An Ethernet flow mask to apply.
+ * @param vlan_spec
+ * A VLAN flow spec to apply.
+ * @param vlan_mask
+ * A VLAN flow mask to apply.
*
* @return
- * Nonzero if the queue is used by a flow.
+ * 0 on success.
*/
int
-priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
+mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
+ struct rte_flow_item_eth *eth_spec,
+ struct rte_flow_item_eth *eth_mask,
+ struct rte_flow_item_vlan *vlan_spec,
+ struct rte_flow_item_vlan *vlan_mask)
{
+ struct priv *priv = dev->data->dev_private;
+ const struct rte_flow_attr attr = {
+ .ingress = 1,
+ .priority = MLX5_CTRL_FLOW_PRIORITY,
+ };
+ struct rte_flow_item items[] = {
+ {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .spec = eth_spec,
+ .last = NULL,
+ .mask = eth_mask,
+ },
+ {
+ .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
+ RTE_FLOW_ITEM_TYPE_END,
+ .spec = vlan_spec,
+ .last = NULL,
+ .mask = vlan_mask,
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ struct rte_flow_action actions[] = {
+ {
+ .type = RTE_FLOW_ACTION_TYPE_RSS,
+ },
+ {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
struct rte_flow *flow;
+ struct rte_flow_error error;
+ unsigned int i;
+ union {
+ struct rte_flow_action_rss rss;
+ struct {
+ const struct rte_eth_rss_conf *rss_conf;
+ uint16_t num;
+ uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
+ } local;
+ } action_rss;
+
+ if (!priv->reta_idx_n)
+ return EINVAL;
+ for (i = 0; i != priv->reta_idx_n; ++i)
+ action_rss.local.queue[i] = (*priv->reta_idx)[i];
+ action_rss.local.rss_conf = &priv->rss_conf;
+ action_rss.local.num = priv->reta_idx_n;
+ actions[0].conf = (const void *)&action_rss.rss;
+ flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
+ &error);
+ if (!flow)
+ return rte_errno;
+ return 0;
+}
- for (flow = TAILQ_FIRST(&priv->flows);
- flow;
- flow = TAILQ_NEXT(flow, next)) {
- unsigned int n;
+/**
+ * Enable a control flow configured from the control plane.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param eth_spec
+ * An Ethernet flow spec to apply.
+ * @param eth_mask
+ * An Ethernet flow mask to apply.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+mlx5_ctrl_flow(struct rte_eth_dev *dev,
+ struct rte_flow_item_eth *eth_spec,
+ struct rte_flow_item_eth *eth_mask)
+{
+ return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
+}
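The union in mlx5_ctrl_flow_vlan() provides storage for the zero-length queue[] member at the end of struct rte_flow_action_rss in this DPDK release. An application building an RSS action by hand faces the same constraint; the sketch below mirrors that pattern and is illustrative only (the queue count and numbers are assumptions):

#include <rte_ethdev.h>
#include <rte_flow.h>

/* Illustrative sketch: spread matching traffic over Rx queues 0 and 1.
 * The union supplies storage for the flexible queue[] array of
 * struct rte_flow_action_rss, exactly as done in mlx5_ctrl_flow_vlan(). */
static struct rte_flow *
example_rss_rule(uint16_t port_id, const struct rte_eth_rss_conf *rss_conf,
                 struct rte_flow_error *error)
{
    struct rte_flow_attr attr = { .ingress = 1 };
    struct rte_flow_item pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_END },
    };
    union {
        struct rte_flow_action_rss rss;
        struct {
            const struct rte_eth_rss_conf *rss_conf;
            uint16_t num;
            uint16_t queue[2];
        } local;
    } rss_action = {
        .local = {
            .rss_conf = rss_conf,
            .num = 2,
            .queue = { 0, 1 },
        },
    };
    struct rte_flow_action actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss_action.rss },
        { .type = RTE_FLOW_ACTION_TYPE_END },
    };

    return rte_flow_create(port_id, &attr, pattern, actions, error);
}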
- if (flow->drop)
- continue;
- for (n = 0; n < flow->rxqs_n; ++n) {
- if (flow->rxqs[n] == rxq)
- return 1;
- }
+/**
+ * Destroy a flow.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+
+ (void)error;
+ priv_lock(priv);
+ priv_flow_destroy(priv, &priv->flows, flow);
+ priv_unlock(priv);
+ return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+
+ (void)error;
+ priv_lock(priv);
+ priv_flow_flush(priv, &priv->flows);
+ priv_unlock(priv);
+ return 0;
+}
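Note that rte_flow_flush() only drains &priv->flows; control flows stored in &priv->ctrl_flows stay under PMD control. A minimal application-side teardown, illustrative only:

#include <rte_flow.h>

/* Illustrative sketch: destroy one known rule, then flush the rest. */
static void
example_teardown(uint16_t port_id, struct rte_flow *flow)
{
    struct rte_flow_error error;

    if (flow != NULL)
        rte_flow_destroy(port_id, flow, &error);
    rte_flow_flush(port_id, &error);
}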
+
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+/**
+ * Query flow counter.
+ *
+ * @param cs
+ * The counter set to read.
+ * @param counter_stats
+ * Previously returned counter values, used to compute the delta.
+ * @param[out] query_count
+ * Structure receiving the counter data.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, an errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_query_count(struct ibv_counter_set *cs,
+ struct mlx5_flow_counter_stats *counter_stats,
+ struct rte_flow_query_count *query_count,
+ struct rte_flow_error *error)
+{
+ uint64_t counters[2];
+ struct ibv_query_counter_set_attr query_cs_attr = {
+ .cs = cs,
+ .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
+ };
+ struct ibv_counter_set_data query_out = {
+ .out = counters,
+ .outlen = 2 * sizeof(uint64_t),
+ };
+ int res = ibv_query_counter_set(&query_cs_attr, &query_out);
+
+ if (res) {
+ rte_flow_error_set(error, -res,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "cannot read counter");
+ return -res;
+ }
+ query_count->hits_set = 1;
+ query_count->bytes_set = 1;
+ query_count->hits = counters[0] - counter_stats->hits;
+ query_count->bytes = counters[1] - counter_stats->bytes;
+ if (query_count->reset) {
+ counter_stats->hits = counters[0];
+ counter_stats->bytes = counters[1];
}
return 0;
}
/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ enum rte_flow_action_type action __rte_unused,
+ void *data,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+ int res = EINVAL;
+
+ priv_lock(priv);
+ if (flow->cs) {
+ res = priv_flow_query_count(flow->cs,
+ &flow->counter_stats,
+ (struct rte_flow_query_count *)data,
+ error);
+ } else {
+ rte_flow_error_set(error, res,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "no counter found for flow");
+ }
+ priv_unlock(priv);
+ return -res;
+}
+#endif
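On the application side, the counter attached by mlx5_flow_create_count() is read through rte_flow_query() with a struct rte_flow_query_count, as in this illustrative sketch:

#include <inttypes.h>
#include <stdio.h>
#include <rte_flow.h>

/* Illustrative sketch: read and reset the hit/byte counters of a flow
 * created with the COUNT action. */
static int
example_read_counter(uint16_t port_id, struct rte_flow *flow)
{
    struct rte_flow_query_count count = { .reset = 1 };
    struct rte_flow_error error;
    int ret;

    ret = rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
                         &count, &error);
    if (ret)
        return ret;
    if (count.hits_set)
        printf("hits: %" PRIu64 " bytes: %" PRIu64 "\n",
               count.hits, count.bytes);
    return 0;
}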
+
+/**
* Isolated mode.
*
* @see rte_flow_isolate()
@@ -1615,7 +2589,7 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
struct priv *priv = dev->data->dev_private;
priv_lock(priv);
- if (priv->started) {
+ if (dev->data->dev_started) {
rte_flow_error_set(error, EBUSY,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
NULL,
@@ -1624,6 +2598,497 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
return -rte_errno;
}
priv->isolated = !!enable;
+ if (enable)
+ priv->dev->dev_ops = &mlx5_dev_ops_isolate;
+ else
+ priv->dev->dev_ops = &mlx5_dev_ops;
priv_unlock(priv);
return 0;
}
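Since the check above rejects the call on a started port, isolated mode has to be selected between device configuration and start. An illustrative ordering sketch:

#include <rte_ethdev.h>
#include <rte_flow.h>

/* Illustrative sketch: enter isolated mode before starting the port so
 * that only traffic matching explicit flow rules reaches the Rx queues. */
static int
example_isolate(uint16_t port_id)
{
    struct rte_flow_error error;

    if (rte_flow_isolate(port_id, 1, &error))
        return -1; /* Must be called while the port is stopped. */
    return rte_eth_dev_start(port_id);
}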
+
+/**
+ * Convert a flow director filter to a generic flow.
+ *
+ * @param priv
+ * Private structure.
+ * @param fdir_filter
+ * Flow director filter to add.
+ * @param attributes
+ * Generic flow parameters structure.
+ *
+ * @return
+ * 0 on success, errno value on error.
+ */
+static int
+priv_fdir_filter_convert(struct priv *priv,
+ const struct rte_eth_fdir_filter *fdir_filter,
+ struct mlx5_fdir *attributes)
+{
+ const struct rte_eth_fdir_input *input = &fdir_filter->input;
+
+ /* Validate queue number. */
+ if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
+ ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
+ return EINVAL;
+ }
+ attributes->attr.ingress = 1;
+ attributes->items[0] = (struct rte_flow_item) {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .spec = &attributes->l2,
+ .mask = &attributes->l2_mask,
+ };
+ switch (fdir_filter->action.behavior) {
+ case RTE_ETH_FDIR_ACCEPT:
+ attributes->actions[0] = (struct rte_flow_action){
+ .type = RTE_FLOW_ACTION_TYPE_QUEUE,
+ .conf = &attributes->queue,
+ };
+ break;
+ case RTE_ETH_FDIR_REJECT:
+ attributes->actions[0] = (struct rte_flow_action){
+ .type = RTE_FLOW_ACTION_TYPE_DROP,
+ };
+ break;
+ default:
+ ERROR("invalid behavior %d", fdir_filter->action.behavior);
+ return ENOTSUP;
+ }
+ attributes->queue.index = fdir_filter->action.rx_queue;
+ switch (fdir_filter->input.flow_type) {
+ case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
+ attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+ .src_addr = input->flow.udp4_flow.ip.src_ip,
+ .dst_addr = input->flow.udp4_flow.ip.dst_ip,
+ .time_to_live = input->flow.udp4_flow.ip.ttl,
+ .type_of_service = input->flow.udp4_flow.ip.tos,
+ .next_proto_id = input->flow.udp4_flow.ip.proto,
+ };
+ attributes->l4.udp.hdr = (struct udp_hdr){
+ .src_port = input->flow.udp4_flow.src_port,
+ .dst_port = input->flow.udp4_flow.dst_port,
+ };
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV4,
+ .spec = &attributes->l3,
+ };
+ attributes->items[2] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_UDP,
+ .spec = &attributes->l4,
+ };
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
+ attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+ .src_addr = input->flow.tcp4_flow.ip.src_ip,
+ .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
+ .time_to_live = input->flow.tcp4_flow.ip.ttl,
+ .type_of_service = input->flow.tcp4_flow.ip.tos,
+ .next_proto_id = input->flow.tcp4_flow.ip.proto,
+ };
+ attributes->l4.tcp.hdr = (struct tcp_hdr){
+ .src_port = input->flow.tcp4_flow.src_port,
+ .dst_port = input->flow.tcp4_flow.dst_port,
+ };
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV4,
+ .spec = &attributes->l3,
+ };
+ attributes->items[2] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_TCP,
+ .spec = &attributes->l4,
+ };
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
+ attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+ .src_addr = input->flow.ip4_flow.src_ip,
+ .dst_addr = input->flow.ip4_flow.dst_ip,
+ .time_to_live = input->flow.ip4_flow.ttl,
+ .type_of_service = input->flow.ip4_flow.tos,
+ .next_proto_id = input->flow.ip4_flow.proto,
+ };
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV4,
+ .spec = &attributes->l3,
+ };
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
+ attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+ .hop_limits = input->flow.udp6_flow.ip.hop_limits,
+ .proto = input->flow.udp6_flow.ip.proto,
+ };
+ memcpy(attributes->l3.ipv6.hdr.src_addr,
+ input->flow.udp6_flow.ip.src_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ memcpy(attributes->l3.ipv6.hdr.dst_addr,
+ input->flow.udp6_flow.ip.dst_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ attributes->l4.udp.hdr = (struct udp_hdr){
+ .src_port = input->flow.udp6_flow.src_port,
+ .dst_port = input->flow.udp6_flow.dst_port,
+ };
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV6,
+ .spec = &attributes->l3,
+ };
+ attributes->items[2] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_UDP,
+ .spec = &attributes->l4,
+ };
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
+ attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+ .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
+ .proto = input->flow.tcp6_flow.ip.proto,
+ };
+ memcpy(attributes->l3.ipv6.hdr.src_addr,
+ input->flow.tcp6_flow.ip.src_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ memcpy(attributes->l3.ipv6.hdr.dst_addr,
+ input->flow.tcp6_flow.ip.dst_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ attributes->l4.tcp.hdr = (struct tcp_hdr){
+ .src_port = input->flow.tcp6_flow.src_port,
+ .dst_port = input->flow.tcp6_flow.dst_port,
+ };
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV6,
+ .spec = &attributes->l3,
+ };
+ attributes->items[2] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_TCP,
+ .spec = &attributes->l4,
+ };
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
+ attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+ .hop_limits = input->flow.ipv6_flow.hop_limits,
+ .proto = input->flow.ipv6_flow.proto,
+ };
+ memcpy(attributes->l3.ipv6.hdr.src_addr,
+ input->flow.ipv6_flow.src_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ memcpy(attributes->l3.ipv6.hdr.dst_addr,
+ input->flow.ipv6_flow.dst_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV6,
+ .spec = &attributes->l3,
+ };
+ break;
+ default:
+ ERROR("invalid flow type%d",
+ fdir_filter->input.flow_type);
+ return ENOTSUP;
+ }
+ return 0;
+}
+
+/**
+ * Add new flow director filter and store it in list.
+ *
+ * @param priv
+ * Private structure.
+ * @param fdir_filter
+ * Flow director filter to add.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_fdir_filter_add(struct priv *priv,
+ const struct rte_eth_fdir_filter *fdir_filter)
+{
+ struct mlx5_fdir attributes = {
+ .attr.group = 0,
+ .l2_mask = {
+ .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ .type = 0,
+ },
+ };
+ struct mlx5_flow_parse parser = {
+ .layer = HASH_RXQ_ETH,
+ };
+ struct rte_flow_error error;
+ struct rte_flow *flow;
+ int ret;
+
+ ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+ if (ret)
+ return -ret;
+ ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
+ attributes.actions, &error, &parser);
+ if (ret)
+ return -ret;
+ flow = priv_flow_create(priv,
+ &priv->flows,
+ &attributes.attr,
+ attributes.items,
+ attributes.actions,
+ &error);
+ if (flow) {
+ DEBUG("FDIR created %p", (void *)flow);
+ return 0;
+ }
+ return ENOTSUP;
+}
+
+/**
+ * Delete specific filter.
+ *
+ * @param priv
+ * Private structure.
+ * @param fdir_filter
+ * Filter to be deleted.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_fdir_filter_delete(struct priv *priv,
+ const struct rte_eth_fdir_filter *fdir_filter)
+{
+ struct mlx5_fdir attributes = {
+ .attr.group = 0,
+ };
+ struct mlx5_flow_parse parser = {
+ .create = 1,
+ .layer = HASH_RXQ_ETH,
+ };
+ struct rte_flow_error error;
+ struct rte_flow *flow;
+ unsigned int i;
+ int ret;
+
+ ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+ if (ret)
+ return -ret;
+ ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
+ attributes.actions, &error, &parser);
+ if (ret)
+ goto exit;
+ /*
+ * Special case for the drop action, which is only appended to the
+ * specifications when the flow is actually created. The drop
+ * specification is therefore missing here and is added just below
+ * before comparing against the existing flows.
+ */
+ if (parser.drop) {
+ struct ibv_flow_spec_action_drop *drop;
+
+ drop = (void *)((uintptr_t)parser.drop_q.ibv_attr +
+ parser.drop_q.offset);
+ *drop = (struct ibv_flow_spec_action_drop){
+ .type = IBV_FLOW_SPEC_ACTION_DROP,
+ .size = sizeof(struct ibv_flow_spec_action_drop),
+ };
+ parser.drop_q.ibv_attr->num_of_specs++;
+ }
+ TAILQ_FOREACH(flow, &priv->flows, next) {
+ struct ibv_flow_attr *attr;
+ struct ibv_spec_header *attr_h;
+ void *spec;
+ struct ibv_flow_attr *flow_attr;
+ struct ibv_spec_header *flow_h;
+ void *flow_spec;
+ unsigned int specs_n;
+
+ if (parser.drop)
+ attr = parser.drop_q.ibv_attr;
+ else
+ attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
+ if (flow->drop)
+ flow_attr = flow->drxq.ibv_attr;
+ else
+ flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
+ /* Compare first the attributes. */
+ if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
+ continue;
+ if (attr->num_of_specs == 0)
+ continue;
+ spec = (void *)((uintptr_t)attr +
+ sizeof(struct ibv_flow_attr));
+ flow_spec = (void *)((uintptr_t)flow_attr +
+ sizeof(struct ibv_flow_attr));
+ specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
+ for (i = 0; i != specs_n; ++i) {
+ attr_h = spec;
+ flow_h = flow_spec;
+ if (memcmp(spec, flow_spec,
+ RTE_MIN(attr_h->size, flow_h->size)))
+ continue;
+ spec = (void *)((uintptr_t)attr + attr_h->size);
+ flow_spec = (void *)((uintptr_t)flow_attr +
+ flow_h->size);
+ }
+ /* At this point, the flow matches. */
+ break;
+ }
+ if (flow)
+ priv_flow_destroy(priv, &priv->flows, flow);
+exit:
+ if (parser.drop) {
+ rte_free(parser.drop_q.ibv_attr);
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (parser.queue[i].ibv_attr)
+ rte_free(parser.queue[i].ibv_attr);
+ }
+ }
+ return -ret;
+}
+
+/**
+ * Update queue for specific filter.
+ *
+ * @param priv
+ * Private structure.
+ * @param fdir_filter
+ * Filter to be updated.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_fdir_filter_update(struct priv *priv,
+ const struct rte_eth_fdir_filter *fdir_filter)
+{
+ int ret;
+
+ ret = priv_fdir_filter_delete(priv, fdir_filter);
+ if (ret)
+ return ret;
+ ret = priv_fdir_filter_add(priv, fdir_filter);
+ return ret;
+}
+
+/**
+ * Flush all filters.
+ *
+ * @param priv
+ * Private structure.
+ */
+static void
+priv_fdir_filter_flush(struct priv *priv)
+{
+ priv_flow_flush(priv, &priv->flows);
+}
+
+/**
+ * Get flow director information.
+ *
+ * @param priv
+ * Private structure.
+ * @param[out] fdir_info
+ * Resulting flow director information.
+ */
+static void
+priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
+{
+ struct rte_eth_fdir_masks *mask =
+ &priv->dev->data->dev_conf.fdir_conf.mask;
+
+ fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
+ fdir_info->guarant_spc = 0;
+ rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
+ fdir_info->max_flexpayload = 0;
+ fdir_info->flow_types_mask[0] = 0;
+ fdir_info->flex_payload_unit = 0;
+ fdir_info->max_flex_payload_segment_num = 0;
+ fdir_info->flex_payload_limit = 0;
+ memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
+}
+
+/**
+ * Deal with flow director operations.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param filter_op
+ * Operation to perform.
+ * @param arg
+ * Pointer to operation-specific structure.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
+{
+ enum rte_fdir_mode fdir_mode =
+ priv->dev->data->dev_conf.fdir_conf.mode;
+ int ret = 0;
+
+ if (filter_op == RTE_ETH_FILTER_NOP)
+ return 0;
+ if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
+ fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
+ ERROR("%p: flow director mode %d not supported",
+ (void *)priv, fdir_mode);
+ return EINVAL;
+ }
+ switch (filter_op) {
+ case RTE_ETH_FILTER_ADD:
+ ret = priv_fdir_filter_add(priv, arg);
+ break;
+ case RTE_ETH_FILTER_UPDATE:
+ ret = priv_fdir_filter_update(priv, arg);
+ break;
+ case RTE_ETH_FILTER_DELETE:
+ ret = priv_fdir_filter_delete(priv, arg);
+ break;
+ case RTE_ETH_FILTER_FLUSH:
+ priv_fdir_filter_flush(priv);
+ break;
+ case RTE_ETH_FILTER_INFO:
+ priv_fdir_info_get(priv, arg);
+ break;
+ default:
+ DEBUG("%p: unknown operation %u", (void *)priv,
+ filter_op);
+ ret = EINVAL;
+ break;
+ }
+ return ret;
+}
+
+/**
+ * Manage filter operations.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param filter_type
+ * Filter type.
+ * @param filter_op
+ * Operation to perform.
+ * @param arg
+ * Pointer to operation-specific structure.
+ *
+ * @return
+ * 0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+ enum rte_filter_type filter_type,
+ enum rte_filter_op filter_op,
+ void *arg)
+{
+ int ret = EINVAL;
+ struct priv *priv = dev->data->dev_private;
+
+ switch (filter_type) {
+ case RTE_ETH_FILTER_GENERIC:
+ if (filter_op != RTE_ETH_FILTER_GET)
+ return -EINVAL;
+ *(const void **)arg = &mlx5_flow_ops;
+ return 0;
+ case RTE_ETH_FILTER_FDIR:
+ priv_lock(priv);
+ ret = priv_fdir_ctrl_func(priv, filter_op, arg);
+ priv_unlock(priv);
+ break;
+ default:
+ ERROR("%p: filter type (%d) not supported",
+ (void *)dev, filter_type);
+ break;
+ }
+ return -ret;
+}
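Putting the flow director pieces together, a legacy application keeps using the filter_ctrl path unchanged while the PMD converts each filter into a generic flow rule as shown above. The sketch below is illustrative only; the address, port and queue index are arbitrary assumptions.

#include <string.h>
#include <rte_byteorder.h>
#include <rte_ethdev.h>

/* Illustrative sketch: steer UDP/IPv4 traffic from 192.0.2.1 to Rx queue 3
 * through the legacy flow director API. */
static int
example_fdir_add(uint16_t port_id)
{
    struct rte_eth_fdir_filter filter;

    memset(&filter, 0, sizeof(filter));
    filter.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
    filter.input.flow.udp4_flow.ip.src_ip = rte_cpu_to_be_32(0xc0000201);
    filter.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789);
    filter.action.behavior = RTE_ETH_FDIR_ACCEPT;
    filter.action.rx_queue = 3;
    return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
                                   RTE_ETH_FILTER_ADD, &filter);
}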
diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c
index 8489ea67..d17b991e 100644
--- a/drivers/net/mlx5/mlx5_mac.c
+++ b/drivers/net/mlx5/mlx5_mac.c
@@ -51,16 +51,9 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_common.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_utils.h"
@@ -90,112 +83,6 @@ priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
}
/**
- * Delete MAC flow steering rule.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param mac_index
- * MAC address index.
- * @param vlan_index
- * VLAN index to use.
- */
-static void
-hash_rxq_del_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
- unsigned int vlan_index)
-{
-#ifndef NDEBUG
- const uint8_t (*mac)[ETHER_ADDR_LEN] =
- (const uint8_t (*)[ETHER_ADDR_LEN])
- hash_rxq->priv->mac[mac_index].addr_bytes;
-#endif
-
- assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
- assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
- if (hash_rxq->mac_flow[mac_index][vlan_index] == NULL)
- return;
- DEBUG("%p: removing MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
- " VLAN index %u",
- (void *)hash_rxq,
- (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
- mac_index,
- vlan_index);
- claim_zero(ibv_exp_destroy_flow(hash_rxq->mac_flow
- [mac_index][vlan_index]));
- hash_rxq->mac_flow[mac_index][vlan_index] = NULL;
-}
-
-/**
- * Unregister a MAC address from a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param mac_index
- * MAC address index.
- */
-static void
-hash_rxq_mac_addr_del(struct hash_rxq *hash_rxq, unsigned int mac_index)
-{
- unsigned int i;
-
- assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
- for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow[mac_index])); ++i)
- hash_rxq_del_mac_flow(hash_rxq, mac_index, i);
-}
-
-/**
- * Unregister all MAC addresses from a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- */
-void
-hash_rxq_mac_addrs_del(struct hash_rxq *hash_rxq)
-{
- unsigned int i;
-
- for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow)); ++i)
- hash_rxq_mac_addr_del(hash_rxq, i);
-}
-
-/**
- * Unregister a MAC address.
- *
- * This is done for each hash RX queue.
- *
- * @param priv
- * Pointer to private structure.
- * @param mac_index
- * MAC address index.
- */
-static void
-priv_mac_addr_del(struct priv *priv, unsigned int mac_index)
-{
- unsigned int i;
-
- assert(mac_index < RTE_DIM(priv->mac));
- if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
- return;
- for (i = 0; (i != priv->hash_rxqs_n); ++i)
- hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[i], mac_index);
- BITFIELD_RESET(priv->mac_configured, mac_index);
-}
-
-/**
- * Unregister all MAC addresses from all hash RX queues.
- *
- * @param priv
- * Pointer to private structure.
- */
-void
-priv_mac_addrs_disable(struct priv *priv)
-{
- unsigned int i;
-
- for (i = 0; (i != priv->hash_rxqs_n); ++i)
- hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[i]);
-}
-
-/**
* DPDK callback to remove a MAC address.
*
* @param dev
@@ -206,258 +93,12 @@ priv_mac_addrs_disable(struct priv *priv)
void
mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
{
- struct priv *priv = dev->data->dev_private;
-
if (mlx5_is_secondary())
return;
-
- priv_lock(priv);
- DEBUG("%p: removing MAC address from index %" PRIu32,
- (void *)dev, index);
- if (index >= RTE_DIM(priv->mac))
- goto end;
- priv_mac_addr_del(priv, index);
-end:
- priv_unlock(priv);
-}
-
-/**
- * Add MAC flow steering rule.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param mac_index
- * MAC address index to register.
- * @param vlan_index
- * VLAN index to use.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-hash_rxq_add_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
- unsigned int vlan_index)
-{
- struct ibv_exp_flow *flow;
- struct priv *priv = hash_rxq->priv;
- const uint8_t (*mac)[ETHER_ADDR_LEN] =
- (const uint8_t (*)[ETHER_ADDR_LEN])
- priv->mac[mac_index].addr_bytes;
- FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, hash_rxq->type));
- struct ibv_exp_flow_attr *attr = &data->attr;
- struct ibv_exp_flow_spec_eth *spec = &data->spec;
- unsigned int vlan_enabled = !!priv->vlan_filter_n;
- unsigned int vlan_id = priv->vlan_filter[vlan_index];
-
- assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
- assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
- if (hash_rxq->mac_flow[mac_index][vlan_index] != NULL)
- return 0;
- /*
- * No padding must be inserted by the compiler between attr and spec.
- * This layout is expected by libibverbs.
- */
- assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
- priv_flow_attr(priv, attr, sizeof(data), hash_rxq->type);
- /* The first specification must be Ethernet. */
- assert(spec->type == IBV_EXP_FLOW_SPEC_ETH);
- assert(spec->size == sizeof(*spec));
- *spec = (struct ibv_exp_flow_spec_eth){
- .type = IBV_EXP_FLOW_SPEC_ETH,
- .size = sizeof(*spec),
- .val = {
- .dst_mac = {
- (*mac)[0], (*mac)[1], (*mac)[2],
- (*mac)[3], (*mac)[4], (*mac)[5]
- },
- .vlan_tag = (vlan_enabled ? htons(vlan_id) : 0),
- },
- .mask = {
- .dst_mac = "\xff\xff\xff\xff\xff\xff",
- .vlan_tag = (vlan_enabled ? htons(0xfff) : 0),
- },
- };
- DEBUG("%p: adding MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
- " VLAN index %u filtering %s, ID %u",
- (void *)hash_rxq,
- (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
- mac_index,
- vlan_index,
- (vlan_enabled ? "enabled" : "disabled"),
- vlan_id);
- /* Create related flow. */
- errno = 0;
- flow = ibv_exp_create_flow(hash_rxq->qp, attr);
- if (flow == NULL) {
- /* It's not clear whether errno is always set in this case. */
- ERROR("%p: flow configuration failed, errno=%d: %s",
- (void *)hash_rxq, errno,
- (errno ? strerror(errno) : "Unknown error"));
- if (errno)
- return errno;
- return EINVAL;
- }
- hash_rxq->mac_flow[mac_index][vlan_index] = flow;
- return 0;
-}
-
-/**
- * Register a MAC address in a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param mac_index
- * MAC address index to register.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-hash_rxq_mac_addr_add(struct hash_rxq *hash_rxq, unsigned int mac_index)
-{
- struct priv *priv = hash_rxq->priv;
- unsigned int i = 0;
- int ret;
-
- assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
- assert(RTE_DIM(hash_rxq->mac_flow[mac_index]) ==
- RTE_DIM(priv->vlan_filter));
- /* Add a MAC address for each VLAN filter, or at least once. */
- do {
- ret = hash_rxq_add_mac_flow(hash_rxq, mac_index, i);
- if (ret) {
- /* Failure, rollback. */
- while (i != 0)
- hash_rxq_del_mac_flow(hash_rxq, mac_index,
- --i);
- return ret;
- }
- } while (++i < priv->vlan_filter_n);
- return 0;
-}
-
-/**
- * Register all MAC addresses in a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-hash_rxq_mac_addrs_add(struct hash_rxq *hash_rxq)
-{
- struct priv *priv = hash_rxq->priv;
- unsigned int i;
- int ret;
-
- assert(RTE_DIM(priv->mac) == RTE_DIM(hash_rxq->mac_flow));
- for (i = 0; (i != RTE_DIM(priv->mac)); ++i) {
- if (!BITFIELD_ISSET(priv->mac_configured, i))
- continue;
- ret = hash_rxq_mac_addr_add(hash_rxq, i);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0)
- hash_rxq_mac_addr_del(hash_rxq, --i);
- assert(ret > 0);
- return ret;
- }
- return 0;
-}
-
-/**
- * Register a MAC address.
- *
- * This is done for each hash RX queue.
- *
- * @param priv
- * Pointer to private structure.
- * @param mac_index
- * MAC address index to use.
- * @param mac
- * MAC address to register.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-priv_mac_addr_add(struct priv *priv, unsigned int mac_index,
- const uint8_t (*mac)[ETHER_ADDR_LEN])
-{
- unsigned int i;
- int ret;
-
- assert(mac_index < RTE_DIM(priv->mac));
- /* First, make sure this address isn't already configured. */
- for (i = 0; (i != RTE_DIM(priv->mac)); ++i) {
- /* Skip this index, it's going to be reconfigured. */
- if (i == mac_index)
- continue;
- if (!BITFIELD_ISSET(priv->mac_configured, i))
- continue;
- if (memcmp(priv->mac[i].addr_bytes, *mac, sizeof(*mac)))
- continue;
- /* Address already configured elsewhere, return with error. */
- return EADDRINUSE;
- }
- if (BITFIELD_ISSET(priv->mac_configured, mac_index))
- priv_mac_addr_del(priv, mac_index);
- priv->mac[mac_index] = (struct ether_addr){
- {
- (*mac)[0], (*mac)[1], (*mac)[2],
- (*mac)[3], (*mac)[4], (*mac)[5]
- }
- };
- if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
- goto end;
- for (i = 0; (i != priv->hash_rxqs_n); ++i) {
- ret = hash_rxq_mac_addr_add(&(*priv->hash_rxqs)[i], mac_index);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0)
- hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[--i],
- mac_index);
- return ret;
- }
-end:
- BITFIELD_SET(priv->mac_configured, mac_index);
- return 0;
-}
-
-/**
- * Register all MAC addresses in all hash RX queues.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-priv_mac_addrs_enable(struct priv *priv)
-{
- unsigned int i;
- int ret;
-
- if (priv->isolated)
- return 0;
- if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
- return 0;
- for (i = 0; (i != priv->hash_rxqs_n); ++i) {
- ret = hash_rxq_mac_addrs_add(&(*priv->hash_rxqs)[i]);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0)
- hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[--i]);
- assert(ret > 0);
- return ret;
- }
- return 0;
+ assert(index < MLX5_MAX_MAC_ADDRESSES);
+ memset(&dev->data->mac_addrs[index], 0, sizeof(struct ether_addr));
+ if (!dev->data->promiscuous && !dev->data->all_multicast)
+ mlx5_traffic_restart(dev);
}
/**
@@ -471,31 +112,35 @@ priv_mac_addrs_enable(struct priv *priv)
* MAC address index.
* @param vmdq
* VMDq pool index to associate address with (ignored).
+ *
+ * @return
+ * 0 on success.
*/
int
-mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac,
uint32_t index, uint32_t vmdq)
{
- struct priv *priv = dev->data->dev_private;
- int re;
-
- if (mlx5_is_secondary())
- return -ENOTSUP;
+ unsigned int i;
+ int ret = 0;
(void)vmdq;
- priv_lock(priv);
- DEBUG("%p: adding MAC address at index %" PRIu32,
- (void *)dev, index);
- if (index >= RTE_DIM(priv->mac)) {
- re = EINVAL;
- goto end;
+ if (mlx5_is_secondary())
+ return 0;
+ assert(index < MLX5_MAX_MAC_ADDRESSES);
+ /* First, make sure this address isn't already configured. */
+ for (i = 0; (i != MLX5_MAX_MAC_ADDRESSES); ++i) {
+ /* Skip this index, it's going to be reconfigured. */
+ if (i == index)
+ continue;
+ if (memcmp(&dev->data->mac_addrs[i], mac, sizeof(*mac)))
+ continue;
+ /* Address already configured elsewhere, return with error. */
+ return EADDRINUSE;
}
- re = priv_mac_addr_add(priv, index,
- (const uint8_t (*)[ETHER_ADDR_LEN])
- mac_addr->addr_bytes);
-end:
- priv_unlock(priv);
- return -re;
+ dev->data->mac_addrs[index] = *mac;
+ if (!dev->data->promiscuous && !dev->data->all_multicast)
+ mlx5_traffic_restart(dev);
+ return ret;
}
/**
@@ -509,7 +154,8 @@ end:
void
mlx5_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
{
+ if (mlx5_is_secondary())
+ return;
DEBUG("%p: setting primary MAC address", (void *)dev);
- mlx5_mac_addr_remove(dev, 0);
mlx5_mac_addr_add(dev, mac_addr, 0, 0);
}
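With MAC filtering now expressed through control flows, the standard ethdev calls remain the application interface; each change simply regenerates the traffic rules via mlx5_traffic_restart(). An illustrative sketch using an arbitrary locally administered address:

#include <rte_ethdev.h>
#include <rte_ether.h>

/* Illustrative sketch: add a secondary unicast MAC address, then remove
 * it again; both operations end up rebuilding the control flows. */
static int
example_mac_update(uint16_t port_id)
{
    struct ether_addr addr = {
        .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
    };
    int ret;

    ret = rte_eth_dev_mac_addr_add(port_id, &addr, 0);
    if (ret != 0)
        return ret;
    return rte_eth_dev_mac_addr_remove(port_id, &addr);
}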
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 28733517..6b29eed5 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -41,14 +41,8 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mempool.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
+#include <rte_malloc.h>
#include "mlx5.h"
#include "mlx5_rxtx.h"
@@ -118,59 +112,13 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
}
/**
- * Register mempool as a memory region.
- *
- * @param pd
- * Pointer to protection domain.
- * @param mp
- * Pointer to memory pool.
- *
- * @return
- * Memory region pointer, NULL in case of error.
- */
-struct ibv_mr *
-mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
-{
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- uintptr_t start;
- uintptr_t end;
- unsigned int i;
-
- if (mlx5_check_mempool(mp, &start, &end) != 0) {
- ERROR("mempool %p: not virtually contiguous",
- (void *)mp);
- return NULL;
- }
-
- DEBUG("mempool %p area start=%p end=%p size=%zu",
- (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- /* Round start and end to page boundary if found in memory segments. */
- for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
- uintptr_t addr = (uintptr_t)ms[i].addr;
- size_t len = ms[i].len;
- unsigned int align = ms[i].hugepage_sz;
-
- if ((start > addr) && (start < addr + len))
- start = RTE_ALIGN_FLOOR(start, align);
- if ((end > addr) && (end < addr + len))
- end = RTE_ALIGN_CEIL(end, align);
- }
- DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
- (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- return ibv_reg_mr(pd,
- (void *)start,
- end - start,
- IBV_ACCESS_LOCAL_WRITE);
-}
-
-/**
* Register a Memory Region (MR) <-> Memory Pool (MP) association in
* txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
*
* This function should only be called by txq_mp2mr().
*
+ * @param priv
+ * Pointer to private structure.
* @param txq
* Pointer to TX queue structure.
* @param[in] mp
@@ -179,45 +127,75 @@ mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
* Index of the next available entry.
*
* @return
- * mr->lkey on success, (uint32_t)-1 on failure.
+ * mr on success, NULL on failure.
*/
-uint32_t
-txq_mp2mr_reg(struct txq *txq, struct rte_mempool *mp, unsigned int idx)
+struct mlx5_mr*
+priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *txq,
+ struct rte_mempool *mp, unsigned int idx)
{
- struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
- struct ibv_mr *mr;
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq, struct mlx5_txq_ctrl, txq);
+ struct mlx5_mr *mr;
/* Add a new entry, register MR first. */
DEBUG("%p: discovered new memory pool \"%s\" (%p)",
(void *)txq_ctrl, mp->name, (void *)mp);
- mr = mlx5_mp2mr(txq_ctrl->priv->pd, mp);
+ mr = priv_mr_get(priv, mp);
+ if (mr == NULL)
+ mr = priv_mr_new(priv, mp);
if (unlikely(mr == NULL)) {
DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
(void *)txq_ctrl);
- return (uint32_t)-1;
+ return NULL;
}
- if (unlikely(idx == RTE_DIM(txq_ctrl->txq.mp2mr))) {
+ if (unlikely(idx == RTE_DIM(txq->mp2mr))) {
/* Table is full, remove oldest entry. */
DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
(void *)txq_ctrl);
--idx;
- claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[0].mr));
- memmove(&txq_ctrl->txq.mp2mr[0], &txq_ctrl->txq.mp2mr[1],
- (sizeof(txq_ctrl->txq.mp2mr) -
- sizeof(txq_ctrl->txq.mp2mr[0])));
+ priv_mr_release(priv, txq->mp2mr[0]);
+ memmove(&txq->mp2mr[0], &txq->mp2mr[1],
+ (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
}
/* Store the new entry. */
- txq_ctrl->txq.mp2mr[idx].start = (uintptr_t)mr->addr;
- txq_ctrl->txq.mp2mr[idx].end = (uintptr_t)mr->addr + mr->length;
- txq_ctrl->txq.mp2mr[idx].mr = mr;
- txq_ctrl->txq.mp2mr[idx].lkey = htonl(mr->lkey);
+ txq_ctrl->txq.mp2mr[idx] = mr;
DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
(void *)txq_ctrl, mp->name, (void *)mp,
- txq_ctrl->txq.mp2mr[idx].lkey);
- return txq_ctrl->txq.mp2mr[idx].lkey;
+ txq_ctrl->txq.mp2mr[idx]->lkey);
+ return mr;
+}
+
+/**
+ * Register a Memory Region (MR) <-> Memory Pool (MP) association in
+ * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
+ *
+ * This function should only be called by txq_mp2mr().
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param[in] mp
+ * Memory Pool for which a Memory Region lkey must be returned.
+ * @param idx
+ * Index of the next available entry.
+ *
+ * @return
+ * mr on success, NULL on failure.
+ */
+struct mlx5_mr*
+mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
+ unsigned int idx)
+{
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq, struct mlx5_txq_ctrl, txq);
+ struct mlx5_mr *mr;
+
+ priv_lock(txq_ctrl->priv);
+ mr = priv_txq_mp2mr_reg(txq_ctrl->priv, txq, mp, idx);
+ priv_unlock(txq_ctrl->priv);
+ return mr;
}
-struct txq_mp2mr_mbuf_check_data {
+struct mlx5_mp2mr_mbuf_check_data {
int ret;
};
@@ -239,7 +217,7 @@ static void
txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
uint32_t index __rte_unused)
{
- struct txq_mp2mr_mbuf_check_data *data = arg;
+ struct mlx5_mp2mr_mbuf_check_data *data = arg;
struct rte_mbuf *buf = obj;
/*
@@ -260,35 +238,158 @@ txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
* Pointer to TX queue structure.
*/
void
-txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
+mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
{
- struct txq_ctrl *txq_ctrl = arg;
- struct txq_mp2mr_mbuf_check_data data = {
+ struct priv *priv = (struct priv *)arg;
+ struct mlx5_mp2mr_mbuf_check_data data = {
.ret = 0,
};
- uintptr_t start;
- uintptr_t end;
- unsigned int i;
+ struct mlx5_mr *mr;
/* Register mempool only if the first element looks like a mbuf. */
if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
data.ret == -1)
return;
+ mr = priv_mr_get(priv, mp);
+ if (mr) {
+ priv_mr_release(priv, mr);
+ return;
+ }
+ priv_mr_new(priv, mp);
+}
+
+/**
+ * Register a new memory region from the mempool and store it in the memory
+ * region list.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param mp
+ * Pointer to the memory pool to register.
+ * @return
+ * The memory region on success, NULL on failure.
+ */
+struct mlx5_mr*
+priv_mr_new(struct priv *priv, struct rte_mempool *mp)
+{
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ uintptr_t start;
+ uintptr_t end;
+ unsigned int i;
+ struct mlx5_mr *mr;
+
+ mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
+ if (!mr) {
+ DEBUG("unable to configure MR, ibv_reg_mr() failed.");
+ return NULL;
+ }
if (mlx5_check_mempool(mp, &start, &end) != 0) {
ERROR("mempool %p: not virtually contiguous",
(void *)mp);
- return;
+ rte_free(mr);
+ return NULL;
}
- for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
- struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
+ DEBUG("mempool %p area start=%p end=%p size=%zu",
+ (void *)mp, (void *)start, (void *)end,
+ (size_t)(end - start));
+ /* Round start and end to page boundary if found in memory segments. */
+ for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+ uintptr_t addr = (uintptr_t)ms[i].addr;
+ size_t len = ms[i].len;
+ unsigned int align = ms[i].hugepage_sz;
- if (unlikely(mr == NULL)) {
- /* Unknown MP, add a new MR for it. */
- break;
+ if ((start > addr) && (start < addr + len))
+ start = RTE_ALIGN_FLOOR(start, align);
+ if ((end > addr) && (end < addr + len))
+ end = RTE_ALIGN_CEIL(end, align);
+ }
+ DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+ (void *)mp, (void *)start, (void *)end,
+ (size_t)(end - start));
+ mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (mr->mr == NULL) {
+ /* Registration failed, do not leak the tracking structure. */
+ rte_free(mr);
+ return NULL;
+ }
+ mr->mp = mp;
+ mr->lkey = rte_cpu_to_be_32(mr->mr->lkey);
+ mr->start = start;
+ mr->end = (uintptr_t)mr->mr->addr + mr->mr->length;
+ rte_atomic32_inc(&mr->refcnt);
+ DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv,
+ (void *)mr, rte_atomic32_read(&mr->refcnt));
+ LIST_INSERT_HEAD(&priv->mr, mr, next);
+ return mr;
+}
+
+/**
+ * Look up a memory region in the memory region list.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param mp
+ * Pointer to the memory pool to look up.
+ * @return
+ * The memory region if found in the list, NULL otherwise.
+ */
+struct mlx5_mr*
+priv_mr_get(struct priv *priv, struct rte_mempool *mp)
+{
+ struct mlx5_mr *mr;
+
+ assert(mp);
+ if (LIST_EMPTY(&priv->mr))
+ return NULL;
+ LIST_FOREACH(mr, &priv->mr, next) {
+ if (mr->mp == mp) {
+ rte_atomic32_inc(&mr->refcnt);
+ DEBUG("Memory Region %p refcnt: %d",
+ (void *)mr, rte_atomic32_read(&mr->refcnt));
+ return mr;
}
- if (start >= (uintptr_t)mr->addr &&
- end <= (uintptr_t)mr->addr + mr->length)
- return;
}
- txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
+ return NULL;
+}
+
+/**
+ * Release the memory region object.
+ *
+ * @param priv
+ * Pointer to private structure (unused).
+ * @param mr
+ * Pointer to memory region to release.
+ *
+ * @return
+ * 0 when the last reference is released, EBUSY when the memory region
+ * is still referenced.
+ */
+int
+priv_mr_release(struct priv *priv, struct mlx5_mr *mr)
+{
+ (void)priv;
+ assert(mr);
+ DEBUG("Memory Region %p refcnt: %d",
+ (void *)mr, rte_atomic32_read(&mr->refcnt));
+ if (rte_atomic32_dec_and_test(&mr->refcnt)) {
+ claim_zero(ibv_dereg_mr(mr->mr));
+ LIST_REMOVE(mr, next);
+ rte_free(mr);
+ return 0;
+ }
+ return EBUSY;
+}
+
+/**
+ * Verify that the memory region list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * The number of memory regions still referenced.
+ */
+int
+priv_mr_verify(struct priv *priv)
+{
+ int ret = 0;
+ struct mlx5_mr *mr;
+
+ LIST_FOREACH(mr, &priv->mr, next) {
+ DEBUG("%p: mr %p still referenced", (void *)priv,
+ (void *)mr);
+ ++ret;
+ }
+ return ret;
}
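
The hunk above replaces the per-queue ibv_mr cache with reference-counted struct mlx5_mr objects kept on a per-device list: priv_mr_get() looks one up and takes a reference, priv_mr_new() registers and inserts a new one, and priv_mr_release() drops a reference and deregisters on the last one. A minimal sketch of the intended lookup-or-create pattern; my_get_mr() is a hypothetical helper written for illustration, not part of the patch:

	/* Illustrative only: how a caller is expected to use the new MR cache.
	 * Assumes the driver-internal headers (struct priv, struct mlx5_mr). */
	static struct mlx5_mr *
	my_get_mr(struct priv *priv, struct rte_mempool *mp)
	{
		struct mlx5_mr *mr;

		mr = priv_mr_get(priv, mp);     /* takes a reference if cached */
		if (mr == NULL)
			mr = priv_mr_new(priv, mp); /* registers and references */
		return mr;  /* caller must eventually priv_mr_release() it */
	}
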
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 608072f7..2de310bc 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -41,7 +41,7 @@
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
-#include <infiniband/mlx5_hw.h>
+#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
@@ -89,9 +89,6 @@
/* Default max packet length to be inlined. */
#define MLX5_EMPW_MAX_INLINE_LEN (4U * MLX5_WQE_SIZE)
-#ifndef HAVE_VERBS_MLX5_OPCODE_TSO
-#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */
-#endif
#define MLX5_OPC_MOD_ENHANCED_MPSW 0
#define MLX5_OPCODE_ENHANCED_MPSW 0x29
@@ -154,6 +151,9 @@
/* Default mark value used when none is provided. */
#define MLX5_FLOW_MARK_DEFAULT 0xffffff
+/* Maximum number of DS in WQE. */
+#define MLX5_DSEG_MAX 63
+
/* Subset of struct mlx5_wqe_eth_seg. */
struct mlx5_wqe_eth_seg_small {
uint32_t rsvd0;
@@ -244,6 +244,46 @@ struct mlx5_cqe {
uint8_t op_own;
};
+/* Definitions used by the direct verbs (mlx5dv) data path. */
+
+/* CQ sequence number mask. */
+#define MLX5_CQ_SQN_MASK 0x3
+
+/* CQ sequence number index. */
+#define MLX5_CQ_SQN_OFFSET 28
+
+/* CQ doorbell index mask. */
+#define MLX5_CI_MASK 0xffffff
+
+/* CQ arm doorbell record index. */
+#define MLX5_CQ_ARM_DB 1
+
+/* CQ doorbell register offset in the UAR page. */
+#define MLX5_CQ_DOORBELL 0x20
+
+/* CQE format value. */
+#define MLX5_COMPRESSED 0x3
+
+/* CQE format mask. */
+#define MLX5E_CQE_FORMAT_MASK 0xc
+
+/* MPW opcode. */
+#define MLX5_OPC_MOD_MPW 0x01
+
+/* Compressed Rx CQE structure. */
+struct mlx5_mini_cqe8 {
+ union {
+ uint32_t rx_hash_result;
+ uint32_t checksum;
+ struct {
+ uint16_t wqe_counter;
+ uint8_t s_wqe_opcode;
+ uint8_t reserved;
+ } s_wqe_info;
+ };
+ uint32_t byte_cnt;
+};
+
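
The constants above are the pieces needed to arm a CQ for Rx interrupts through the doorbell record and UAR exposed by mlx5dv. A rough sketch of how such constants are typically combined follows; the rxq fields used here (cqn, cq_uar) are assumptions for illustration and are not taken from this hunk:

	/* Sketch: arm a CQ using the doorbell layout above (field names assumed). */
	static void
	arm_cq_sketch(struct mlx5_rxq_data *rxq, int sq_n_rxq)
	{
		int sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
		uint32_t doorbell_hi = (sq_n << MLX5_CQ_SQN_OFFSET) |
				       (rxq->cq_ci & MLX5_CI_MASK);
		uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | rxq->cqn;

		/* Update the arm word of the doorbell record, then ring the UAR. */
		rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
		rte_wmb();
		rte_write64(rte_cpu_to_be_64(doorbell),
			    (uint8_t *)rxq->cq_uar + MLX5_CQ_DOORBELL);
	}
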
/**
* Convert a user mark to flow mark.
*
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index a2dd7d17..f3de46de 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -47,88 +47,13 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_malloc.h>
#include <rte_ethdev.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_rxtx.h"
/**
- * Get a RSS configuration hash key.
- *
- * @param priv
- * Pointer to private structure.
- * @param rss_hf
- * RSS hash functions configuration must be retrieved for.
- *
- * @return
- * Pointer to a RSS configuration structure or NULL if rss_hf cannot
- * be matched.
- */
-static struct rte_eth_rss_conf *
-rss_hash_get(struct priv *priv, uint64_t rss_hf)
-{
- unsigned int i;
-
- for (i = 0; (i != hash_rxq_init_n); ++i) {
- uint64_t dpdk_rss_hf = hash_rxq_init[i].dpdk_rss_hf;
-
- if (!(dpdk_rss_hf & rss_hf))
- continue;
- return (*priv->rss_conf)[i];
- }
- return NULL;
-}
-
-/**
- * Register a RSS key.
- *
- * @param priv
- * Pointer to private structure.
- * @param key
- * Hash key to register.
- * @param key_len
- * Hash key length in bytes.
- * @param rss_hf
- * RSS hash functions the provided key applies to.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-rss_hash_rss_conf_new_key(struct priv *priv, const uint8_t *key,
- unsigned int key_len, uint64_t rss_hf)
-{
- unsigned int i;
-
- for (i = 0; (i != hash_rxq_init_n); ++i) {
- struct rte_eth_rss_conf *rss_conf;
- uint64_t dpdk_rss_hf = hash_rxq_init[i].dpdk_rss_hf;
-
- if (!(dpdk_rss_hf & rss_hf))
- continue;
- rss_conf = rte_realloc((*priv->rss_conf)[i],
- (sizeof(*rss_conf) + key_len),
- 0);
- if (!rss_conf)
- return ENOMEM;
- rss_conf->rss_key = (void *)(rss_conf + 1);
- rss_conf->rss_key_len = key_len;
- rss_conf->rss_hf = dpdk_rss_hf;
- memcpy(rss_conf->rss_key, key, key_len);
- (*priv->rss_conf)[i] = rss_conf;
- }
- return 0;
-}
-
-/**
* DPDK callback to update the RSS hash configuration.
*
* @param dev
@@ -144,23 +69,24 @@ mlx5_rss_hash_update(struct rte_eth_dev *dev,
struct rte_eth_rss_conf *rss_conf)
{
struct priv *priv = dev->data->dev_private;
- int err = 0;
+ int ret = 0;
priv_lock(priv);
-
- assert(priv->rss_conf != NULL);
-
- /* Apply configuration. */
- if (rss_conf->rss_key)
- err = rss_hash_rss_conf_new_key(priv,
- rss_conf->rss_key,
- rss_conf->rss_key_len,
- rss_conf->rss_hf);
- /* Store protocols for which RSS is enabled. */
- priv->rss_hf = rss_conf->rss_hf;
+ if (rss_conf->rss_key && rss_conf->rss_key_len) {
+ priv->rss_conf.rss_key = rte_realloc(priv->rss_conf.rss_key,
+ rss_conf->rss_key_len, 0);
+ if (!priv->rss_conf.rss_key) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ memcpy(priv->rss_conf.rss_key, rss_conf->rss_key,
+ rss_conf->rss_key_len);
+ priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
+ }
+ priv->rss_conf.rss_hf = rss_conf->rss_hf;
+out:
priv_unlock(priv);
- assert(err >= 0);
- return -err;
+ return ret;
}
/**
@@ -179,26 +105,17 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
struct rte_eth_rss_conf *rss_conf)
{
struct priv *priv = dev->data->dev_private;
- struct rte_eth_rss_conf *priv_rss_conf;
- priv_lock(priv);
-
- assert(priv->rss_conf != NULL);
-
- priv_rss_conf = rss_hash_get(priv, rss_conf->rss_hf);
- if (!priv_rss_conf) {
- rss_conf->rss_hf = 0;
- priv_unlock(priv);
+ if (!rss_conf)
return -EINVAL;
- }
+ priv_lock(priv);
if (rss_conf->rss_key &&
- rss_conf->rss_key_len >= priv_rss_conf->rss_key_len)
- memcpy(rss_conf->rss_key,
- priv_rss_conf->rss_key,
- priv_rss_conf->rss_key_len);
- rss_conf->rss_key_len = priv_rss_conf->rss_key_len;
- rss_conf->rss_hf = priv_rss_conf->rss_hf;
-
+ (rss_conf->rss_key_len >= priv->rss_conf.rss_key_len)) {
+ memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
+ priv->rss_conf.rss_key_len);
+ }
+ rss_conf->rss_key_len = priv->rss_conf.rss_key_len;
+ rss_conf->rss_hf = priv->rss_conf.rss_hf;
priv_unlock(priv);
return 0;
}
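
With the RSS state now kept in a single priv->rss_conf, the update and get callbacks above simply copy the key and hash-function mask in and out under the private lock. The application-facing API is unchanged; a minimal usage sketch (the port number and key contents are examples):

	/* Sketch: standard ethdev RSS configuration backed by the callbacks above. */
	uint16_t port_id = 0;                /* example port */
	uint8_t key[40] = { 0 };             /* application-chosen Toeplitz key */
	struct rte_eth_rss_conf conf = {
		.rss_key = key,
		.rss_key_len = sizeof(key),
		.rss_hf = ETH_RSS_IP | ETH_RSS_UDP,
	};

	rte_eth_dev_rss_hash_update(port_id, &conf);   /* program key + hash types */
	rte_eth_dev_rss_hash_conf_get(port_id, &conf); /* read them back later */
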
@@ -357,11 +274,13 @@ mlx5_dev_rss_reta_update(struct rte_eth_dev *dev,
int ret;
struct priv *priv = dev->data->dev_private;
- mlx5_dev_stop(dev);
+ assert(!mlx5_is_secondary());
priv_lock(priv);
ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
priv_unlock(priv);
- if (ret)
- return -ret;
- return mlx5_dev_start(dev);
+ if (dev->data->dev_started) {
+ mlx5_dev_stop(dev);
+ mlx5_dev_start(dev);
+ }
+ return -ret;
}
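
The RETA callback above now restarts the port only when it was already started and returns a negative errno as ethdev expects. For reference, a hedged sketch of the application-side call it serves (the port and queue spread are examples):

	/* Sketch: spread a 64-entry indirection table over the first two Rx queues. */
	uint16_t port_id = 0;                          /* example port */
	struct rte_eth_rss_reta_entry64 reta_conf[1];
	unsigned int i;

	memset(reta_conf, 0, sizeof(reta_conf));
	reta_conf[0].mask = UINT64_MAX;                /* update all 64 entries */
	for (i = 0; i < RTE_RETA_GROUP_SIZE; ++i)
		reta_conf[0].reta[i] = i % 2;
	rte_eth_dev_rss_reta_update(port_id, reta_conf, RTE_RETA_GROUP_SIZE);
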
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index a67e5426..0ef2cdf0 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -45,343 +45,12 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ethdev.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
-/* Initialization data for special flows. */
-static const struct special_flow_init special_flow_init[] = {
- [HASH_RXQ_FLOW_TYPE_PROMISC] = {
- .dst_mac_val = "\x00\x00\x00\x00\x00\x00",
- .dst_mac_mask = "\x00\x00\x00\x00\x00\x00",
- .hash_types =
- 1 << HASH_RXQ_TCPV4 |
- 1 << HASH_RXQ_UDPV4 |
- 1 << HASH_RXQ_IPV4 |
- 1 << HASH_RXQ_TCPV6 |
- 1 << HASH_RXQ_UDPV6 |
- 1 << HASH_RXQ_IPV6 |
- 1 << HASH_RXQ_ETH |
- 0,
- .per_vlan = 0,
- },
- [HASH_RXQ_FLOW_TYPE_ALLMULTI] = {
- .dst_mac_val = "\x01\x00\x00\x00\x00\x00",
- .dst_mac_mask = "\x01\x00\x00\x00\x00\x00",
- .hash_types =
- 1 << HASH_RXQ_UDPV4 |
- 1 << HASH_RXQ_IPV4 |
- 1 << HASH_RXQ_UDPV6 |
- 1 << HASH_RXQ_IPV6 |
- 1 << HASH_RXQ_ETH |
- 0,
- .per_vlan = 0,
- },
- [HASH_RXQ_FLOW_TYPE_BROADCAST] = {
- .dst_mac_val = "\xff\xff\xff\xff\xff\xff",
- .dst_mac_mask = "\xff\xff\xff\xff\xff\xff",
- .hash_types =
- 1 << HASH_RXQ_UDPV4 |
- 1 << HASH_RXQ_IPV4 |
- 1 << HASH_RXQ_UDPV6 |
- 1 << HASH_RXQ_IPV6 |
- 1 << HASH_RXQ_ETH |
- 0,
- .per_vlan = 1,
- },
- [HASH_RXQ_FLOW_TYPE_IPV6MULTI] = {
- .dst_mac_val = "\x33\x33\x00\x00\x00\x00",
- .dst_mac_mask = "\xff\xff\x00\x00\x00\x00",
- .hash_types =
- 1 << HASH_RXQ_UDPV6 |
- 1 << HASH_RXQ_IPV6 |
- 1 << HASH_RXQ_ETH |
- 0,
- .per_vlan = 1,
- },
-};
-
-/**
- * Enable a special flow in a hash RX queue for a given VLAN index.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param flow_type
- * Special flow type.
- * @param vlan_index
- * VLAN index to use.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-hash_rxq_special_flow_enable_vlan(struct hash_rxq *hash_rxq,
- enum hash_rxq_flow_type flow_type,
- unsigned int vlan_index)
-{
- struct priv *priv = hash_rxq->priv;
- struct ibv_exp_flow *flow;
- FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, hash_rxq->type));
- struct ibv_exp_flow_attr *attr = &data->attr;
- struct ibv_exp_flow_spec_eth *spec = &data->spec;
- const uint8_t *mac;
- const uint8_t *mask;
- unsigned int vlan_enabled = (priv->vlan_filter_n &&
- special_flow_init[flow_type].per_vlan);
- unsigned int vlan_id = priv->vlan_filter[vlan_index];
-
- /* Check if flow is relevant for this hash_rxq. */
- if (!(special_flow_init[flow_type].hash_types & (1 << hash_rxq->type)))
- return 0;
- /* Check if flow already exists. */
- if (hash_rxq->special_flow[flow_type][vlan_index] != NULL)
- return 0;
-
- /*
- * No padding must be inserted by the compiler between attr and spec.
- * This layout is expected by libibverbs.
- */
- assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
- priv_flow_attr(priv, attr, sizeof(data), hash_rxq->type);
- /* The first specification must be Ethernet. */
- assert(spec->type == IBV_EXP_FLOW_SPEC_ETH);
- assert(spec->size == sizeof(*spec));
-
- mac = special_flow_init[flow_type].dst_mac_val;
- mask = special_flow_init[flow_type].dst_mac_mask;
- *spec = (struct ibv_exp_flow_spec_eth){
- .type = IBV_EXP_FLOW_SPEC_ETH,
- .size = sizeof(*spec),
- .val = {
- .dst_mac = {
- mac[0], mac[1], mac[2],
- mac[3], mac[4], mac[5],
- },
- .vlan_tag = (vlan_enabled ? htons(vlan_id) : 0),
- },
- .mask = {
- .dst_mac = {
- mask[0], mask[1], mask[2],
- mask[3], mask[4], mask[5],
- },
- .vlan_tag = (vlan_enabled ? htons(0xfff) : 0),
- },
- };
-
- errno = 0;
- flow = ibv_exp_create_flow(hash_rxq->qp, attr);
- if (flow == NULL) {
- /* It's not clear whether errno is always set in this case. */
- ERROR("%p: flow configuration failed, errno=%d: %s",
- (void *)hash_rxq, errno,
- (errno ? strerror(errno) : "Unknown error"));
- if (errno)
- return errno;
- return EINVAL;
- }
- hash_rxq->special_flow[flow_type][vlan_index] = flow;
- DEBUG("%p: special flow %s (index %d) VLAN %u (index %u) enabled",
- (void *)hash_rxq, hash_rxq_flow_type_str(flow_type), flow_type,
- vlan_id, vlan_index);
- return 0;
-}
-
-/**
- * Disable a special flow in a hash RX queue for a given VLAN index.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param flow_type
- * Special flow type.
- * @param vlan_index
- * VLAN index to use.
- */
-static void
-hash_rxq_special_flow_disable_vlan(struct hash_rxq *hash_rxq,
- enum hash_rxq_flow_type flow_type,
- unsigned int vlan_index)
-{
- struct ibv_exp_flow *flow =
- hash_rxq->special_flow[flow_type][vlan_index];
-
- if (flow == NULL)
- return;
- claim_zero(ibv_exp_destroy_flow(flow));
- hash_rxq->special_flow[flow_type][vlan_index] = NULL;
- DEBUG("%p: special flow %s (index %d) VLAN %u (index %u) disabled",
- (void *)hash_rxq, hash_rxq_flow_type_str(flow_type), flow_type,
- hash_rxq->priv->vlan_filter[vlan_index], vlan_index);
-}
-
-/**
- * Enable a special flow in a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param flow_type
- * Special flow type.
- * @param vlan_index
- * VLAN index to use.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-hash_rxq_special_flow_enable(struct hash_rxq *hash_rxq,
- enum hash_rxq_flow_type flow_type)
-{
- struct priv *priv = hash_rxq->priv;
- unsigned int i = 0;
- int ret;
-
- assert((unsigned int)flow_type < RTE_DIM(hash_rxq->special_flow));
- assert(RTE_DIM(hash_rxq->special_flow[flow_type]) ==
- RTE_DIM(priv->vlan_filter));
- /* Add a special flow for each VLAN filter when relevant. */
- do {
- ret = hash_rxq_special_flow_enable_vlan(hash_rxq, flow_type, i);
- if (ret) {
- /* Failure, rollback. */
- while (i != 0)
- hash_rxq_special_flow_disable_vlan(hash_rxq,
- flow_type,
- --i);
- return ret;
- }
- } while (special_flow_init[flow_type].per_vlan &&
- ++i < priv->vlan_filter_n);
- return 0;
-}
-
-/**
- * Disable a special flow in a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param flow_type
- * Special flow type.
- */
-static void
-hash_rxq_special_flow_disable(struct hash_rxq *hash_rxq,
- enum hash_rxq_flow_type flow_type)
-{
- unsigned int i;
-
- assert((unsigned int)flow_type < RTE_DIM(hash_rxq->special_flow));
- for (i = 0; (i != RTE_DIM(hash_rxq->special_flow[flow_type])); ++i)
- hash_rxq_special_flow_disable_vlan(hash_rxq, flow_type, i);
-}
-
-/**
- * Enable a special flow in all hash RX queues.
- *
- * @param priv
- * Private structure.
- * @param flow_type
- * Special flow type.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-priv_special_flow_enable(struct priv *priv, enum hash_rxq_flow_type flow_type)
-{
- unsigned int i;
-
- if (!priv_allow_flow_type(priv, flow_type))
- return 0;
- for (i = 0; (i != priv->hash_rxqs_n); ++i) {
- struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
- int ret;
-
- ret = hash_rxq_special_flow_enable(hash_rxq, flow_type);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0) {
- hash_rxq = &(*priv->hash_rxqs)[--i];
- hash_rxq_special_flow_disable(hash_rxq, flow_type);
- }
- return ret;
- }
- return 0;
-}
-
-/**
- * Disable a special flow in all hash RX queues.
- *
- * @param priv
- * Private structure.
- * @param flow_type
- * Special flow type.
- */
-void
-priv_special_flow_disable(struct priv *priv, enum hash_rxq_flow_type flow_type)
-{
- unsigned int i;
-
- for (i = 0; (i != priv->hash_rxqs_n); ++i) {
- struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
-
- hash_rxq_special_flow_disable(hash_rxq, flow_type);
- }
-}
-
-/**
- * Enable all special flows in all hash RX queues.
- *
- * @param priv
- * Private structure.
- */
-int
-priv_special_flow_enable_all(struct priv *priv)
-{
- enum hash_rxq_flow_type flow_type;
-
- if (priv->isolated)
- return 0;
- for (flow_type = HASH_RXQ_FLOW_TYPE_PROMISC;
- flow_type != HASH_RXQ_FLOW_TYPE_MAC;
- ++flow_type) {
- int ret;
-
- ret = priv_special_flow_enable(priv, flow_type);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (flow_type)
- priv_special_flow_disable(priv, --flow_type);
- return ret;
- }
- return 0;
-}
-
-/**
- * Disable all special flows in all hash RX queues.
- *
- * @param priv
- * Private structure.
- */
-void
-priv_special_flow_disable_all(struct priv *priv)
-{
- enum hash_rxq_flow_type flow_type;
-
- for (flow_type = HASH_RXQ_FLOW_TYPE_PROMISC;
- flow_type != HASH_RXQ_FLOW_TYPE_MAC;
- ++flow_type)
- priv_special_flow_disable(priv, flow_type);
-}
-
/**
* DPDK callback to enable promiscuous mode.
*
@@ -391,19 +60,10 @@ priv_special_flow_disable_all(struct priv *priv)
void
mlx5_promiscuous_enable(struct rte_eth_dev *dev)
{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
if (mlx5_is_secondary())
return;
-
- priv_lock(priv);
- priv->promisc_req = 1;
- ret = priv_rehash_flows(priv);
- if (ret)
- ERROR("error while enabling promiscuous mode: %s",
- strerror(ret));
- priv_unlock(priv);
+ dev->data->promiscuous = 1;
+ mlx5_traffic_restart(dev);
}
/**
@@ -415,19 +75,10 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev)
void
mlx5_promiscuous_disable(struct rte_eth_dev *dev)
{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
if (mlx5_is_secondary())
return;
-
- priv_lock(priv);
- priv->promisc_req = 0;
- ret = priv_rehash_flows(priv);
- if (ret)
- ERROR("error while disabling promiscuous mode: %s",
- strerror(ret));
- priv_unlock(priv);
+ dev->data->promiscuous = 0;
+ mlx5_traffic_restart(dev);
}
/**
@@ -439,19 +90,10 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev)
void
mlx5_allmulticast_enable(struct rte_eth_dev *dev)
{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
if (mlx5_is_secondary())
return;
-
- priv_lock(priv);
- priv->allmulti_req = 1;
- ret = priv_rehash_flows(priv);
- if (ret)
- ERROR("error while enabling allmulticast mode: %s",
- strerror(ret));
- priv_unlock(priv);
+ dev->data->all_multicast = 1;
+ mlx5_traffic_restart(dev);
}
/**
@@ -463,17 +105,8 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev)
void
mlx5_allmulticast_disable(struct rte_eth_dev *dev)
{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
if (mlx5_is_secondary())
return;
-
- priv_lock(priv);
- priv->allmulti_req = 0;
- ret = priv_rehash_flows(priv);
- if (ret)
- ERROR("error while disabling allmulticast mode: %s",
- strerror(ret));
- priv_unlock(priv);
+ dev->data->all_multicast = 0;
+ mlx5_traffic_restart(dev);
}
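
After this change the four callbacks above only record the requested state in dev->data and let mlx5_traffic_restart() rebuild the control flows, instead of rehashing hash Rx queue flows under the private lock. From the application side nothing changes; a minimal sketch (port_id is an example):

	/* Sketch: toggling the modes through the standard ethdev API. */
	uint16_t port_id = 0;                  /* example port */

	rte_eth_promiscuous_enable(port_id);   /* sets promiscuous and restarts traffic */
	rte_eth_allmulticast_enable(port_id);  /* sets all_multicast and restarts traffic */
	rte_eth_promiscuous_disable(port_id);
	rte_eth_allmulticast_disable(port_id);
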
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 74387a79..6b29aaee 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -37,6 +37,7 @@
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
+#include <sys/queue.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -44,25 +45,18 @@
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
-#include <infiniband/arch.h>
-#include <infiniband/mlx5_hw.h>
+#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_debug.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
+#include <rte_io.h>
#include "mlx5.h"
#include "mlx5_rxtx.h"
@@ -70,122 +64,6 @@
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
-/* Initialization data for hash RX queues. */
-const struct hash_rxq_init hash_rxq_init[] = {
- [HASH_RXQ_TCPV4] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
- IBV_EXP_RX_HASH_DST_IPV4 |
- IBV_EXP_RX_HASH_SRC_PORT_TCP |
- IBV_EXP_RX_HASH_DST_PORT_TCP),
- .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
- .flow_priority = 0,
- .flow_spec.tcp_udp = {
- .type = IBV_EXP_FLOW_SPEC_TCP,
- .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
- },
- [HASH_RXQ_UDPV4] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
- IBV_EXP_RX_HASH_DST_IPV4 |
- IBV_EXP_RX_HASH_SRC_PORT_UDP |
- IBV_EXP_RX_HASH_DST_PORT_UDP),
- .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
- .flow_priority = 0,
- .flow_spec.tcp_udp = {
- .type = IBV_EXP_FLOW_SPEC_UDP,
- .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
- },
- [HASH_RXQ_IPV4] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
- IBV_EXP_RX_HASH_DST_IPV4),
- .dpdk_rss_hf = (ETH_RSS_IPV4 |
- ETH_RSS_FRAG_IPV4),
- .flow_priority = 1,
- .flow_spec.ipv4 = {
- .type = IBV_EXP_FLOW_SPEC_IPV4,
- .size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
- },
- [HASH_RXQ_TCPV6] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
- IBV_EXP_RX_HASH_DST_IPV6 |
- IBV_EXP_RX_HASH_SRC_PORT_TCP |
- IBV_EXP_RX_HASH_DST_PORT_TCP),
- .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
- .flow_priority = 0,
- .flow_spec.tcp_udp = {
- .type = IBV_EXP_FLOW_SPEC_TCP,
- .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
- },
- [HASH_RXQ_UDPV6] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
- IBV_EXP_RX_HASH_DST_IPV6 |
- IBV_EXP_RX_HASH_SRC_PORT_UDP |
- IBV_EXP_RX_HASH_DST_PORT_UDP),
- .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
- .flow_priority = 0,
- .flow_spec.tcp_udp = {
- .type = IBV_EXP_FLOW_SPEC_UDP,
- .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
- },
- [HASH_RXQ_IPV6] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
- IBV_EXP_RX_HASH_DST_IPV6),
- .dpdk_rss_hf = (ETH_RSS_IPV6 |
- ETH_RSS_FRAG_IPV6),
- .flow_priority = 1,
- .flow_spec.ipv6 = {
- .type = IBV_EXP_FLOW_SPEC_IPV6,
- .size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
- },
- [HASH_RXQ_ETH] = {
- .hash_fields = 0,
- .dpdk_rss_hf = 0,
- .flow_priority = 2,
- .flow_spec.eth = {
- .type = IBV_EXP_FLOW_SPEC_ETH,
- .size = sizeof(hash_rxq_init[0].flow_spec.eth),
- },
- .underlayer = NULL,
- },
-};
-
-/* Number of entries in hash_rxq_init[]. */
-const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
-
-/* Initialization data for hash RX queue indirection tables. */
-static const struct ind_table_init ind_table_init[] = {
- {
- .max_size = -1u, /* Superseded by HW limitations. */
- .hash_types =
- 1 << HASH_RXQ_TCPV4 |
- 1 << HASH_RXQ_UDPV4 |
- 1 << HASH_RXQ_IPV4 |
- 1 << HASH_RXQ_TCPV6 |
- 1 << HASH_RXQ_UDPV6 |
- 1 << HASH_RXQ_IPV6 |
- 0,
- .hash_types_n = 6,
- },
- {
- .max_size = 1,
- .hash_types = 1 << HASH_RXQ_ETH,
- .hash_types_n = 1,
- },
-};
-
-#define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
-
/* Default RSS hash key also used for ConnectX-3. */
uint8_t rss_hash_default_key[] = {
0x2c, 0xc6, 0x81, 0xd1,
@@ -204,495 +82,27 @@ uint8_t rss_hash_default_key[] = {
const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
/**
- * Populate flow steering rule for a given hash RX queue type using
- * information from hash_rxq_init[]. Nothing is written to flow_attr when
- * flow_attr_size is not large enough, but the required size is still returned.
- *
- * @param priv
- * Pointer to private structure.
- * @param[out] flow_attr
- * Pointer to flow attribute structure to fill. Note that the allocated
- * area must be larger and large enough to hold all flow specifications.
- * @param flow_attr_size
- * Entire size of flow_attr and trailing room for flow specifications.
- * @param type
- * Hash RX queue type to use for flow steering rule.
- *
- * @return
- * Total size of the flow attribute buffer. No errors are defined.
- */
-size_t
-priv_flow_attr(struct priv *priv, struct ibv_exp_flow_attr *flow_attr,
- size_t flow_attr_size, enum hash_rxq_type type)
-{
- size_t offset = sizeof(*flow_attr);
- const struct hash_rxq_init *init = &hash_rxq_init[type];
-
- assert(priv != NULL);
- assert((size_t)type < RTE_DIM(hash_rxq_init));
- do {
- offset += init->flow_spec.hdr.size;
- init = init->underlayer;
- } while (init != NULL);
- if (offset > flow_attr_size)
- return offset;
- flow_attr_size = offset;
- init = &hash_rxq_init[type];
- *flow_attr = (struct ibv_exp_flow_attr){
- .type = IBV_EXP_FLOW_ATTR_NORMAL,
- /* Priorities < 3 are reserved for flow director. */
- .priority = init->flow_priority + 3,
- .num_of_specs = 0,
- .port = priv->port,
- .flags = 0,
- };
- do {
- offset -= init->flow_spec.hdr.size;
- memcpy((void *)((uintptr_t)flow_attr + offset),
- &init->flow_spec,
- init->flow_spec.hdr.size);
- ++flow_attr->num_of_specs;
- init = init->underlayer;
- } while (init != NULL);
- return flow_attr_size;
-}
-
-/**
- * Convert hash type position in indirection table initializer to
- * hash RX queue type.
- *
- * @param table
- * Indirection table initializer.
- * @param pos
- * Hash type position.
- *
- * @return
- * Hash RX queue type.
- */
-static enum hash_rxq_type
-hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
-{
- enum hash_rxq_type type = HASH_RXQ_TCPV4;
-
- assert(pos < table->hash_types_n);
- do {
- if ((table->hash_types & (1 << type)) && (pos-- == 0))
- break;
- ++type;
- } while (1);
- return type;
-}
-
-/**
- * Filter out disabled hash RX queue types from ind_table_init[].
- *
- * @param priv
- * Pointer to private structure.
- * @param[out] table
- * Output table.
- *
- * @return
- * Number of table entries.
- */
-static unsigned int
-priv_make_ind_table_init(struct priv *priv,
- struct ind_table_init (*table)[IND_TABLE_INIT_N])
-{
- uint64_t rss_hf;
- unsigned int i;
- unsigned int j;
- unsigned int table_n = 0;
- /* Mandatory to receive frames not handled by normal hash RX queues. */
- unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
-
- rss_hf = priv->rss_hf;
- /* Process other protocols only if more than one queue. */
- if (priv->rxqs_n > 1)
- for (i = 0; (i != hash_rxq_init_n); ++i)
- if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
- hash_types_sup |= (1 << i);
-
- /* Filter out entries whose protocols are not in the set. */
- for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
- unsigned int nb;
- unsigned int h;
-
- /* j is increased only if the table has valid protocols. */
- assert(j <= i);
- (*table)[j] = ind_table_init[i];
- (*table)[j].hash_types &= hash_types_sup;
- for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
- if (((*table)[j].hash_types >> h) & 0x1)
- ++nb;
- (*table)[i].hash_types_n = nb;
- if (nb) {
- ++table_n;
- ++j;
- }
- }
- return table_n;
-}
-
-/**
- * Initialize hash RX queues and indirection table.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-priv_create_hash_rxqs(struct priv *priv)
-{
- struct ibv_exp_wq *wqs[priv->reta_idx_n];
- struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
- unsigned int ind_tables_n =
- priv_make_ind_table_init(priv, &ind_table_init);
- unsigned int hash_rxqs_n = 0;
- struct hash_rxq (*hash_rxqs)[] = NULL;
- struct ibv_exp_rwq_ind_table *(*ind_tables)[] = NULL;
- unsigned int i;
- unsigned int j;
- unsigned int k;
- int err = 0;
-
- assert(priv->ind_tables == NULL);
- assert(priv->ind_tables_n == 0);
- assert(priv->hash_rxqs == NULL);
- assert(priv->hash_rxqs_n == 0);
- assert(priv->pd != NULL);
- assert(priv->ctx != NULL);
- if (priv->isolated)
- return 0;
- if (priv->rxqs_n == 0)
- return EINVAL;
- assert(priv->rxqs != NULL);
- if (ind_tables_n == 0) {
- ERROR("all hash RX queue types have been filtered out,"
- " indirection table cannot be created");
- return EINVAL;
- }
- if (priv->rxqs_n & (priv->rxqs_n - 1)) {
- INFO("%u RX queues are configured, consider rounding this"
- " number to the next power of two for better balancing",
- priv->rxqs_n);
- DEBUG("indirection table extended to assume %u WQs",
- priv->reta_idx_n);
- }
- for (i = 0; (i != priv->reta_idx_n); ++i) {
- struct rxq_ctrl *rxq_ctrl;
-
- rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
- struct rxq_ctrl, rxq);
- wqs[i] = rxq_ctrl->wq;
- }
- /* Get number of hash RX queues to configure. */
- for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
- hash_rxqs_n += ind_table_init[i].hash_types_n;
- DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
- hash_rxqs_n, priv->rxqs_n, ind_tables_n);
- /* Create indirection tables. */
- ind_tables = rte_calloc(__func__, ind_tables_n,
- sizeof((*ind_tables)[0]), 0);
- if (ind_tables == NULL) {
- err = ENOMEM;
- ERROR("cannot allocate indirection tables container: %s",
- strerror(err));
- goto error;
- }
- for (i = 0; (i != ind_tables_n); ++i) {
- struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = {
- .pd = priv->pd,
- .log_ind_tbl_size = 0, /* Set below. */
- .ind_tbl = wqs,
- .comp_mask = 0,
- };
- unsigned int ind_tbl_size = ind_table_init[i].max_size;
- struct ibv_exp_rwq_ind_table *ind_table;
-
- if (priv->reta_idx_n < ind_tbl_size)
- ind_tbl_size = priv->reta_idx_n;
- ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
- errno = 0;
- ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
- &ind_init_attr);
- if (ind_table != NULL) {
- (*ind_tables)[i] = ind_table;
- continue;
- }
- /* Not clear whether errno is set. */
- err = (errno ? errno : EINVAL);
- ERROR("RX indirection table creation failed with error %d: %s",
- err, strerror(err));
- goto error;
- }
- /* Allocate array that holds hash RX queues and related data. */
- hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
- sizeof((*hash_rxqs)[0]), 0);
- if (hash_rxqs == NULL) {
- err = ENOMEM;
- ERROR("cannot allocate hash RX queues container: %s",
- strerror(err));
- goto error;
- }
- for (i = 0, j = 0, k = 0;
- ((i != hash_rxqs_n) && (j != ind_tables_n));
- ++i) {
- struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
- enum hash_rxq_type type =
- hash_rxq_type_from_pos(&ind_table_init[j], k);
- struct rte_eth_rss_conf *priv_rss_conf =
- (*priv->rss_conf)[type];
- struct ibv_exp_rx_hash_conf hash_conf = {
- .rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
- .rx_hash_key_len = (priv_rss_conf ?
- priv_rss_conf->rss_key_len :
- rss_hash_default_key_len),
- .rx_hash_key = (priv_rss_conf ?
- priv_rss_conf->rss_key :
- rss_hash_default_key),
- .rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
- .rwq_ind_tbl = (*ind_tables)[j],
- };
- struct ibv_exp_qp_init_attr qp_init_attr = {
- .max_inl_recv = 0, /* Currently not supported. */
- .qp_type = IBV_QPT_RAW_PACKET,
- .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_RX_HASH),
- .pd = priv->pd,
- .rx_hash_conf = &hash_conf,
- .port_num = priv->port,
- };
-
- DEBUG("using indirection table %u for hash RX queue %u type %d",
- j, i, type);
- *hash_rxq = (struct hash_rxq){
- .priv = priv,
- .qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr),
- .type = type,
- };
- if (hash_rxq->qp == NULL) {
- err = (errno ? errno : EINVAL);
- ERROR("Hash RX QP creation failure: %s",
- strerror(err));
- goto error;
- }
- if (++k < ind_table_init[j].hash_types_n)
- continue;
- /* Switch to the next indirection table and reset hash RX
- * queue type array index. */
- ++j;
- k = 0;
- }
- priv->ind_tables = ind_tables;
- priv->ind_tables_n = ind_tables_n;
- priv->hash_rxqs = hash_rxqs;
- priv->hash_rxqs_n = hash_rxqs_n;
- assert(err == 0);
- return 0;
-error:
- if (hash_rxqs != NULL) {
- for (i = 0; (i != hash_rxqs_n); ++i) {
- struct ibv_qp *qp = (*hash_rxqs)[i].qp;
-
- if (qp == NULL)
- continue;
- claim_zero(ibv_destroy_qp(qp));
- }
- rte_free(hash_rxqs);
- }
- if (ind_tables != NULL) {
- for (j = 0; (j != ind_tables_n); ++j) {
- struct ibv_exp_rwq_ind_table *ind_table =
- (*ind_tables)[j];
-
- if (ind_table == NULL)
- continue;
- claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
- }
- rte_free(ind_tables);
- }
- return err;
-}
-
-/**
- * Clean up hash RX queues and indirection table.
- *
- * @param priv
- * Pointer to private structure.
- */
-void
-priv_destroy_hash_rxqs(struct priv *priv)
-{
- unsigned int i;
-
- DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
- if (priv->hash_rxqs_n == 0) {
- assert(priv->hash_rxqs == NULL);
- assert(priv->ind_tables == NULL);
- return;
- }
- for (i = 0; (i != priv->hash_rxqs_n); ++i) {
- struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
- unsigned int j, k;
-
- assert(hash_rxq->priv == priv);
- assert(hash_rxq->qp != NULL);
- /* Also check that there are no remaining flows. */
- for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j)
- for (k = 0;
- (k != RTE_DIM(hash_rxq->special_flow[j]));
- ++k)
- assert(hash_rxq->special_flow[j][k] == NULL);
- for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
- for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
- assert(hash_rxq->mac_flow[j][k] == NULL);
- claim_zero(ibv_destroy_qp(hash_rxq->qp));
- }
- priv->hash_rxqs_n = 0;
- rte_free(priv->hash_rxqs);
- priv->hash_rxqs = NULL;
- for (i = 0; (i != priv->ind_tables_n); ++i) {
- struct ibv_exp_rwq_ind_table *ind_table =
- (*priv->ind_tables)[i];
-
- assert(ind_table != NULL);
- claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
- }
- priv->ind_tables_n = 0;
- rte_free(priv->ind_tables);
- priv->ind_tables = NULL;
-}
-
-/**
- * Check whether a given flow type is allowed.
- *
- * @param priv
- * Pointer to private structure.
- * @param type
- * Flow type to check.
- *
- * @return
- * Nonzero if the given flow type is allowed.
- */
-int
-priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
-{
- /* Only FLOW_TYPE_PROMISC is allowed when promiscuous mode
- * has been requested. */
- if (priv->promisc_req)
- return type == HASH_RXQ_FLOW_TYPE_PROMISC;
- switch (type) {
- case HASH_RXQ_FLOW_TYPE_PROMISC:
- return !!priv->promisc_req;
- case HASH_RXQ_FLOW_TYPE_ALLMULTI:
- return !!priv->allmulti_req;
- case HASH_RXQ_FLOW_TYPE_BROADCAST:
- case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
- /* If allmulti is enabled, broadcast and ipv6multi
- * are unnecessary. */
- return !priv->allmulti_req;
- case HASH_RXQ_FLOW_TYPE_MAC:
- return 1;
- default:
- /* Unsupported flow type is not allowed. */
- return 0;
- }
- return 0;
-}
-
-/**
- * Automatically enable/disable flows according to configuration.
- *
- * @param priv
- * Private structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-priv_rehash_flows(struct priv *priv)
-{
- enum hash_rxq_flow_type i;
-
- for (i = HASH_RXQ_FLOW_TYPE_PROMISC;
- i != RTE_DIM((*priv->hash_rxqs)[0].special_flow);
- ++i)
- if (!priv_allow_flow_type(priv, i)) {
- priv_special_flow_disable(priv, i);
- } else {
- int ret = priv_special_flow_enable(priv, i);
-
- if (ret)
- return ret;
- }
- if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
- return priv_mac_addrs_enable(priv);
- priv_mac_addrs_disable(priv);
- return 0;
-}
-
-/**
- * Unlike regular Rx function, vPMD Rx doesn't replace mbufs immediately when
- * receiving packets. Instead it replaces later in bulk. In rxq->elts[], entries
- * from rq_pi to rq_ci are owned by device but the rest is already delivered to
- * application. In order not to reuse those mbufs by rxq_alloc_elts(), this
- * function must be called to replace used mbufs.
- *
- * @param rxq
- * Pointer to RX queue structure.
- */
-static void
-rxq_trim_elts(struct rxq *rxq)
-{
- const uint16_t q_n = (1 << rxq->elts_n);
- const uint16_t q_mask = q_n - 1;
- uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
- uint16_t i;
-
- if (!rxq->trim_elts)
- return;
- for (i = 0; i < used; ++i)
- (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
- rxq->trim_elts = 0;
- return;
-}
-
-/**
* Allocate RX queue elements.
*
* @param rxq_ctrl
* Pointer to RX queue structure.
- * @param elts_n
- * Number of elements to allocate.
- * @param[in] pool
- * If not NULL, fetch buffers from this array instead of allocating them
- * with rte_pktmbuf_alloc().
*
* @return
* 0 on success, errno value on failure.
*/
-static int
-rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
- struct rte_mbuf *(*pool)[])
+int
+rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
+ unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
unsigned int i;
int ret = 0;
/* Iterate on segments. */
for (i = 0; (i != elts_n); ++i) {
struct rte_mbuf *buf;
- volatile struct mlx5_wqe_data_seg *scat =
- &(*rxq_ctrl->rxq.wqes)[i];
-
- buf = (pool != NULL) ? (*pool)[i] : NULL;
- if (buf != NULL) {
- rte_pktmbuf_reset(buf);
- rte_pktmbuf_refcnt_update(buf, 1);
- } else
- buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
+
+ buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
if (buf == NULL) {
ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
ret = ENOMEM;
@@ -711,21 +121,35 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
PKT_LEN(buf) = DATA_LEN(buf);
NB_SEGS(buf) = 1;
- /* scat->addr must be able to store a pointer. */
- assert(sizeof(scat->addr) >= sizeof(uintptr_t));
- *scat = (struct mlx5_wqe_data_seg){
- .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
- .byte_count = htonl(DATA_LEN(buf)),
- .lkey = htonl(rxq_ctrl->mr->lkey),
- };
(*rxq_ctrl->rxq.elts)[i] = buf;
}
+ /* Extra initialization when the vectorized Rx burst is enabled. */
+ if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
+ struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
+ struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
+ int j;
+
+ /* Initialize default rearm_data for vPMD. */
+ mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
+ rte_mbuf_refcnt_set(mbuf_init, 1);
+ mbuf_init->nb_segs = 1;
+ mbuf_init->port = rxq->port_id;
+ /*
+ * prevent compiler reordering:
+ * rearm_data covers previous fields.
+ */
+ rte_compiler_barrier();
+ rxq->mbuf_initializer =
+ *(uint64_t *)&mbuf_init->rearm_data;
+ /* Padding with a fake mbuf for vectorized Rx. */
+ for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
+ (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
+ }
DEBUG("%p: allocated and configured %u segments (max %u packets)",
(void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
assert(ret == 0);
return 0;
error:
- assert(pool == NULL);
elts_n = i;
for (i = 0; (i != elts_n); ++i) {
if ((*rxq_ctrl->rxq.elts)[i] != NULL)
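
The new branch in rxq_alloc_elts() prepares state for the vectorized Rx burst: a 64-bit rearm_data template is built once from the queue's fake mbuf, and the tail of elts[] is padded with pointers to rxq->fake_mbuf so the SIMD loop can always load MLX5_VPMD_DESCS_PER_LOOP entries. A hedged sketch of how such a template is typically consumed on the hot path (the helper name is hypothetical):

	/* Sketch only: restore data_off/refcnt/nb_segs/port with a single
	 * 64-bit store, the usual use of mbuf_initializer in a vPMD loop. */
	static inline void
	rearm_mbuf_sketch(struct mlx5_rxq_data *rxq, struct rte_mbuf *m)
	{
		*(uint64_t *)&m->rearm_data = rxq->mbuf_initializer;
	}
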
@@ -744,19 +168,30 @@ error:
* Pointer to RX queue structure.
*/
static void
-rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
+rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
- unsigned int i;
+ struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
+ const uint16_t q_n = (1 << rxq->elts_n);
+ const uint16_t q_mask = q_n - 1;
+ uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
+ uint16_t i;
- rxq_trim_elts(&rxq_ctrl->rxq);
DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
- if (rxq_ctrl->rxq.elts == NULL)
+ if (rxq->elts == NULL)
return;
-
- for (i = 0; (i != (1u << rxq_ctrl->rxq.elts_n)); ++i) {
- if ((*rxq_ctrl->rxq.elts)[i] != NULL)
- rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
- (*rxq_ctrl->rxq.elts)[i] = NULL;
+ /*
+ * Some mbufs in the ring still belong to the application;
+ * they cannot be freed here.
+ */
+ if (rxq_check_vec_support(rxq) > 0) {
+ for (i = 0; i < used; ++i)
+ (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
+ rxq->rq_pi = rxq->rq_ci;
+ }
+ for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
+ if ((*rxq->elts)[i] != NULL)
+ rte_pktmbuf_free_seg((*rxq->elts)[i]);
+ (*rxq->elts)[i] = NULL;
}
}
@@ -769,343 +204,15 @@ rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
* Pointer to RX queue structure.
*/
void
-rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
+mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
{
DEBUG("cleaning up %p", (void *)rxq_ctrl);
- rxq_free_elts(rxq_ctrl);
- if (rxq_ctrl->fdir_queue != NULL)
- priv_fdir_queue_destroy(rxq_ctrl->priv, rxq_ctrl->fdir_queue);
- if (rxq_ctrl->wq != NULL)
- claim_zero(ibv_exp_destroy_wq(rxq_ctrl->wq));
- if (rxq_ctrl->cq != NULL)
- claim_zero(ibv_destroy_cq(rxq_ctrl->cq));
- if (rxq_ctrl->channel != NULL)
- claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
- if (rxq_ctrl->mr != NULL)
- claim_zero(ibv_dereg_mr(rxq_ctrl->mr));
+ if (rxq_ctrl->ibv)
+ mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
}
/**
- * Initialize RX queue.
- *
- * @param tmpl
- * Pointer to RX queue control template.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static inline int
-rxq_setup(struct rxq_ctrl *tmpl)
-{
- struct ibv_cq *ibcq = tmpl->cq;
- struct ibv_mlx5_cq_info cq_info;
- struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
- struct rte_mbuf *(*elts)[1 << tmpl->rxq.elts_n] =
- rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);
-
- if (ibv_mlx5_exp_get_cq_info(ibcq, &cq_info)) {
- ERROR("Unable to query CQ info. check your OFED.");
- return ENOTSUP;
- }
- if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
- ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
- "it should be set to %u", RTE_CACHE_LINE_SIZE);
- return EINVAL;
- }
- if (elts == NULL)
- return ENOMEM;
- tmpl->rxq.rq_db = rwq->rq.db;
- tmpl->rxq.cqe_n = log2above(cq_info.cqe_cnt);
- tmpl->rxq.cq_ci = 0;
- tmpl->rxq.rq_ci = 0;
- tmpl->rxq.rq_pi = 0;
- tmpl->rxq.cq_db = cq_info.dbrec;
- tmpl->rxq.wqes =
- (volatile struct mlx5_wqe_data_seg (*)[])
- (uintptr_t)rwq->rq.buff;
- tmpl->rxq.cqes =
- (volatile struct mlx5_cqe (*)[])
- (uintptr_t)cq_info.buf;
- tmpl->rxq.elts = elts;
- return 0;
-}
-
-/**
- * Configure a RX queue.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param rxq_ctrl
- * Pointer to RX queue structure.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param socket
- * NUMA socket on which memory must be allocated.
- * @param[in] conf
- * Thresholds parameters.
- * @param mp
- * Memory pool for buffer allocations.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
- uint16_t desc, unsigned int socket,
- const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
-{
- struct priv *priv = dev->data->dev_private;
- struct rxq_ctrl tmpl = {
- .priv = priv,
- .socket = socket,
- .rxq = {
- .elts_n = log2above(desc),
- .mp = mp,
- .rss_hash = priv->rxqs_n > 1,
- },
- };
- struct ibv_exp_wq_attr mod;
- union {
- struct ibv_exp_cq_init_attr cq;
- struct ibv_exp_wq_init_attr wq;
- struct ibv_exp_cq_attr cq_attr;
- } attr;
- unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
- unsigned int cqe_n = desc - 1;
- struct rte_mbuf *(*elts)[desc] = NULL;
- int ret = 0;
-
- (void)conf; /* Thresholds configuration (ignored). */
- /* Enable scattered packets support for this queue if necessary. */
- assert(mb_len >= RTE_PKTMBUF_HEADROOM);
- if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
- (mb_len - RTE_PKTMBUF_HEADROOM)) {
- tmpl.rxq.sges_n = 0;
- } else if (dev->data->dev_conf.rxmode.enable_scatter) {
- unsigned int size =
- RTE_PKTMBUF_HEADROOM +
- dev->data->dev_conf.rxmode.max_rx_pkt_len;
- unsigned int sges_n;
-
- /*
- * Determine the number of SGEs needed for a full packet
- * and round it to the next power of two.
- */
- sges_n = log2above((size / mb_len) + !!(size % mb_len));
- tmpl.rxq.sges_n = sges_n;
- /* Make sure rxq.sges_n did not overflow. */
- size = mb_len * (1 << tmpl.rxq.sges_n);
- size -= RTE_PKTMBUF_HEADROOM;
- if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
- ERROR("%p: too many SGEs (%u) needed to handle"
- " requested maximum packet size %u",
- (void *)dev,
- 1 << sges_n,
- dev->data->dev_conf.rxmode.max_rx_pkt_len);
- return EOVERFLOW;
- }
- } else {
- WARN("%p: the requested maximum Rx packet size (%u) is"
- " larger than a single mbuf (%u) and scattered"
- " mode has not been requested",
- (void *)dev,
- dev->data->dev_conf.rxmode.max_rx_pkt_len,
- mb_len - RTE_PKTMBUF_HEADROOM);
- }
- DEBUG("%p: maximum number of segments per packet: %u",
- (void *)dev, 1 << tmpl.rxq.sges_n);
- if (desc % (1 << tmpl.rxq.sges_n)) {
- ERROR("%p: number of RX queue descriptors (%u) is not a"
- " multiple of SGEs per packet (%u)",
- (void *)dev,
- desc,
- 1 << tmpl.rxq.sges_n);
- return EINVAL;
- }
- /* Toggle RX checksum offload if hardware supports it. */
- if (priv->hw_csum)
- tmpl.rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
- if (priv->hw_csum_l2tun)
- tmpl.rxq.csum_l2tun =
- !!dev->data->dev_conf.rxmode.hw_ip_checksum;
- /* Use the entire RX mempool as the memory region. */
- tmpl.mr = mlx5_mp2mr(priv->pd, mp);
- if (tmpl.mr == NULL) {
- ret = EINVAL;
- ERROR("%p: MR creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- if (dev->data->dev_conf.intr_conf.rxq) {
- tmpl.channel = ibv_create_comp_channel(priv->ctx);
- if (tmpl.channel == NULL) {
- ret = ENOMEM;
- ERROR("%p: Rx interrupt completion channel creation"
- " failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- }
- attr.cq = (struct ibv_exp_cq_init_attr){
- .comp_mask = 0,
- };
- if (priv->cqe_comp) {
- attr.cq.comp_mask |= IBV_EXP_CQ_INIT_ATTR_FLAGS;
- attr.cq.flags |= IBV_EXP_CQ_COMPRESSED_CQE;
- /*
- * For vectorized Rx, it must not be doubled in order to
- * make cq_ci and rq_ci aligned.
- */
- if (rxq_check_vec_support(&tmpl.rxq) < 0)
- cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
- }
- tmpl.cq = ibv_exp_create_cq(priv->ctx, cqe_n, NULL, tmpl.channel, 0,
- &attr.cq);
- if (tmpl.cq == NULL) {
- ret = ENOMEM;
- ERROR("%p: CQ creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- DEBUG("priv->device_attr.max_qp_wr is %d",
- priv->device_attr.max_qp_wr);
- DEBUG("priv->device_attr.max_sge is %d",
- priv->device_attr.max_sge);
- /* Configure VLAN stripping. */
- tmpl.rxq.vlan_strip = (priv->hw_vlan_strip &&
- !!dev->data->dev_conf.rxmode.hw_vlan_strip);
- attr.wq = (struct ibv_exp_wq_init_attr){
- .wq_context = NULL, /* Could be useful in the future. */
- .wq_type = IBV_EXP_WQT_RQ,
- /* Max number of outstanding WRs. */
- .max_recv_wr = desc >> tmpl.rxq.sges_n,
- /* Max number of scatter/gather elements in a WR. */
- .max_recv_sge = 1 << tmpl.rxq.sges_n,
- .pd = priv->pd,
- .cq = tmpl.cq,
- .comp_mask =
- IBV_EXP_CREATE_WQ_VLAN_OFFLOADS |
- 0,
- .vlan_offloads = (tmpl.rxq.vlan_strip ?
- IBV_EXP_RECEIVE_WQ_CVLAN_STRIP :
- 0),
- };
- /* By default, FCS (CRC) is stripped by hardware. */
- if (dev->data->dev_conf.rxmode.hw_strip_crc) {
- tmpl.rxq.crc_present = 0;
- } else if (priv->hw_fcs_strip) {
- /* Ask HW/Verbs to leave CRC in place when supported. */
- attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS;
- attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS;
- tmpl.rxq.crc_present = 1;
- } else {
- WARN("%p: CRC stripping has been disabled but will still"
- " be performed by hardware, make sure MLNX_OFED and"
- " firmware are up to date",
- (void *)dev);
- tmpl.rxq.crc_present = 0;
- }
- DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
- " incoming frames to hide it",
- (void *)dev,
- tmpl.rxq.crc_present ? "disabled" : "enabled",
- tmpl.rxq.crc_present << 2);
- if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING"))
- ; /* Nothing else to do. */
- else if (priv->hw_padding) {
- INFO("%p: enabling packet padding on queue %p",
- (void *)dev, (void *)rxq_ctrl);
- attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING;
- attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS;
- } else
- WARN("%p: packet padding has been requested but is not"
- " supported, make sure MLNX_OFED and firmware are"
- " up to date",
- (void *)dev);
-
- tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
- if (tmpl.wq == NULL) {
- ret = (errno ? errno : EINVAL);
- ERROR("%p: WQ creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- /*
- * Make sure number of WRs*SGEs match expectations since a queue
- * cannot allocate more than "desc" buffers.
- */
- if (((int)attr.wq.max_recv_wr != (desc >> tmpl.rxq.sges_n)) ||
- ((int)attr.wq.max_recv_sge != (1 << tmpl.rxq.sges_n))) {
- ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
- (void *)dev,
- (desc >> tmpl.rxq.sges_n), (1 << tmpl.rxq.sges_n),
- attr.wq.max_recv_wr, attr.wq.max_recv_sge);
- ret = EINVAL;
- goto error;
- }
- /* Save port ID. */
- tmpl.rxq.port_id = dev->data->port_id;
- DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
- /* Change queue state to ready. */
- mod = (struct ibv_exp_wq_attr){
- .attr_mask = IBV_EXP_WQ_ATTR_STATE,
- .wq_state = IBV_EXP_WQS_RDY,
- };
- ret = ibv_exp_modify_wq(tmpl.wq, &mod);
- if (ret) {
- ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- ret = rxq_setup(&tmpl);
- if (ret) {
- ERROR("%p: cannot initialize RX queue structure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- /* Reuse buffers from original queue if possible. */
- if (rxq_ctrl->rxq.elts_n) {
- assert(1 << rxq_ctrl->rxq.elts_n == desc);
- assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts);
- rxq_trim_elts(&rxq_ctrl->rxq);
- ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
- } else
- ret = rxq_alloc_elts(&tmpl, desc, NULL);
- if (ret) {
- ERROR("%p: RXQ allocation failed: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- /* Clean up rxq in case we're reinitializing it. */
- DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
- rxq_cleanup(rxq_ctrl);
- /* Move mbuf pointers to dedicated storage area in RX queue. */
- elts = (void *)(rxq_ctrl + 1);
- rte_memcpy(elts, tmpl.rxq.elts, sizeof(*elts));
-#ifndef NDEBUG
- memset(tmpl.rxq.elts, 0x55, sizeof(*elts));
-#endif
- rte_free(tmpl.rxq.elts);
- tmpl.rxq.elts = elts;
- *rxq_ctrl = tmpl;
- /* Update doorbell counter. */
- rxq_ctrl->rxq.rq_ci = desc >> rxq_ctrl->rxq.sges_n;
- rte_wmb();
- *rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
- DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
- assert(ret == 0);
- return 0;
-error:
- elts = tmpl.rxq.elts;
- rxq_cleanup(&tmpl);
- rte_free(elts);
- assert(ret > 0);
- return ret;
-}
-
-/**
- * DPDK callback to configure a RX queue.
*
* @param dev
* Pointer to Ethernet device structure.
@@ -1129,14 +236,14 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
struct rte_mempool *mp)
{
struct priv *priv = dev->data->dev_private;
- struct rxq *rxq = (*priv->rxqs)[idx];
- struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- const uint16_t desc_pad = MLX5_VPMD_DESCS_PER_LOOP; /* For vPMD. */
- int ret;
+ struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+ int ret = 0;
+ (void)conf;
if (mlx5_is_secondary())
return -E_RTE_SECONDARY;
-
priv_lock(priv);
if (!rte_is_power_of_2(desc)) {
desc = 1 << log2above(desc);
@@ -1152,50 +259,24 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
priv_unlock(priv);
return -EOVERFLOW;
}
- if (rxq != NULL) {
- DEBUG("%p: reusing already allocated queue index %u (%p)",
- (void *)dev, idx, (void *)rxq);
- if (priv->started) {
- priv_unlock(priv);
- return -EEXIST;
- }
- (*priv->rxqs)[idx] = NULL;
- rxq_cleanup(rxq_ctrl);
- /* Resize if rxq size is changed. */
- if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
- rxq_ctrl = rte_realloc(rxq_ctrl,
- sizeof(*rxq_ctrl) +
- (desc + desc_pad) *
- sizeof(struct rte_mbuf *),
- RTE_CACHE_LINE_SIZE);
- if (!rxq_ctrl) {
- ERROR("%p: unable to reallocate queue index %u",
- (void *)dev, idx);
- priv_unlock(priv);
- return -ENOMEM;
- }
- }
- } else {
- rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl) +
- (desc + desc_pad) *
- sizeof(struct rte_mbuf *),
- 0, socket);
- if (rxq_ctrl == NULL) {
- ERROR("%p: unable to allocate queue index %u",
- (void *)dev, idx);
- priv_unlock(priv);
- return -ENOMEM;
- }
+ if (!mlx5_priv_rxq_releasable(priv, idx)) {
+ ret = EBUSY;
+ ERROR("%p: unable to release queue index %u",
+ (void *)dev, idx);
+ goto out;
}
- ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
- if (ret)
- rte_free(rxq_ctrl);
- else {
- rxq_ctrl->rxq.stats.idx = idx;
- DEBUG("%p: adding RX queue %p to list",
- (void *)dev, (void *)rxq_ctrl);
- (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
+ mlx5_priv_rxq_release(priv, idx);
+ rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
+ if (!rxq_ctrl) {
+ ERROR("%p: unable to allocate queue index %u",
+ (void *)dev, idx);
+ ret = ENOMEM;
+ goto out;
}
+ DEBUG("%p: adding RX queue %p to list",
+ (void *)dev, (void *)rxq_ctrl);
+ (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
+out:
priv_unlock(priv);
return -ret;
}
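
The rewritten setup path no longer patches an existing queue in place: it
checks that the slot is releasable, drops the old control structure and
allocates a fresh one through mlx5_priv_rxq_new(). A hedged usage sketch of
the generic ethdev call that reaches this callback on an mlx5 port follows;
the descriptor count and the wrapper name are hypothetical, and NULL selects
the default rte_eth_rxconf.

#include <rte_ethdev.h>
#include <rte_mempool.h>

static int
setup_one_rxq(uint16_t port_id, uint16_t queue_id, struct rte_mempool *mp)
{
	/* 512 descriptors; non-power-of-two values are rounded up by the PMD. */
	return rte_eth_rx_queue_setup(port_id, queue_id, 512,
				      rte_eth_dev_socket_id(port_id),
				      NULL, mp);
}
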
@@ -1209,76 +290,26 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
- struct rxq *rxq = (struct rxq *)dpdk_rxq;
- struct rxq_ctrl *rxq_ctrl;
+ struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
+ struct mlx5_rxq_ctrl *rxq_ctrl;
struct priv *priv;
- unsigned int i;
if (mlx5_is_secondary())
return;
if (rxq == NULL)
return;
- rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+ rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
priv = rxq_ctrl->priv;
priv_lock(priv);
- if (priv_flow_rxq_in_use(priv, rxq))
+ if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
rte_panic("Rx queue %p is still used by a flow and cannot be"
" removed\n", (void *)rxq_ctrl);
- for (i = 0; (i != priv->rxqs_n); ++i)
- if ((*priv->rxqs)[i] == rxq) {
- DEBUG("%p: removing RX queue %p from list",
- (void *)priv->dev, (void *)rxq_ctrl);
- (*priv->rxqs)[i] = NULL;
- break;
- }
- rxq_cleanup(rxq_ctrl);
- rte_free(rxq_ctrl);
+ mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
priv_unlock(priv);
}
/**
- * DPDK callback for RX in secondary processes.
- *
- * This function configures all queues from primary process information
- * if necessary before reverting to the normal RX burst callback.
- *
- * @param dpdk_rxq
- * Generic pointer to RX queue structure.
- * @param[out] pkts
- * Array to store received packets.
- * @param pkts_n
- * Maximum number of packets in array.
- *
- * @return
- * Number of packets successfully received (<= pkts_n).
- */
-uint16_t
-mlx5_rx_burst_secondary_setup(void *dpdk_rxq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
-{
- struct rxq *rxq = dpdk_rxq;
- struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- struct priv *priv = mlx5_secondary_data_setup(rxq_ctrl->priv);
- struct priv *primary_priv;
- unsigned int index;
-
- if (priv == NULL)
- return 0;
- primary_priv =
- mlx5_secondary_data[priv->dev->data->port_id].primary_priv;
- /* Look for queue index in both private structures. */
- for (index = 0; index != priv->rxqs_n; ++index)
- if (((*primary_priv->rxqs)[index] == rxq) ||
- ((*priv->rxqs)[index] == rxq))
- break;
- if (index == priv->rxqs_n)
- return 0;
- rxq = (*priv->rxqs)[index];
- return priv->dev->rx_pkt_burst(rxq, pkts, pkts_n);
-}
-
-/**
* Allocate queue vector and fill epoll fd list for Rx interrupts.
*
* @param priv
@@ -1296,6 +327,7 @@ priv_rx_intr_vec_enable(struct priv *priv)
unsigned int count = 0;
struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+ assert(!mlx5_is_secondary());
if (!priv->dev->data->dev_conf.intr_conf.rxq)
return 0;
priv_rx_intr_vec_disable(priv);
@@ -1307,15 +339,14 @@ priv_rx_intr_vec_enable(struct priv *priv)
}
intr_handle->type = RTE_INTR_HANDLE_EXT;
for (i = 0; i != n; ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
- struct rxq_ctrl *rxq_ctrl =
- container_of(rxq, struct rxq_ctrl, rxq);
+ /* This rxq ibv must not be released in this function. */
+ struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
int fd;
int flags;
int rc;
/* Skip queues that cannot request interrupts. */
- if (!rxq || !rxq_ctrl->channel) {
+ if (!rxq_ibv || !rxq_ibv->channel) {
/* Use invalid intr_vec[] index to disable entry. */
intr_handle->intr_vec[i] =
RTE_INTR_VEC_RXTX_OFFSET +
@@ -1329,7 +360,7 @@ priv_rx_intr_vec_enable(struct priv *priv)
priv_rx_intr_vec_disable(priv);
return -1;
}
- fd = rxq_ctrl->channel->fd;
+ fd = rxq_ibv->channel->fd;
flags = fcntl(fd, F_GETFL);
rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
if (rc < 0) {
@@ -1359,14 +390,61 @@ void
priv_rx_intr_vec_disable(struct priv *priv)
{
struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+ unsigned int i;
+ unsigned int rxqs_n = priv->rxqs_n;
+ unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+ if (!priv->dev->data->dev_conf.intr_conf.rxq)
+ return;
+ if (!intr_handle->intr_vec)
+ goto free;
+ for (i = 0; i != n; ++i) {
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ struct mlx5_rxq_data *rxq_data;
+
+ if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
+ RTE_MAX_RXTX_INTR_VEC_ID)
+ continue;
+ /**
+ * Need to access the queue directly to release the reference
+ * kept in priv_rx_intr_vec_enable().
+ */
+ rxq_data = (*priv->rxqs)[i];
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
+ }
+free:
rte_intr_free_epoll_fd(intr_handle);
- free(intr_handle->intr_vec);
+ if (intr_handle->intr_vec)
+ free(intr_handle->intr_vec);
intr_handle->nb_efd = 0;
intr_handle->intr_vec = NULL;
}
-#ifdef HAVE_UPDATE_CQ_CI
+/**
+ * MLX5 CQ notification.
+ *
+ * @param rxq
+ * Pointer to receive queue structure.
+ * @param sq_n_rxq
+ *   Sequence number per receive queue.
+ */
+static inline void
+mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
+{
+ int sq_n = 0;
+ uint32_t doorbell_hi;
+ uint64_t doorbell;
+ void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
+
+ sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
+ doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
+ doorbell = (uint64_t)doorbell_hi << 32;
+ doorbell |= rxq->cqn;
+ rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
+ rte_wmb();
+ rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
+}
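
mlx5_arm_cq() packs the arm sequence number and the current CQ consumer index
into the high word of the doorbell, appends the CQ number in the low word,
stores the high word big-endian in the doorbell record and then writes the
full 64-bit value to the UAR register. A minimal standalone sketch of that
bit packing is shown below; the mask and offset constants are assumptions for
illustration only, the real ones come from the mlx5 PRM headers.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define CQ_SQN_MASK   0x3u       /* arm sequence number mask (assumed) */
#define CQ_SQN_OFFSET 28         /* its offset in the high word (assumed) */
#define CI_MASK       0xffffffu  /* consumer index mask (assumed) */

int
main(void)
{
	uint32_t cq_ci = 0x123456;        /* hypothetical CQ consumer index */
	uint32_t cqn = 0x42;              /* hypothetical CQ number */
	uint32_t sq_n = 2 & CQ_SQN_MASK;  /* hypothetical arm sequence */
	uint32_t doorbell_hi = (sq_n << CQ_SQN_OFFSET) | (cq_ci & CI_MASK);
	uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cqn;

	/* The driver stores doorbell_hi big-endian in cq_db[MLX5_CQ_ARM_DB],
	 * issues a write barrier and writes the 64-bit word to the UAR. */
	printf("doorbell_hi=0x%08" PRIx32 " doorbell=0x%016" PRIx64 "\n",
	       doorbell_hi, doorbell);
	return 0;
}
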
/**
* DPDK callback for Rx queue interrupt enable.
@@ -1383,16 +461,30 @@ int
mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
struct priv *priv = mlx5_get_priv(dev);
- struct rxq *rxq = (*priv->rxqs)[rx_queue_id];
- struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- int ret;
+ struct mlx5_rxq_data *rxq_data;
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ int ret = 0;
- if (!rxq || !rxq_ctrl->channel) {
+ priv_lock(priv);
+ rxq_data = (*priv->rxqs)[rx_queue_id];
+ if (!rxq_data) {
ret = EINVAL;
- } else {
- ibv_mlx5_exp_update_cq_ci(rxq_ctrl->cq, rxq->cq_ci);
- ret = ibv_req_notify_cq(rxq_ctrl->cq, 0);
+ goto exit;
+ }
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ if (rxq_ctrl->irq) {
+ struct mlx5_rxq_ibv *rxq_ibv;
+
+ rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
+ if (!rxq_ibv) {
+ ret = EINVAL;
+ goto exit;
+ }
+ mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
+ mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
}
+exit:
+ priv_unlock(priv);
if (ret)
WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
return -ret;
@@ -1413,25 +505,920 @@ int
mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
struct priv *priv = mlx5_get_priv(dev);
- struct rxq *rxq = (*priv->rxqs)[rx_queue_id];
- struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+ struct mlx5_rxq_data *rxq_data;
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ struct mlx5_rxq_ibv *rxq_ibv = NULL;
struct ibv_cq *ev_cq;
void *ev_ctx;
- int ret;
+ int ret = 0;
- if (!rxq || !rxq_ctrl->channel) {
+ priv_lock(priv);
+ rxq_data = (*priv->rxqs)[rx_queue_id];
+ if (!rxq_data) {
ret = EINVAL;
- } else {
- ret = ibv_get_cq_event(rxq_ctrl->cq->channel, &ev_cq, &ev_ctx);
- if (ret || ev_cq != rxq_ctrl->cq)
- ret = EINVAL;
+ goto exit;
+ }
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ if (!rxq_ctrl->irq)
+ goto exit;
+ rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
+ if (!rxq_ibv) {
+ ret = EINVAL;
+ goto exit;
+ }
+ ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
+ if (ret || ev_cq != rxq_ibv->cq) {
+ ret = EINVAL;
+ goto exit;
}
+ rxq_data->cq_arm_sn++;
+ ibv_ack_cq_events(rxq_ibv->cq, 1);
+exit:
+ if (rxq_ibv)
+ mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
+ priv_unlock(priv);
if (ret)
WARN("unable to disable interrupt on rx queue %d",
rx_queue_id);
- else
- ibv_ack_cq_events(rxq_ctrl->cq, 1);
return -ret;
}
-#endif /* HAVE_UPDATE_CQ_CI */
+/**
+ * Create the Rx queue Verbs object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Rx queue array.
+ *
+ * @return
+ *   The Verbs object initialised, or NULL in case of error.
+ */
+struct mlx5_rxq_ibv*
+mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ struct ibv_wq_attr mod;
+ union {
+ struct {
+ struct ibv_cq_init_attr_ex ibv;
+ struct mlx5dv_cq_init_attr mlx5;
+ } cq;
+ struct ibv_wq_init_attr wq;
+ struct ibv_cq_ex cq_attr;
+ } attr;
+ unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
+ struct mlx5_rxq_ibv *tmpl;
+ struct mlx5dv_cq cq_info;
+ struct mlx5dv_rwq rwq;
+ unsigned int i;
+ int ret = 0;
+ struct mlx5dv_obj obj;
+
+ assert(rxq_data);
+ assert(!rxq_ctrl->ibv);
+ tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
+ rxq_ctrl->socket);
+ if (!tmpl) {
+ ERROR("%p: cannot allocate verbs resources",
+ (void *)rxq_ctrl);
+ goto error;
+ }
+ tmpl->rxq_ctrl = rxq_ctrl;
+ /* Use the entire RX mempool as the memory region. */
+ tmpl->mr = priv_mr_get(priv, rxq_data->mp);
+ if (!tmpl->mr) {
+ tmpl->mr = priv_mr_new(priv, rxq_data->mp);
+ if (!tmpl->mr) {
+ ERROR("%p: MR creation failure", (void *)rxq_ctrl);
+ goto error;
+ }
+ }
+ if (rxq_ctrl->irq) {
+ tmpl->channel = ibv_create_comp_channel(priv->ctx);
+ if (!tmpl->channel) {
+ ERROR("%p: Comp Channel creation failure",
+ (void *)rxq_ctrl);
+ goto error;
+ }
+ }
+ attr.cq.ibv = (struct ibv_cq_init_attr_ex){
+ .cqe = cqe_n,
+ .channel = tmpl->channel,
+ .comp_mask = 0,
+ };
+ attr.cq.mlx5 = (struct mlx5dv_cq_init_attr){
+ .comp_mask = 0,
+ };
+ if (priv->cqe_comp && !rxq_data->hw_timestamp) {
+ attr.cq.mlx5.comp_mask |=
+ MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
+ attr.cq.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
+ /*
+ * For vectorized Rx, it must not be doubled in order to
+ * make cq_ci and rq_ci aligned.
+ */
+ if (rxq_check_vec_support(rxq_data) < 0)
+ attr.cq.ibv.cqe *= 2;
+ } else if (priv->cqe_comp && rxq_data->hw_timestamp) {
+ DEBUG("Rx CQE compression is disabled for HW timestamp");
+ }
+ tmpl->cq = ibv_cq_ex_to_cq(mlx5dv_create_cq(priv->ctx, &attr.cq.ibv,
+ &attr.cq.mlx5));
+ if (tmpl->cq == NULL) {
+ ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
+ goto error;
+ }
+ DEBUG("priv->device_attr.max_qp_wr is %d",
+ priv->device_attr.orig_attr.max_qp_wr);
+ DEBUG("priv->device_attr.max_sge is %d",
+ priv->device_attr.orig_attr.max_sge);
+ attr.wq = (struct ibv_wq_init_attr){
+ .wq_context = NULL, /* Could be useful in the future. */
+ .wq_type = IBV_WQT_RQ,
+ /* Max number of outstanding WRs. */
+ .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
+ /* Max number of scatter/gather elements in a WR. */
+ .max_sge = 1 << rxq_data->sges_n,
+ .pd = priv->pd,
+ .cq = tmpl->cq,
+ .comp_mask =
+ IBV_WQ_FLAGS_CVLAN_STRIPPING |
+ 0,
+ .create_flags = (rxq_data->vlan_strip ?
+ IBV_WQ_FLAGS_CVLAN_STRIPPING :
+ 0),
+ };
+ /* By default, FCS (CRC) is stripped by hardware. */
+ if (rxq_data->crc_present) {
+ attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
+ attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+ }
+#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
+ if (priv->hw_padding) {
+ attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
+ attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+ }
+#endif
+ tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
+ if (tmpl->wq == NULL) {
+ ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
+ goto error;
+ }
+ /*
+ * Make sure number of WRs*SGEs match expectations since a queue
+ * cannot allocate more than "desc" buffers.
+ */
+ if (((int)attr.wq.max_wr !=
+ ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
+ ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
+ ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
+ (void *)rxq_ctrl,
+ ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
+ (1 << rxq_data->sges_n),
+ attr.wq.max_wr, attr.wq.max_sge);
+ goto error;
+ }
+ /* Change queue state to ready. */
+ mod = (struct ibv_wq_attr){
+ .attr_mask = IBV_WQ_ATTR_STATE,
+ .wq_state = IBV_WQS_RDY,
+ };
+ ret = ibv_modify_wq(tmpl->wq, &mod);
+ if (ret) {
+ ERROR("%p: WQ state to IBV_WQS_RDY failed",
+ (void *)rxq_ctrl);
+ goto error;
+ }
+ obj.cq.in = tmpl->cq;
+ obj.cq.out = &cq_info;
+ obj.rwq.in = tmpl->wq;
+ obj.rwq.out = &rwq;
+ ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
+ if (ret != 0)
+ goto error;
+ if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+ ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+ "it should be set to %u", RTE_CACHE_LINE_SIZE);
+ goto error;
+ }
+ /* Fill the rings. */
+ rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
+ (uintptr_t)rwq.buf;
+ for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
+ struct rte_mbuf *buf = (*rxq_data->elts)[i];
+ volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
+
+ /* scat->addr must be able to store a pointer. */
+ assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+ *scat = (struct mlx5_wqe_data_seg){
+ .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+ uintptr_t)),
+ .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
+ .lkey = tmpl->mr->lkey,
+ };
+ }
+ rxq_data->rq_db = rwq.dbrec;
+ rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
+ rxq_data->cq_ci = 0;
+ rxq_data->rq_ci = 0;
+ rxq_data->rq_pi = 0;
+ rxq_data->zip = (struct rxq_zip){
+ .ai = 0,
+ };
+ rxq_data->cq_db = cq_info.dbrec;
+ rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
+ rxq_data->cq_uar = cq_info.cq_uar;
+ rxq_data->cqn = cq_info.cqn;
+ rxq_data->cq_arm_sn = 0;
+ /* Update doorbell counter. */
+ rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
+ rte_wmb();
+ *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
+ DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
+ rte_atomic32_inc(&tmpl->refcnt);
+ DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+ (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+ LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
+ return tmpl;
+error:
+ if (tmpl->wq)
+ claim_zero(ibv_destroy_wq(tmpl->wq));
+ if (tmpl->cq)
+ claim_zero(ibv_destroy_cq(tmpl->cq));
+ if (tmpl->channel)
+ claim_zero(ibv_destroy_comp_channel(tmpl->channel));
+ if (tmpl->mr)
+ priv_mr_release(priv, tmpl->mr);
+ return NULL;
+}
+
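
The sanity check after ibv_create_wq() verifies that the work queue still
covers exactly "desc" buffers: the WQ is sized as (1 << elts_n) >> sges_n
work requests of 1 << sges_n scatter entries each. A small worked example
under assumed sizes:

#include <stdio.h>

int
main(void)
{
	/* Hypothetical queue: 512 descriptors (elts_n = 9) with 4 SGEs per
	 * packet (sges_n = 2), as mlx5_priv_rxq_new() could configure. */
	unsigned int elts_n = 9;
	unsigned int sges_n = 2;
	unsigned int max_wr = (1u << elts_n) >> sges_n;  /* 128 work requests */
	unsigned int max_sge = 1u << sges_n;             /* 4 SGEs per WR */

	/* 128 WRs * 4 SGEs = 512 buffers, i.e. exactly "desc" mbufs. */
	printf("max_wr=%u max_sge=%u buffers=%u\n",
	       max_wr, max_sge, max_wr * max_sge);
	return 0;
}
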
+/**
+ * Get an Rx queue Verbs object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Rx queue array.
+ *
+ * @return
+ * The Verbs object if it exists.
+ */
+struct mlx5_rxq_ibv*
+mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+
+ if (idx >= priv->rxqs_n)
+ return NULL;
+ if (!rxq_data)
+ return NULL;
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ if (rxq_ctrl->ibv) {
+ priv_mr_get(priv, rxq_data->mp);
+ rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
+ DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+ (void *)rxq_ctrl->ibv,
+ rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
+ }
+ return rxq_ctrl->ibv;
+}
+
+/**
+ * Release an Rx verbs queue object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param rxq_ibv
+ * Verbs Rx queue object.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
+{
+ int ret;
+
+ assert(rxq_ibv);
+ assert(rxq_ibv->wq);
+ assert(rxq_ibv->cq);
+ assert(rxq_ibv->mr);
+ ret = priv_mr_release(priv, rxq_ibv->mr);
+ if (!ret)
+ rxq_ibv->mr = NULL;
+ DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+ (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
+ if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
+ rxq_free_elts(rxq_ibv->rxq_ctrl);
+ claim_zero(ibv_destroy_wq(rxq_ibv->wq));
+ claim_zero(ibv_destroy_cq(rxq_ibv->cq));
+ if (rxq_ibv->channel)
+ claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
+ LIST_REMOVE(rxq_ibv, next);
+ rte_free(rxq_ibv);
+ return 0;
+ }
+ return EBUSY;
+}
+
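
The Verbs Rx queue objects introduced here, like the Rx queue controls,
indirection tables and hash Rx queues below, all follow the same reference
counting discipline: *_new() returns the object holding one reference,
*_get() takes an extra one, and *_release() drops one and only destroys the
hardware resources once the counter reaches zero, returning EBUSY otherwise.
A minimal sketch of that discipline, with generic names and C11 atomics
standing in for rte_atomic32_t:

#include <errno.h>
#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int refcnt;
	/* ... hardware resources ... */
};

static struct obj *
obj_new(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (o)
		atomic_store(&o->refcnt, 1);  /* creator holds one reference */
	return o;
}

static struct obj *
obj_get(struct obj *o)
{
	atomic_fetch_add(&o->refcnt, 1);      /* each user takes a reference */
	return o;
}

static int
obj_release(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcnt, 1) == 1) {
		/* Last reference: destroy WQ/CQ/channel, then free. */
		free(o);
		return 0;
	}
	return EBUSY;                         /* still referenced elsewhere */
}

int
main(void)
{
	struct obj *o = obj_new();

	obj_get(o);            /* second user */
	obj_release(o);        /* returns EBUSY, one reference left */
	return obj_release(o); /* returns 0, object freed */
}
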
+/**
+ * Verify the Verbs Rx queue list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_rxq_ibv_verify(struct priv *priv)
+{
+ int ret = 0;
+ struct mlx5_rxq_ibv *rxq_ibv;
+
+ LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
+ DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
+ (void *)rxq_ibv);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Return true if a single reference exists on the object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param rxq_ibv
+ * Verbs Rx queue object.
+ */
+int
+mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
+{
+ (void)priv;
+ assert(rxq_ibv);
+ return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
+}
+
+/**
+ * Create a DPDK Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ * @param desc
+ * Number of descriptors to configure in queue.
+ * @param socket
+ * NUMA socket on which memory must be allocated.
+ *
+ * @return
+ * A DPDK queue object on success.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
+ unsigned int socket, struct rte_mempool *mp)
+{
+ struct rte_eth_dev *dev = priv->dev;
+ struct mlx5_rxq_ctrl *tmpl;
+ const uint16_t desc_n =
+ desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
+ unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
+
+ tmpl = rte_calloc_socket("RXQ", 1,
+ sizeof(*tmpl) +
+ desc_n * sizeof(struct rte_mbuf *),
+ 0, socket);
+ if (!tmpl)
+ return NULL;
+ tmpl->socket = socket;
+ if (priv->dev->data->dev_conf.intr_conf.rxq)
+ tmpl->irq = 1;
+ /* Enable scattered packets support for this queue if necessary. */
+ assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+ if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+ (mb_len - RTE_PKTMBUF_HEADROOM)) {
+ tmpl->rxq.sges_n = 0;
+ } else if (dev->data->dev_conf.rxmode.enable_scatter) {
+ unsigned int size =
+ RTE_PKTMBUF_HEADROOM +
+ dev->data->dev_conf.rxmode.max_rx_pkt_len;
+ unsigned int sges_n;
+
+ /*
+ * Determine the number of SGEs needed for a full packet
+ * and round it to the next power of two.
+ */
+ sges_n = log2above((size / mb_len) + !!(size % mb_len));
+ tmpl->rxq.sges_n = sges_n;
+ /* Make sure rxq.sges_n did not overflow. */
+ size = mb_len * (1 << tmpl->rxq.sges_n);
+ size -= RTE_PKTMBUF_HEADROOM;
+ if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+ ERROR("%p: too many SGEs (%u) needed to handle"
+ " requested maximum packet size %u",
+ (void *)dev,
+ 1 << sges_n,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len);
+ goto error;
+ }
+ } else {
+ WARN("%p: the requested maximum Rx packet size (%u) is"
+ " larger than a single mbuf (%u) and scattered"
+ " mode has not been requested",
+ (void *)dev,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len,
+ mb_len - RTE_PKTMBUF_HEADROOM);
+ }
+ DEBUG("%p: maximum number of segments per packet: %u",
+ (void *)dev, 1 << tmpl->rxq.sges_n);
+ if (desc % (1 << tmpl->rxq.sges_n)) {
+ ERROR("%p: number of RX queue descriptors (%u) is not a"
+ " multiple of SGEs per packet (%u)",
+ (void *)dev,
+ desc,
+ 1 << tmpl->rxq.sges_n);
+ goto error;
+ }
+ /* Toggle RX checksum offload if hardware supports it. */
+ if (priv->hw_csum)
+ tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+ if (priv->hw_csum_l2tun)
+ tmpl->rxq.csum_l2tun =
+ !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+ tmpl->rxq.hw_timestamp =
+ !!dev->data->dev_conf.rxmode.hw_timestamp;
+ /* Configure VLAN stripping. */
+ tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
+ !!dev->data->dev_conf.rxmode.hw_vlan_strip);
+ /* By default, FCS (CRC) is stripped by hardware. */
+ if (dev->data->dev_conf.rxmode.hw_strip_crc) {
+ tmpl->rxq.crc_present = 0;
+ } else if (priv->hw_fcs_strip) {
+ tmpl->rxq.crc_present = 1;
+ } else {
+ WARN("%p: CRC stripping has been disabled but will still"
+ " be performed by hardware, make sure MLNX_OFED and"
+ " firmware are up to date",
+ (void *)dev);
+ tmpl->rxq.crc_present = 0;
+ }
+ DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
+ " incoming frames to hide it",
+ (void *)dev,
+ tmpl->rxq.crc_present ? "disabled" : "enabled",
+ tmpl->rxq.crc_present << 2);
+ /* Save port ID. */
+ tmpl->rxq.rss_hash = priv->rxqs_n > 1;
+ tmpl->rxq.port_id = dev->data->port_id;
+ tmpl->priv = priv;
+ tmpl->rxq.mp = mp;
+ tmpl->rxq.stats.idx = idx;
+ tmpl->rxq.elts_n = log2above(desc);
+ tmpl->rxq.elts =
+ (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+ rte_atomic32_inc(&tmpl->refcnt);
+ DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+ (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+ LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
+ return tmpl;
+error:
+ rte_free(tmpl);
+ return NULL;
+}
+
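
When the maximum Rx packet does not fit in a single mbuf and scatter is
enabled, the queue is configured with 1 << sges_n segments per packet, where
sges_n is the rounded-up log2 of the number of mbufs a full packet needs;
"desc" must then be a multiple of that segment count. A worked example under
assumed mbuf and packet sizes, with a local stand-in for DPDK's log2above():

#include <stdio.h>

/* Local stand-in for log2above(): smallest n such that 2^n >= v. */
static unsigned int
log2above(unsigned int v)
{
	unsigned int n = 0;

	while ((1u << n) < v)
		n++;
	return n;
}

int
main(void)
{
	/* Hypothetical numbers: 2048-byte mbuf segments, 128 bytes of
	 * headroom, 9000-byte maximum Rx packet with scatter enabled. */
	unsigned int headroom = 128;
	unsigned int mb_len = 2048;
	unsigned int max_rx_pkt_len = 9000;
	unsigned int size = headroom + max_rx_pkt_len;  /* 9128 */
	unsigned int sges_n = log2above((size / mb_len) + !!(size % mb_len));

	/* 9128 bytes need 5 mbufs, rounded up to the next power of two: 8. */
	printf("sges_n=%u (%u SGEs per packet)\n", sges_n, 1u << sges_n);
	return 0;
}
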
+/**
+ * Get a Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ * A pointer to the queue if it exists.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
+
+ if ((*priv->rxqs)[idx]) {
+ rxq_ctrl = container_of((*priv->rxqs)[idx],
+ struct mlx5_rxq_ctrl,
+ rxq);
+
+ mlx5_priv_rxq_ibv_get(priv, idx);
+ rte_atomic32_inc(&rxq_ctrl->refcnt);
+ DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+ (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+ }
+ return rxq_ctrl;
+}
+
+/**
+ * Release a Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+
+ if (!(*priv->rxqs)[idx])
+ return 0;
+ rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+ assert(rxq_ctrl->priv);
+ if (rxq_ctrl->ibv) {
+ int ret;
+
+ ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
+ if (!ret)
+ rxq_ctrl->ibv = NULL;
+ }
+ DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+ (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+ if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
+ LIST_REMOVE(rxq_ctrl, next);
+ rte_free(rxq_ctrl);
+ (*priv->rxqs)[idx] = NULL;
+ return 0;
+ }
+ return EBUSY;
+}
+
+/**
+ * Verify if the queue can be released.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ * 1 if the queue can be released.
+ */
+int
+mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+
+ if (!(*priv->rxqs)[idx])
+ return -1;
+ rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+ return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
+}
+
+/**
+ * Verify the Rx queue list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_rxq_verify(struct priv *priv)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ int ret = 0;
+
+ LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
+ DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
+ (void *)rxq_ctrl);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Create an indirection table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param queues
+ *   Queues to include in the indirection table.
+ * @param queues_n
+ * Number of queues in the array.
+ *
+ * @return
+ * A new indirection table.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
+ uint16_t queues_n)
+{
+ struct mlx5_ind_table_ibv *ind_tbl;
+ const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
+ log2above(queues_n) :
+ log2above(priv->ind_table_max_size);
+ struct ibv_wq *wq[1 << wq_n];
+ unsigned int i;
+ unsigned int j;
+
+ ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
+ queues_n * sizeof(uint16_t), 0);
+ if (!ind_tbl)
+ return NULL;
+ for (i = 0; i != queues_n; ++i) {
+ struct mlx5_rxq_ctrl *rxq =
+ mlx5_priv_rxq_get(priv, queues[i]);
+
+ if (!rxq)
+ goto error;
+ wq[i] = rxq->ibv->wq;
+ ind_tbl->queues[i] = queues[i];
+ }
+ ind_tbl->queues_n = queues_n;
+ /* Finalise indirection table. */
+ for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
+ wq[i] = wq[j];
+ ind_tbl->ind_table = ibv_create_rwq_ind_table(
+ priv->ctx,
+ &(struct ibv_rwq_ind_table_init_attr){
+ .log_ind_tbl_size = wq_n,
+ .ind_tbl = wq,
+ .comp_mask = 0,
+ });
+ if (!ind_tbl->ind_table)
+ goto error;
+ rte_atomic32_inc(&ind_tbl->refcnt);
+ LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
+ DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+ (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+ return ind_tbl;
+error:
+ rte_free(ind_tbl);
+ DEBUG("%p cannot create indirection table", (void *)priv);
+ return NULL;
+}
+
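
The RSS indirection table must have a power-of-two size, so when the number
of configured queues is not a power of two the table is sized up and the
queues are repeated in round-robin order until it is full, which is what the
"finalise indirection table" loop above does. A standalone sketch with
hypothetical sizes (3 queues spread over an 8-entry table):

#include <stdio.h>

int
main(void)
{
	unsigned int queues[] = { 0, 1, 2 };
	unsigned int queues_n = 3;
	unsigned int wq_n = 3;          /* log2 of the table size (assumed) */
	unsigned int tbl[1 << 3];
	unsigned int i, j;

	for (i = 0; i != queues_n; ++i)
		tbl[i] = queues[i];
	/* Wrap around the configured queues until the table is full. */
	for (j = 0; i != (1u << wq_n); ++i, ++j)
		tbl[i] = tbl[j];
	for (i = 0; i != (1u << wq_n); ++i)
		printf("entry %u -> queue %u\n", i, tbl[i]);
	/* Prints the mapping 0 1 2 0 1 2 0 1. */
	return 0;
}
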
+/**
+ * Get an indirection table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param queues
+ *   Queues to include in the indirection table.
+ * @param queues_n
+ * Number of queues in the array.
+ *
+ * @return
+ * An indirection table if found.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
+ uint16_t queues_n)
+{
+ struct mlx5_ind_table_ibv *ind_tbl;
+
+ LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+ if ((ind_tbl->queues_n == queues_n) &&
+ (memcmp(ind_tbl->queues, queues,
+ ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
+ == 0))
+ break;
+ }
+ if (ind_tbl) {
+ unsigned int i;
+
+ rte_atomic32_inc(&ind_tbl->refcnt);
+ DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+ (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+ for (i = 0; i != ind_tbl->queues_n; ++i)
+ mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
+ }
+ return ind_tbl;
+}
+
+/**
+ * Release an indirection table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param ind_table
+ * Indirection table to release.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+mlx5_priv_ind_table_ibv_release(struct priv *priv,
+ struct mlx5_ind_table_ibv *ind_tbl)
+{
+ unsigned int i;
+
+ DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+ (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+ if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
+ claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
+ for (i = 0; i != ind_tbl->queues_n; ++i)
+ claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
+ if (!rte_atomic32_read(&ind_tbl->refcnt)) {
+ LIST_REMOVE(ind_tbl, next);
+ rte_free(ind_tbl);
+ return 0;
+ }
+ return EBUSY;
+}
+
+/**
+ * Verify the indirection table list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_ind_table_ibv_verify(struct priv *priv)
+{
+ struct mlx5_ind_table_ibv *ind_tbl;
+ int ret = 0;
+
+ LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+ DEBUG("%p: Verbs indirection table %p still referenced",
+ (void *)priv, (void *)ind_tbl);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Create an Rx Hash queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param rss_key
+ * RSS key for the Rx hash queue.
+ * @param rss_key_len
+ * RSS key length.
+ * @param hash_fields
+ * Verbs protocol hash field to make the RSS on.
+ * @param queues
+ *   Queues entering the hash queue. When hash_fields is empty, only the
+ *   first queue index is used for the indirection table.
+ * @param queues_n
+ * Number of queues.
+ *
+ * @return
+ *   A hash Rx queue on success.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+ uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+ struct mlx5_hrxq *hrxq;
+ struct mlx5_ind_table_ibv *ind_tbl;
+ struct ibv_qp *qp;
+
+ queues_n = hash_fields ? queues_n : 1;
+ ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+ if (!ind_tbl)
+ ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
+ if (!ind_tbl)
+ return NULL;
+ qp = ibv_create_qp_ex(
+ priv->ctx,
+ &(struct ibv_qp_init_attr_ex){
+ .qp_type = IBV_QPT_RAW_PACKET,
+ .comp_mask =
+ IBV_QP_INIT_ATTR_PD |
+ IBV_QP_INIT_ATTR_IND_TABLE |
+ IBV_QP_INIT_ATTR_RX_HASH,
+ .rx_hash_conf = (struct ibv_rx_hash_conf){
+ .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
+ .rx_hash_key_len = rss_key_len,
+ .rx_hash_key = rss_key,
+ .rx_hash_fields_mask = hash_fields,
+ },
+ .rwq_ind_tbl = ind_tbl->ind_table,
+ .pd = priv->pd,
+ });
+ if (!qp)
+ goto error;
+ hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
+ if (!hrxq)
+ goto error;
+ hrxq->ind_table = ind_tbl;
+ hrxq->qp = qp;
+ hrxq->rss_key_len = rss_key_len;
+ hrxq->hash_fields = hash_fields;
+ memcpy(hrxq->rss_key, rss_key, rss_key_len);
+ rte_atomic32_inc(&hrxq->refcnt);
+ LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
+ DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+ (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+ return hrxq;
+error:
+ mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+ if (qp)
+ claim_zero(ibv_destroy_qp(qp));
+ return NULL;
+}
+
+/**
+ * Get an Rx Hash queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param rss_conf
+ * RSS configuration for the Rx hash queue.
+ * @param queues
+ *   Queues entering the hash queue. When hash_fields is empty, only the
+ *   first queue index is used for the indirection table.
+ * @param queues_n
+ * Number of queues.
+ *
+ * @return
+ *   A hash Rx queue on success.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+ uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+ struct mlx5_hrxq *hrxq;
+
+ queues_n = hash_fields ? queues_n : 1;
+ LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+ struct mlx5_ind_table_ibv *ind_tbl;
+
+ if (hrxq->rss_key_len != rss_key_len)
+ continue;
+ if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
+ continue;
+ if (hrxq->hash_fields != hash_fields)
+ continue;
+ ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+ if (!ind_tbl)
+ continue;
+ if (ind_tbl != hrxq->ind_table) {
+ mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+ continue;
+ }
+ rte_atomic32_inc(&hrxq->refcnt);
+ DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+ (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+ return hrxq;
+ }
+ return NULL;
+}
+
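
Hash Rx queues are cached: the lookup above matches the RSS key, the hash
fields and the underlying indirection table before taking a reference, so a
caller (the flow engine, for instance) would normally try the lookup first
and fall back to creating a new object. A hedged caller-side sketch using the
functions defined above; it assumes the driver's internal headers and the
wrapper name is hypothetical:

static struct mlx5_hrxq *
get_or_create_hrxq(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
		   uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
{
	struct mlx5_hrxq *hrxq;

	hrxq = mlx5_priv_hrxq_get(priv, rss_key, rss_key_len, hash_fields,
				  queues, queues_n);
	if (!hrxq)
		hrxq = mlx5_priv_hrxq_new(priv, rss_key, rss_key_len,
					  hash_fields, queues, queues_n);
	return hrxq; /* balance with mlx5_priv_hrxq_release() when done */
}
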
+/**
+ * Release the hash Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param hrxq
+ * Pointer to Hash Rx queue to release.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
+{
+ DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+ (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+ if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
+ claim_zero(ibv_destroy_qp(hrxq->qp));
+ mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
+ LIST_REMOVE(hrxq, next);
+ rte_free(hrxq);
+ return 0;
+ }
+ claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
+ return EBUSY;
+}
+
+/**
+ * Verify the hash Rx queue list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_hrxq_ibv_verify(struct priv *priv)
+{
+ struct mlx5_hrxq *hrxq;
+ int ret = 0;
+
+ LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+ DEBUG("%p: Verbs Hash Rx queue %p still referenced",
+ (void *)priv, (void *)hrxq);
+ ++ret;
+ }
+ return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index b07bcd11..9658b378 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -42,25 +42,17 @@
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
-#include <infiniband/mlx5_hw.h>
-#include <infiniband/arch.h>
+#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_utils.h"
@@ -73,11 +65,11 @@ static __rte_always_inline uint32_t
rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe);
static __rte_always_inline int
-mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
uint16_t cqe_cnt, uint32_t *rss_hash);
static __rte_always_inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe);
+rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
@@ -105,6 +97,8 @@ mlx5_set_ptype_table(void)
* bit[6] = tunneled
* bit[7] = outer_l3_type
*/
+ /* L2 */
+ (*p)[0x00] = RTE_PTYPE_L2_ETHER;
/* L3 */
(*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_L4_NONFRAG;
@@ -171,29 +165,29 @@ mlx5_set_ptype_table(void)
/* Tunneled - TCP */
(*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L4_TCP;
+ RTE_PTYPE_INNER_L4_TCP;
(*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L4_TCP;
+ RTE_PTYPE_INNER_L4_TCP;
(*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L4_TCP;
+ RTE_PTYPE_INNER_L4_TCP;
(*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L4_TCP;
+ RTE_PTYPE_INNER_L4_TCP;
/* Tunneled - UDP */
(*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L4_UDP;
+ RTE_PTYPE_INNER_L4_UDP;
(*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L4_UDP;
+ RTE_PTYPE_INNER_L4_UDP;
(*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L4_UDP;
+ RTE_PTYPE_INNER_L4_UDP;
(*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L4_UDP;
+ RTE_PTYPE_INNER_L4_UDP;
}
/**
@@ -208,7 +202,7 @@ mlx5_set_ptype_table(void)
* Size of tailroom.
*/
static inline size_t
-tx_mlx5_wq_tailroom(struct txq *txq, void *addr)
+tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr)
{
size_t tailroom;
tailroom = (uintptr_t)(txq->wqes) +
@@ -266,7 +260,7 @@ mlx5_copy_to_wq(void *dst, const void *src, size_t n,
int
mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
{
- struct txq *txq = tx_queue;
+ struct mlx5_txq_data *txq = tx_queue;
uint16_t used;
mlx5_tx_complete(txq);
@@ -290,7 +284,7 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
- struct rxq *rxq = rx_queue;
+ struct mlx5_rxq_data *rxq = rx_queue;
struct rxq_zip *zip = &rxq->zip;
volatile struct mlx5_cqe *cqe;
const unsigned int cqe_n = (1 << rxq->cqe_n);
@@ -313,7 +307,7 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
op_own = cqe->op_own;
if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
- n = ntohl(cqe->byte_cnt);
+ n = rte_be_to_cpu_32(cqe->byte_cnt);
else
n = 1;
cq_ci += n;
@@ -342,7 +336,7 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
uint16_t
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
- struct txq *txq = (struct txq *)dpdk_txq;
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -413,8 +407,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
#ifdef MLX5_PMD_SOFT_COUNTERS
total_length = length;
#endif
- if (length < (MLX5_WQE_DWORD_SIZE + 2))
+ if (length < (MLX5_WQE_DWORD_SIZE + 2)) {
+ txq->stats.oerrors++;
break;
+ }
/* Update element. */
(*txq->elts)[elts_head & elts_m] = buf;
/* Prefetch next buffer data. */
@@ -441,7 +437,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
/* Replace the Ethernet type by the VLAN if necessary. */
if (buf->ol_flags & PKT_TX_VLAN_PKT) {
- uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);
+ uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
+ buf->vlan_tci);
unsigned int len = 2 * ETHER_ADDR_LEN - 2;
addr += 2;
@@ -461,6 +458,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
length -= pkt_inline_sz;
addr += pkt_inline_sz;
}
+ raw += MLX5_WQE_DWORD_SIZE;
if (txq->tso_en) {
tso = buf->ol_flags & PKT_TX_TCP_SEG;
if (tso) {
@@ -479,7 +477,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
tso_header_sz = buf->l2_len + vlan_sz +
buf->l3_len + buf->l4_len;
tso_segsz = buf->tso_segsz;
-
+ if (unlikely(tso_segsz == 0)) {
+ txq->stats.oerrors++;
+ break;
+ }
if (is_tunneled && txq->tunnel_en) {
tso_header_sz += buf->outer_l2_len +
buf->outer_l3_len;
@@ -488,12 +489,13 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
cs_flags |= MLX5_ETH_WQE_L4_CSUM;
}
if (unlikely(tso_header_sz >
- MLX5_MAX_TSO_HEADER))
+ MLX5_MAX_TSO_HEADER)) {
+ txq->stats.oerrors++;
break;
+ }
copy_b = tso_header_sz - pkt_inline_sz;
/* First seg must contain all headers. */
assert(copy_b <= length);
- raw += MLX5_WQE_DWORD_SIZE;
if (copy_b &&
((end - (uintptr_t)raw) > copy_b)) {
uint16_t n = (MLX5_WQE_DS(copy_b) -
@@ -506,19 +508,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void *)addr, copy_b);
addr += copy_b;
length -= copy_b;
+ /* Include padding for TSO header. */
+ copy_b = MLX5_WQE_DS(copy_b) *
+ MLX5_WQE_DWORD_SIZE;
pkt_inline_sz += copy_b;
- /*
- * Another DWORD will be added
- * in the inline part.
- */
- raw += MLX5_WQE_DS(copy_b) *
- MLX5_WQE_DWORD_SIZE -
- MLX5_WQE_DWORD_SIZE;
+ raw += copy_b;
} else {
/* NOP WQE. */
wqe->ctrl = (rte_v128u32_t){
- htonl(txq->wqe_ci << 8),
- htonl(txq->qp_num_8s | 1),
+ rte_cpu_to_be_32(
+ txq->wqe_ci << 8),
+ rte_cpu_to_be_32(
+ txq->qp_num_8s | 1),
0,
0,
};
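
When part of the TSO header is inlined, the number of copied bytes is now
rounded up to whole 16-byte WQE data segments before advancing both
pkt_inline_sz and the raw write pointer, so the padding is accounted for in
one place. A small worked example of that rounding; the DS macro below is a
local stand-in assumed to mirror MLX5_WQE_DS() (a ceiling divide by the
16-byte DWORD size):

#include <stdio.h>

#define WQE_DWORD_SIZE 16
#define WQE_DS(n) (((n) + WQE_DWORD_SIZE - 1) / WQE_DWORD_SIZE)

int
main(void)
{
	/* Hypothetical TSO header: 54 bytes total, 18 already inlined. */
	unsigned int tso_header_sz = 54;
	unsigned int pkt_inline_sz = 18;
	unsigned int copy_b = tso_header_sz - pkt_inline_sz;    /* 36 bytes */
	unsigned int padded = WQE_DS(copy_b) * WQE_DWORD_SIZE;  /* 48 bytes */

	/* The inline header ends on a 16-byte boundary inside the WQE. */
	printf("copy_b=%u padded=%u new pkt_inline_sz=%u\n",
	       copy_b, padded, pkt_inline_sz + padded);
	return 0;
}
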
@@ -531,19 +532,20 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
}
/* Inline if enough room. */
if (inline_en || tso) {
+ uint32_t inl;
uintptr_t end = (uintptr_t)
(((uintptr_t)txq->wqes) +
(1 << txq->wqe_n) * MLX5_WQE_SIZE);
unsigned int inline_room = max_inline *
RTE_CACHE_LINE_SIZE -
- (pkt_inline_sz - 2);
+ (pkt_inline_sz - 2) -
+ !!tso * sizeof(inl);
uintptr_t addr_end = (addr + inline_room) &
~(RTE_CACHE_LINE_SIZE - 1);
unsigned int copy_b = (addr_end > addr) ?
RTE_MIN((addr_end - addr), length) :
0;
- raw += MLX5_WQE_DWORD_SIZE;
if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
/*
* One Dseg remains in the current WQE. To
@@ -556,12 +558,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
break;
max_wqe -= n;
if (tso) {
- uint32_t inl =
- htonl(copy_b | MLX5_INLINE_SEG);
-
- pkt_inline_sz =
- MLX5_WQE_DS(tso_header_sz) *
- MLX5_WQE_DWORD_SIZE;
+ inl = rte_cpu_to_be_32(copy_b |
+ MLX5_INLINE_SEG);
rte_memcpy((void *)raw,
(void *)&inl, sizeof(inl));
raw += sizeof(inl);
@@ -610,9 +608,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
ds = 3;
use_dseg:
/* Add the remaining packet as a simple ds. */
- naddr = htonll(addr);
+ naddr = rte_cpu_to_be_64(addr);
*dseg = (rte_v128u32_t){
- htonl(length),
+ rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
naddr,
naddr >> 32,
@@ -649,9 +647,9 @@ next_seg:
total_length += length;
#endif
/* Store segment information. */
- naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
+ naddr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
*dseg = (rte_v128u32_t){
- htonl(length),
+ rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
naddr,
naddr >> 32,
@@ -664,27 +662,33 @@ next_seg:
else
j += sg;
next_pkt:
+ if (ds > MLX5_DSEG_MAX) {
+ txq->stats.oerrors++;
+ break;
+ }
++elts_head;
++pkts;
++i;
/* Initialize known and common part of the WQE structure. */
if (tso) {
wqe->ctrl = (rte_v128u32_t){
- htonl((txq->wqe_ci << 8) | MLX5_OPCODE_TSO),
- htonl(txq->qp_num_8s | ds),
+ rte_cpu_to_be_32((txq->wqe_ci << 8) |
+ MLX5_OPCODE_TSO),
+ rte_cpu_to_be_32(txq->qp_num_8s | ds),
0,
0,
};
wqe->eseg = (rte_v128u32_t){
0,
- cs_flags | (htons(tso_segsz) << 16),
+ cs_flags | (rte_cpu_to_be_16(tso_segsz) << 16),
0,
- (ehdr << 16) | htons(tso_header_sz),
+ (ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
};
} else {
wqe->ctrl = (rte_v128u32_t){
- htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
- htonl(txq->qp_num_8s | ds),
+ rte_cpu_to_be_32((txq->wqe_ci << 8) |
+ MLX5_OPCODE_SEND),
+ rte_cpu_to_be_32(txq->qp_num_8s | ds),
0,
0,
};
@@ -692,7 +696,7 @@ next_pkt:
0,
cs_flags,
0,
- (ehdr << 16) | htons(pkt_inline_sz),
+ (ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
};
}
next_wqe:
@@ -712,7 +716,7 @@ next_wqe:
comp = txq->elts_comp + i + j + k;
if (comp >= MLX5_TX_COMP_THRESH) {
/* Request completion on last WQE. */
- last_wqe->ctrl2 = htonl(8);
+ last_wqe->ctrl2 = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
last_wqe->ctrl3 = txq->elts_head;
txq->elts_comp = 0;
@@ -739,7 +743,7 @@ next_wqe:
* Packet length.
*/
static inline void
-mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
{
uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
@@ -751,13 +755,14 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
mpw->len = length;
mpw->total_len = 0;
mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
- mpw->wqe->eseg.mss = htons(length);
+ mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
mpw->wqe->eseg.inline_hdr_sz = 0;
mpw->wqe->eseg.rsvd0 = 0;
mpw->wqe->eseg.rsvd1 = 0;
mpw->wqe->eseg.rsvd2 = 0;
- mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
- (txq->wqe_ci << 8) | MLX5_OPCODE_TSO);
+ mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
+ (txq->wqe_ci << 8) |
+ MLX5_OPCODE_TSO);
mpw->wqe->ctrl[2] = 0;
mpw->wqe->ctrl[3] = 0;
mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
@@ -778,7 +783,7 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
* Pointer to MPW session structure.
*/
static inline void
-mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
unsigned int num = mpw->pkts_n;
@@ -786,7 +791,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
* Store size in multiple of 16 bytes. Control and Ethernet segments
* count as 2.
*/
- mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | (2 + num));
+ mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | (2 + num));
mpw->state = MLX5_MPW_STATE_CLOSED;
if (num < 3)
++txq->wqe_ci;
@@ -812,7 +817,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
uint16_t
mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
- struct txq *txq = (struct txq *)dpdk_txq;
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -850,8 +855,10 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
max_elts -= segs_n;
--pkts_n;
/* Should we enable HW CKSUM offload */
@@ -893,9 +900,9 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
dseg = mpw.data.dseg[mpw.pkts_n];
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
- .byte_count = htonl(DATA_LEN(buf)),
+ .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
.lkey = mlx5_tx_mb2mr(txq, buf),
- .addr = htonll(addr),
+ .addr = rte_cpu_to_be_64(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
length += DATA_LEN(buf);
@@ -923,7 +930,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
volatile struct mlx5_wqe *wqe = mpw.wqe;
/* Request completion on last WQE. */
- wqe->ctrl[2] = htonl(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
@@ -953,7 +960,8 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
* Packet length.
*/
static inline void
-mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
+ uint32_t length)
{
uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
struct mlx5_wqe_inl_small *inl;
@@ -963,12 +971,12 @@ mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
mpw->len = length;
mpw->total_len = 0;
mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
- mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
- (txq->wqe_ci << 8) |
- MLX5_OPCODE_TSO);
+ mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
+ (txq->wqe_ci << 8) |
+ MLX5_OPCODE_TSO);
mpw->wqe->ctrl[2] = 0;
mpw->wqe->ctrl[3] = 0;
- mpw->wqe->eseg.mss = htons(length);
+ mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
mpw->wqe->eseg.inline_hdr_sz = 0;
mpw->wqe->eseg.cs_flags = 0;
mpw->wqe->eseg.rsvd0 = 0;
@@ -988,7 +996,7 @@ mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
* Pointer to MPW session structure.
*/
static inline void
-mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
unsigned int size;
struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
@@ -999,9 +1007,10 @@ mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
* Store size in multiple of 16 bytes. Control and Ethernet segments
* count as 2.
*/
- mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(size));
+ mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
+ MLX5_WQE_DS(size));
mpw->state = MLX5_MPW_STATE_CLOSED;
- inl->byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG);
+ inl->byte_cnt = rte_cpu_to_be_32(mpw->total_len | MLX5_INLINE_SEG);
txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
}
@@ -1022,7 +1031,7 @@ uint16_t
mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n)
{
- struct txq *txq = (struct txq *)dpdk_txq;
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -1071,8 +1080,10 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
max_elts -= segs_n;
--pkts_n;
/*
@@ -1139,9 +1150,10 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
dseg = mpw.data.dseg[mpw.pkts_n];
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
- .byte_count = htonl(DATA_LEN(buf)),
+ .byte_count =
+ rte_cpu_to_be_32(DATA_LEN(buf)),
.lkey = mlx5_tx_mb2mr(txq, buf),
- .addr = htonll(addr),
+ .addr = rte_cpu_to_be_64(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
length += DATA_LEN(buf);
@@ -1213,7 +1225,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
volatile struct mlx5_wqe *wqe = mpw.wqe;
/* Request completion on last WQE. */
- wqe->ctrl[2] = htonl(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
@@ -1245,7 +1257,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
* Packet length.
*/
static inline void
-mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
+mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding)
{
uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
@@ -1253,9 +1265,10 @@ mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
mpw->pkts_n = 0;
mpw->total_len = sizeof(struct mlx5_wqe);
mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
- mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
- (txq->wqe_ci << 8) |
- MLX5_OPCODE_ENHANCED_MPSW);
+ mpw->wqe->ctrl[0] =
+ rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
+ (txq->wqe_ci << 8) |
+ MLX5_OPCODE_ENHANCED_MPSW);
mpw->wqe->ctrl[2] = 0;
mpw->wqe->ctrl[3] = 0;
memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE);
@@ -1263,9 +1276,9 @@ mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
uintptr_t addr = (uintptr_t)(mpw->wqe + 1);
/* Pad the first 2 DWORDs with zero-length inline header. */
- *(volatile uint32_t *)addr = htonl(MLX5_INLINE_SEG);
+ *(volatile uint32_t *)addr = rte_cpu_to_be_32(MLX5_INLINE_SEG);
*(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) =
- htonl(MLX5_INLINE_SEG);
+ rte_cpu_to_be_32(MLX5_INLINE_SEG);
mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE;
/* Start from the next WQEBB. */
mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1));
@@ -1286,14 +1299,15 @@ mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
* Number of consumed WQEs.
*/
static inline uint16_t
-mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
uint16_t ret;
/* Store size in multiple of 16 bytes. Control and Ethernet segments
* count as 2.
*/
- mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(mpw->total_len));
+ mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
+ MLX5_WQE_DS(mpw->total_len));
mpw->state = MLX5_MPW_STATE_CLOSED;
ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
txq->wqe_ci += ret;
@@ -1316,7 +1330,7 @@ mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw)
uint16_t
mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
- struct txq *txq = (struct txq *)dpdk_txq;
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -1360,8 +1374,10 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts - j < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
/* Should we enable HW CKSUM offload. */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
@@ -1446,9 +1462,10 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
dseg = mpw.data.dseg[mpw.pkts_n];
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
- .byte_count = htonl(DATA_LEN(buf)),
+ .byte_count = rte_cpu_to_be_32(
+ DATA_LEN(buf)),
.lkey = mlx5_tx_mb2mr(txq, buf),
- .addr = htonll(addr),
+ .addr = rte_cpu_to_be_64(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
length += DATA_LEN(buf);
@@ -1471,7 +1488,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
assert(length == DATA_LEN(buf));
- inl_hdr = htonl(length | MLX5_INLINE_SEG);
+ inl_hdr = rte_cpu_to_be_32(length | MLX5_INLINE_SEG);
addr = rte_pktmbuf_mtod(buf, uintptr_t);
mpw.data.raw = (volatile void *)
((uintptr_t)mpw.data.raw + inl_pad);
@@ -1527,9 +1544,9 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
for (n = 0; n * RTE_CACHE_LINE_SIZE < length; n++)
rte_prefetch2((void *)(addr +
n * RTE_CACHE_LINE_SIZE));
- naddr = htonll(addr);
+ naddr = rte_cpu_to_be_64(addr);
*dseg = (rte_v128u32_t) {
- htonl(length),
+ rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
naddr,
naddr >> 32,
@@ -1557,7 +1574,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
volatile struct mlx5_wqe *wqe = mpw.wqe;
/* Request completion on last WQE. */
- wqe->ctrl[2] = htonl(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
@@ -1627,7 +1644,7 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
* with error.
*/
static inline int
-mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
uint16_t cqe_cnt, uint32_t *rss_hash)
{
struct rxq_zip *zip = &rxq->zip;
@@ -1641,8 +1658,8 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
(volatile struct mlx5_mini_cqe8 (*)[8])
(uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info);
- len = ntohl((*mc)[zip->ai & 7].byte_cnt);
- *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result);
+ len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
+ *rss_hash = rte_be_to_cpu_32((*mc)[zip->ai & 7].rx_hash_result);
if ((++zip->ai & 7) == 0) {
/* Invalidate consumed CQEs */
idx = zip->ca;
@@ -1690,7 +1707,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
cqe_cnt].pkt_info);
/* Fix endianness. */
- zip->cqe_cnt = ntohl(cqe->byte_cnt);
+ zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
/*
* Current mini array position is the one returned by
* check_cqe64().
@@ -1705,8 +1722,8 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
--rxq->cq_ci;
zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
/* Get packet size to return. */
- len = ntohl((*mc)[0].byte_cnt);
- *rss_hash = ntohl((*mc)[0].rx_hash_result);
+ len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
+ *rss_hash = rte_be_to_cpu_32((*mc)[0].rx_hash_result);
zip->ai = 1;
/* Prefetch all the entries to be invalidated */
idx = zip->ca;
@@ -1716,8 +1733,8 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
++idx;
}
} else {
- len = ntohl(cqe->byte_cnt);
- *rss_hash = ntohl(cqe->rx_hash_res);
+ len = rte_be_to_cpu_32(cqe->byte_cnt);
+ *rss_hash = rte_be_to_cpu_32(cqe->rx_hash_res);
}
/* Error while receiving packet. */
if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
@@ -1738,10 +1755,10 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
* Offload flags (ol_flags) for struct rte_mbuf.
*/
static inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
+rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
{
uint32_t ol_flags = 0;
- uint16_t flags = ntohs(cqe->hdr_type_etc);
+ uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);
ol_flags =
TRANSPOSE(flags,
@@ -1777,7 +1794,7 @@ rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
- struct rxq *rxq = dpdk_rxq;
+ struct mlx5_rxq_data *rxq = dpdk_rxq;
const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
const unsigned int sges_n = rxq->sges_n;
@@ -1848,7 +1865,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) {
pkt->ol_flags |= PKT_RX_FDIR;
if (cqe->sop_drop_qpn !=
- htonl(MLX5_FLOW_MARK_DEFAULT)) {
+ rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) {
uint32_t mark = cqe->sop_drop_qpn;
pkt->ol_flags |= PKT_RX_FDIR_ID;
@@ -1860,10 +1877,16 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
pkt->ol_flags |= rxq_cq_to_ol_flags(rxq, cqe);
if (rxq->vlan_strip &&
(cqe->hdr_type_etc &
- htons(MLX5_CQE_VLAN_STRIPPED))) {
- pkt->ol_flags |= PKT_RX_VLAN_PKT |
+ rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) {
+ pkt->ol_flags |= PKT_RX_VLAN |
PKT_RX_VLAN_STRIPPED;
- pkt->vlan_tci = ntohs(cqe->vlan_info);
+ pkt->vlan_tci =
+ rte_be_to_cpu_16(cqe->vlan_info);
+ }
+ if (rxq->hw_timestamp) {
+ pkt->timestamp =
+ rte_be_to_cpu_64(cqe->timestamp);
+ pkt->ol_flags |= PKT_RX_TIMESTAMP;
}
if (rxq->crc_present)
len -= ETHER_CRC_LEN;
@@ -1879,7 +1902,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
* of the buffers are already known, only the buffer address
* changes.
*/
- wqe->addr = htonll(rte_pktmbuf_mtod(rep, uintptr_t));
+ wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
if (len > DATA_LEN(seg)) {
len -= DATA_LEN(seg);
++NB_SEGS(pkt);
@@ -1906,10 +1929,10 @@ skip:
return 0;
/* Update the consumer index. */
rxq->rq_ci = rq_ci >> sges_n;
- rte_wmb();
- *rxq->cq_db = htonl(rxq->cq_ci);
- rte_wmb();
- *rxq->rq_db = htonl(rxq->rq_ci);
+ rte_io_wmb();
+ *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+ rte_io_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment packets counter. */
rxq->stats.ipackets += i;
@@ -2016,7 +2039,7 @@ priv_check_vec_tx_support(struct priv *priv)
}
int __attribute__((weak))
-rxq_check_vec_support(struct rxq *rxq)
+rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
(void)rxq;
return -ENOTSUP;
@@ -2028,9 +2051,3 @@ priv_check_vec_rx_support(struct priv *priv)
(void)priv;
return -ENOTSUP;
}
-
-void __attribute__((weak))
-priv_prep_vec_rx_function(struct priv *priv)
-{
- (void)priv;
-}
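The hunks above replace the POSIX byte-order helpers (ntohl()/htonl()) with rte_be_to_cpu_*()/rte_cpu_to_be_*() and relax rte_wmb() to rte_io_wmb() in the Rx doorbell path. A minimal sketch of the resulting consumer-index/doorbell sequence follows; it assumes only the struct mlx5_rxq_data fields introduced later in this patch (cq_ci, rq_ci, cq_db, rq_db), and the function name is illustrative rather than part of the PMD.

/* Sketch only: Rx doorbell update pattern as used at the end of
 * mlx5_rx_burst() after this change. */
#include <rte_atomic.h>     /* rte_io_wmb() */
#include <rte_byteorder.h>  /* rte_cpu_to_be_32() */

#include "mlx5_rxtx.h"

static inline void
example_rx_update_doorbells(struct mlx5_rxq_data *rxq)
{
	/* Order the preceding CQE/WQE writes before the CQ doorbell record. */
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	/* Same ordering for the RQ doorbell record. */
	rte_io_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}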
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 7de1d108..d34f3cc0 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -36,6 +36,7 @@
#include <stddef.h>
#include <stdint.h>
+#include <sys/queue.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -43,21 +44,16 @@
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
-#include <infiniband/mlx5_hw.h>
+#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
+#include <rte_hexdump.h>
+#include <rte_atomic.h>
#include "mlx5_utils.h"
#include "mlx5.h"
@@ -81,19 +77,22 @@ struct mlx5_txq_stats {
uint64_t opackets; /**< Total of successfully sent packets. */
uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
- uint64_t odropped; /**< Total of packets not sent when TX ring full. */
-};
-
-/* Flow director queue structure. */
-struct fdir_queue {
- struct ibv_qp *qp; /* Associated RX QP. */
- struct ibv_exp_rwq_ind_table *ind_table; /* Indirection table. */
- struct ibv_exp_wq *wq; /* Work queue. */
- struct ibv_cq *cq; /* Completion queue. */
+ uint64_t oerrors; /**< Total number of failed transmitted packets. */
};
struct priv;
+/* Memory region queue object. */
+struct mlx5_mr {
+ LIST_ENTRY(mlx5_mr) next; /**< Pointer to the next element. */
+ rte_atomic32_t refcnt; /**< Reference counter. */
+ uint32_t lkey; /**< rte_cpu_to_be_32(mr->lkey). */
+ uintptr_t start; /* Start address of MR */
+ uintptr_t end; /* End address of MR */
+ struct ibv_mr *mr; /**< Memory Region. */
+ struct rte_mempool *mp; /**< Memory Pool. */
+};
+
/* Compressed CQE context. */
struct rxq_zip {
uint16_t ai; /* Array index. */
@@ -104,22 +103,22 @@ struct rxq_zip {
};
/* RX queue descriptor. */
-struct rxq {
+struct mlx5_rxq_data {
unsigned int csum:1; /* Enable checksum offloading. */
unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
+ unsigned int hw_timestamp:1; /* Enable HW timestamp. */
unsigned int vlan_strip:1; /* Enable VLAN stripping. */
unsigned int crc_present:1; /* CRC must be subtracted. */
unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */
unsigned int cqe_n:4; /* Log 2 of CQ elements. */
unsigned int elts_n:4; /* Log 2 of Mbufs. */
- unsigned int port_id:8;
unsigned int rss_hash:1; /* RSS hash result is enabled. */
unsigned int mark:1; /* Marked flow available on the queue. */
unsigned int pending_err:1; /* CQE error needs to be handled. */
- unsigned int trim_elts:1; /* Whether elts needs clean-up. */
- unsigned int :6; /* Remaining bits. */
+ unsigned int :14; /* Remaining bits. */
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
+ uint16_t port_id;
uint16_t rq_ci;
uint16_t rq_pi;
uint16_t cq_ci;
@@ -131,122 +130,56 @@ struct rxq {
struct mlx5_rxq_stats stats;
uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
+ void *cq_uar; /* CQ user access region. */
+ uint32_t cqn; /* CQ number. */
+ uint8_t cq_arm_sn; /* CQ arm seq number. */
} __rte_cache_aligned;
-/* RX queue control descriptor. */
-struct rxq_ctrl {
- struct priv *priv; /* Back pointer to private data. */
+/* Verbs Rx queue elements. */
+struct mlx5_rxq_ibv {
+ LIST_ENTRY(mlx5_rxq_ibv) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
+ struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */
struct ibv_cq *cq; /* Completion Queue. */
- struct ibv_exp_wq *wq; /* Work Queue. */
- struct fdir_queue *fdir_queue; /* Flow director queue. */
- struct ibv_mr *mr; /* Memory Region (for mp). */
+ struct ibv_wq *wq; /* Work Queue. */
struct ibv_comp_channel *channel;
- unsigned int socket; /* CPU socket ID for allocations. */
- struct rxq rxq; /* Data path structure. */
-};
-
-/* Hash RX queue types. */
-enum hash_rxq_type {
- HASH_RXQ_TCPV4,
- HASH_RXQ_UDPV4,
- HASH_RXQ_IPV4,
- HASH_RXQ_TCPV6,
- HASH_RXQ_UDPV6,
- HASH_RXQ_IPV6,
- HASH_RXQ_ETH,
-};
-
-/* Flow structure with Ethernet specification. It is packed to prevent padding
- * between attr and spec as this layout is expected by libibverbs. */
-struct flow_attr_spec_eth {
- struct ibv_exp_flow_attr attr;
- struct ibv_exp_flow_spec_eth spec;
-} __attribute__((packed));
-
-/* Define a struct flow_attr_spec_eth object as an array of at least
- * "size" bytes. Room after the first index is normally used to store
- * extra flow specifications. */
-#define FLOW_ATTR_SPEC_ETH(name, size) \
- struct flow_attr_spec_eth name \
- [((size) / sizeof(struct flow_attr_spec_eth)) + \
- !!((size) % sizeof(struct flow_attr_spec_eth))]
-
-/* Initialization data for hash RX queue. */
-struct hash_rxq_init {
- uint64_t hash_fields; /* Fields that participate in the hash. */
- uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
- unsigned int flow_priority; /* Flow priority to use. */
- union {
- struct {
- enum ibv_exp_flow_spec_type type;
- uint16_t size;
- } hdr;
- struct ibv_exp_flow_spec_tcp_udp tcp_udp;
- struct ibv_exp_flow_spec_ipv4 ipv4;
- struct ibv_exp_flow_spec_ipv6 ipv6;
- struct ibv_exp_flow_spec_eth eth;
- } flow_spec; /* Flow specification template. */
- const struct hash_rxq_init *underlayer; /* Pointer to underlayer. */
+ struct mlx5_mr *mr; /* Memory Region (for mp). */
};
-/* Initialization data for indirection table. */
-struct ind_table_init {
- unsigned int max_size; /* Maximum number of WQs. */
- /* Hash RX queues using this table. */
- unsigned int hash_types;
- unsigned int hash_types_n;
+/* RX queue control descriptor. */
+struct mlx5_rxq_ctrl {
+ LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
+ struct priv *priv; /* Back pointer to private data. */
+ struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
+ struct mlx5_rxq_data rxq; /* Data path structure. */
+ unsigned int socket; /* CPU socket ID for allocations. */
+ unsigned int irq:1; /* Whether IRQ is enabled. */
};
-/* Initialization data for special flows. */
-struct special_flow_init {
- uint8_t dst_mac_val[6];
- uint8_t dst_mac_mask[6];
- unsigned int hash_types;
- unsigned int per_vlan:1;
+/* Indirection table. */
+struct mlx5_ind_table_ibv {
+ LIST_ENTRY(mlx5_ind_table_ibv) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
+ struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
+ uint16_t queues_n; /**< Number of queues in the list. */
+ uint16_t queues[]; /**< Queue list. */
};
-enum hash_rxq_flow_type {
- HASH_RXQ_FLOW_TYPE_PROMISC,
- HASH_RXQ_FLOW_TYPE_ALLMULTI,
- HASH_RXQ_FLOW_TYPE_BROADCAST,
- HASH_RXQ_FLOW_TYPE_IPV6MULTI,
- HASH_RXQ_FLOW_TYPE_MAC,
-};
-
-#ifndef NDEBUG
-static inline const char *
-hash_rxq_flow_type_str(enum hash_rxq_flow_type flow_type)
-{
- switch (flow_type) {
- case HASH_RXQ_FLOW_TYPE_PROMISC:
- return "promiscuous";
- case HASH_RXQ_FLOW_TYPE_ALLMULTI:
- return "allmulticast";
- case HASH_RXQ_FLOW_TYPE_BROADCAST:
- return "broadcast";
- case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
- return "IPv6 multicast";
- case HASH_RXQ_FLOW_TYPE_MAC:
- return "MAC";
- }
- return NULL;
-}
-#endif /* NDEBUG */
-
-struct hash_rxq {
- struct priv *priv; /* Back pointer to private data. */
- struct ibv_qp *qp; /* Hash RX QP. */
- enum hash_rxq_type type; /* Hash RX queue type. */
- /* MAC flow steering rules, one per VLAN ID. */
- struct ibv_exp_flow *mac_flow
- [MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS];
- struct ibv_exp_flow *special_flow
- [MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS];
+/* Hash Rx queue. */
+struct mlx5_hrxq {
+ LIST_ENTRY(mlx5_hrxq) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
+ struct mlx5_ind_table_ibv *ind_table; /* Indirection table. */
+ struct ibv_qp *qp; /* Verbs queue pair. */
+ uint64_t hash_fields; /* Verbs Hash fields. */
+ uint8_t rss_key_len; /* Hash key length in bytes. */
+ uint8_t rss_key[]; /* Hash key. */
};
/* TX queue descriptor. */
__extension__
-struct txq {
+struct mlx5_txq_data {
uint16_t elts_head; /* Current counter in (*elts)[]. */
uint16_t elts_tail; /* Counter of first element awaiting completion. */
uint16_t elts_comp; /* Counter since last completion request. */
@@ -265,6 +198,7 @@ struct txq {
uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
+ uint16_t mr_cache_idx; /* Index of last hit entry. */
uint32_t qp_num_8s; /* QP number shifted by 8. */
uint32_t flags; /* Flags for Tx Queue. */
volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
@@ -272,61 +206,92 @@ struct txq {
volatile uint32_t *qp_db; /* Work queue doorbell. */
volatile uint32_t *cq_db; /* Completion queue doorbell. */
volatile void *bf_reg; /* Blueflame register. */
- struct {
- uintptr_t start; /* Start address of MR */
- uintptr_t end; /* End address of MR */
- struct ibv_mr *mr; /* Memory Region (for mp). */
- uint32_t lkey; /* htonl(mr->lkey) */
- } mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
- uint16_t mr_cache_idx; /* Index of last hit entry. */
+ struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
struct rte_mbuf *(*elts)[]; /* TX elements. */
struct mlx5_txq_stats stats; /* TX queue counters. */
} __rte_cache_aligned;
-/* TX queue control descriptor. */
-struct txq_ctrl {
- struct priv *priv; /* Back pointer to private data. */
+/* Verbs Tx queue elements. */
+struct mlx5_txq_ibv {
+ LIST_ENTRY(mlx5_txq_ibv) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
struct ibv_cq *cq; /* Completion Queue. */
struct ibv_qp *qp; /* Queue Pair. */
+};
+
+/* TX queue control descriptor. */
+struct mlx5_txq_ctrl {
+ LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
+ struct priv *priv; /* Back pointer to private data. */
unsigned int socket; /* CPU socket ID for allocations. */
- struct txq txq; /* Data path structure. */
+ unsigned int max_inline_data; /* Max inline data. */
+ unsigned int max_tso_header; /* Max TSO header size. */
+ struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
+ struct mlx5_txq_data txq; /* Data path structure. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
};
/* mlx5_rxq.c */
-extern const struct hash_rxq_init hash_rxq_init[];
-extern const unsigned int hash_rxq_init_n;
-
extern uint8_t rss_hash_default_key[];
extern const size_t rss_hash_default_key_len;
-size_t priv_flow_attr(struct priv *, struct ibv_exp_flow_attr *,
- size_t, enum hash_rxq_type);
-int priv_create_hash_rxqs(struct priv *);
-void priv_destroy_hash_rxqs(struct priv *);
-int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
-int priv_rehash_flows(struct priv *);
-void rxq_cleanup(struct rxq_ctrl *);
+void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_rxconf *, struct rte_mempool *);
void mlx5_rx_queue_release(void *);
-uint16_t mlx5_rx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t);
int priv_rx_intr_vec_enable(struct priv *priv);
void priv_rx_intr_vec_disable(struct priv *priv);
-#ifdef HAVE_UPDATE_CQ_CI
int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
-#endif /* HAVE_UPDATE_CQ_CI */
+struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_new(struct priv *, uint16_t);
+struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_get(struct priv *, uint16_t);
+int mlx5_priv_rxq_ibv_release(struct priv *, struct mlx5_rxq_ibv *);
+int mlx5_priv_rxq_ibv_releasable(struct priv *, struct mlx5_rxq_ibv *);
+int mlx5_priv_rxq_ibv_verify(struct priv *);
+struct mlx5_rxq_ctrl *mlx5_priv_rxq_new(struct priv *, uint16_t,
+ uint16_t, unsigned int,
+ struct rte_mempool *);
+struct mlx5_rxq_ctrl *mlx5_priv_rxq_get(struct priv *, uint16_t);
+int mlx5_priv_rxq_release(struct priv *, uint16_t);
+int mlx5_priv_rxq_releasable(struct priv *, uint16_t);
+int mlx5_priv_rxq_verify(struct priv *);
+int rxq_alloc_elts(struct mlx5_rxq_ctrl *);
+struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_new(struct priv *,
+ uint16_t [],
+ uint16_t);
+struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_get(struct priv *,
+ uint16_t [],
+ uint16_t);
+int mlx5_priv_ind_table_ibv_release(struct priv *, struct mlx5_ind_table_ibv *);
+int mlx5_priv_ind_table_ibv_verify(struct priv *);
+struct mlx5_hrxq *mlx5_priv_hrxq_new(struct priv *, uint8_t *, uint8_t,
+ uint64_t, uint16_t [], uint16_t);
+struct mlx5_hrxq *mlx5_priv_hrxq_get(struct priv *, uint8_t *, uint8_t,
+ uint64_t, uint16_t [], uint16_t);
+int mlx5_priv_hrxq_release(struct priv *, struct mlx5_hrxq *);
+int mlx5_priv_hrxq_ibv_verify(struct priv *);
/* mlx5_txq.c */
-void txq_cleanup(struct txq_ctrl *);
-int txq_ctrl_setup(struct rte_eth_dev *, struct txq_ctrl *, uint16_t,
- unsigned int, const struct rte_eth_txconf *);
int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_txconf *);
void mlx5_tx_queue_release(void *);
-uint16_t mlx5_tx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t);
+int priv_tx_uar_remap(struct priv *priv, int fd);
+struct mlx5_txq_ibv *mlx5_priv_txq_ibv_new(struct priv *, uint16_t);
+struct mlx5_txq_ibv *mlx5_priv_txq_ibv_get(struct priv *, uint16_t);
+int mlx5_priv_txq_ibv_release(struct priv *, struct mlx5_txq_ibv *);
+int mlx5_priv_txq_ibv_releasable(struct priv *, struct mlx5_txq_ibv *);
+int mlx5_priv_txq_ibv_verify(struct priv *);
+struct mlx5_txq_ctrl *mlx5_priv_txq_new(struct priv *, uint16_t,
+ uint16_t, unsigned int,
+ const struct rte_eth_txconf *);
+struct mlx5_txq_ctrl *mlx5_priv_txq_get(struct priv *, uint16_t);
+int mlx5_priv_txq_release(struct priv *, uint16_t);
+int mlx5_priv_txq_releasable(struct priv *, uint16_t);
+int mlx5_priv_txq_verify(struct priv *);
+void txq_alloc_elts(struct mlx5_txq_ctrl *);
/* mlx5_rxtx.c */
@@ -346,18 +311,19 @@ int mlx5_tx_descriptor_status(void *, uint16_t);
/* Vectorized version of mlx5_rxtx.c */
int priv_check_raw_vec_tx_support(struct priv *);
int priv_check_vec_tx_support(struct priv *);
-int rxq_check_vec_support(struct rxq *);
+int rxq_check_vec_support(struct mlx5_rxq_data *);
int priv_check_vec_rx_support(struct priv *);
-void priv_prep_vec_rx_function(struct priv *);
uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t);
uint16_t mlx5_tx_burst_vec(void *, struct rte_mbuf **, uint16_t);
uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
/* mlx5_mr.c */
-struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
-void txq_mp2mr_iter(struct rte_mempool *, void *);
-uint32_t txq_mp2mr_reg(struct txq *, struct rte_mempool *, unsigned int);
+void mlx5_mp2mr_iter(struct rte_mempool *, void *);
+struct mlx5_mr *priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *,
+ struct rte_mempool *, unsigned int);
+struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
+ unsigned int);
#ifndef NDEBUG
/**
@@ -419,16 +385,24 @@ check_cqe(volatile struct mlx5_cqe *cqe,
if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
(syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
return 0;
- if (!check_cqe_seen(cqe))
+ if (!check_cqe_seen(cqe)) {
ERROR("unexpected CQE error %u (0x%02x)"
" syndrome 0x%02x",
op_code, op_code, syndrome);
+ rte_hexdump(stderr, "MLX5 Error CQE:",
+ (const void *)((uintptr_t)err_cqe),
+ sizeof(*err_cqe));
+ }
return 1;
} else if ((op_code != MLX5_CQE_RESP_SEND) &&
(op_code != MLX5_CQE_REQ)) {
- if (!check_cqe_seen(cqe))
+ if (!check_cqe_seen(cqe)) {
ERROR("unexpected CQE opcode %u (0x%02x)",
op_code, op_code);
+ rte_hexdump(stderr, "MLX5 CQE:",
+ (const void *)((uintptr_t)cqe),
+ sizeof(*cqe));
+ }
return 1;
}
#endif /* NDEBUG */
@@ -447,7 +421,7 @@ check_cqe(volatile struct mlx5_cqe *cqe,
* WQE address.
*/
static inline uintptr_t *
-tx_mlx5_wqe(struct txq *txq, uint16_t ci)
+tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci)
{
ci &= ((1 << txq->wqe_n) - 1);
return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
@@ -462,7 +436,7 @@ tx_mlx5_wqe(struct txq *txq, uint16_t ci)
* Pointer to TX queue structure.
*/
static __rte_always_inline void
-mlx5_tx_complete(struct txq *txq)
+mlx5_tx_complete(struct mlx5_txq_data *txq)
{
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -483,13 +457,18 @@ mlx5_tx_complete(struct txq *txq)
#ifndef NDEBUG
if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
(MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
- if (!check_cqe_seen(cqe))
+ if (!check_cqe_seen(cqe)) {
ERROR("unexpected error CQE, TX stopped");
+ rte_hexdump(stderr, "MLX5 TXQ:",
+ (const void *)((uintptr_t)txq->wqes),
+ ((1 << txq->wqe_n) *
+ MLX5_WQE_SIZE));
+ }
return;
}
#endif /* NDEBUG */
++cq_ci;
- txq->wqe_pi = ntohs(cqe->wqe_counter);
+ txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter);
ctrl = (volatile struct mlx5_wqe_ctrl *)
tx_mlx5_wqe(txq, txq->wqe_pi);
elts_tail = ctrl->ctrl3;
@@ -526,8 +505,8 @@ mlx5_tx_complete(struct txq *txq)
txq->cq_ci = cq_ci;
txq->elts_tail = elts_tail;
/* Update the consumer index. */
- rte_wmb();
- *txq->cq_db = htonl(cq_ci);
+ rte_compiler_barrier();
+ *txq->cq_db = rte_cpu_to_be_32(cq_ci);
}
/**
@@ -562,51 +541,80 @@ mlx5_tx_mb2mp(struct rte_mbuf *buf)
* mr->lkey on success, (uint32_t)-1 on failure.
*/
static __rte_always_inline uint32_t
-mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
+mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
{
uint16_t i = txq->mr_cache_idx;
uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
+ struct mlx5_mr *mr;
assert(i < RTE_DIM(txq->mp2mr));
- if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
- return txq->mp2mr[i].lkey;
+ if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
+ return txq->mp2mr[i]->lkey;
for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
- if (unlikely(txq->mp2mr[i].mr == NULL)) {
+ if (unlikely(txq->mp2mr[i]->mr == NULL)) {
/* Unknown MP, add a new MR for it. */
break;
}
- if (txq->mp2mr[i].start <= addr &&
- txq->mp2mr[i].end >= addr) {
- assert(txq->mp2mr[i].lkey != (uint32_t)-1);
- assert(htonl(txq->mp2mr[i].mr->lkey) ==
- txq->mp2mr[i].lkey);
+ if (txq->mp2mr[i]->start <= addr &&
+ txq->mp2mr[i]->end >= addr) {
+ assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
+ assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) ==
+ txq->mp2mr[i]->lkey);
txq->mr_cache_idx = i;
- return txq->mp2mr[i].lkey;
+ return txq->mp2mr[i]->lkey;
}
}
txq->mr_cache_idx = 0;
- return txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+ mr = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+ /*
+ * Request the reference to use in this queue, the original one is
+ * kept by the control plane.
+ */
+ if (mr) {
+ rte_atomic32_inc(&mr->refcnt);
+ return mr->lkey;
+ }
+ return (uint32_t)-1;
}
/**
- * Ring TX queue doorbell.
+ * Ring TX queue doorbell and flush the update if requested.
*
* @param txq
* Pointer to TX queue structure.
* @param wqe
* Pointer to the last WQE posted in the NIC.
+ * @param cond
+ * Request for write memory barrier after BlueFlame update.
*/
static __rte_always_inline void
-mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
+mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
+ int cond)
{
uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
volatile uint64_t *src = ((volatile uint64_t *)wqe);
- rte_wmb();
- *txq->qp_db = htonl(txq->wqe_ci);
+ rte_io_wmb();
+ *txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
/* Ensure ordering between DB record and BF copy. */
rte_wmb();
*dst = *src;
+ if (cond)
+ rte_wmb();
+}
+
+/**
+ * Ring TX queue doorbell and flush the update by write memory barrier.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param wqe
+ * Pointer to the last WQE posted in the NIC.
+ */
+static __rte_always_inline void
+mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
+{
+ mlx5_tx_dbrec_cond_wmb(txq, wqe, 1);
}
#endif /* RTE_PMD_MLX5_RXTX_H_ */
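mlx5_rxtx.h now exposes the Tx doorbell in two flavors: mlx5_tx_dbrec_cond_wmb() lets the caller decide whether to flush the BlueFlame copy with a final rte_wmb(), while mlx5_tx_dbrec() always flushes. A hedged sketch of how a burst routine combines mlx5_tx_complete(), the per-queue lkey cache behind mlx5_tx_mb2mr() and mlx5_tx_dbrec() is given below; build_single_packet_wqe() is a hypothetical helper, not a PMD function.

/* Sketch only: completion/lkey/doorbell pattern around the inline helpers
 * declared in mlx5_rxtx.h. build_single_packet_wqe() is hypothetical. */
#include <stdint.h>

#include <rte_mbuf.h>

#include "mlx5_rxtx.h"

/* Hypothetical WQE builder used only for illustration. */
static volatile struct mlx5_wqe *
build_single_packet_wqe(struct mlx5_txq_data *txq, struct rte_mbuf *mb,
			uint32_t lkey);

static uint16_t
example_tx_burst(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
		 uint16_t pkts_n)
{
	volatile struct mlx5_wqe *last_wqe = NULL;
	uint16_t i;

	/* Reclaim completed WQEs and free the mbufs attached to them. */
	mlx5_tx_complete(txq);
	for (i = 0; i < pkts_n; ++i) {
		/* Resolve the mbuf's mempool to an lkey through the MR
		 * cache; a miss registers a new MR and takes a reference. */
		uint32_t lkey = mlx5_tx_mb2mr(txq, pkts[i]);

		if (lkey == (uint32_t)-1)
			break;
		last_wqe = build_single_packet_wqe(txq, pkts[i], lkey);
	}
	/* Ring the doorbell once for the whole burst; this variant always
	 * flushes the BlueFlame copy. */
	if (last_wqe)
		mlx5_tx_dbrec(txq, last_wqe);
	return i;
}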
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
new file mode 100644
index 00000000..ba6c8cef
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -0,0 +1,388 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#include <infiniband/mlx5dv.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_prefetch.h>
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_rxtx_vec.h"
+#include "mlx5_autoconf.h"
+#include "mlx5_defs.h"
+#include "mlx5_prm.h"
+
+#if defined RTE_ARCH_X86_64
+#include "mlx5_rxtx_vec_sse.h"
+#elif defined RTE_ARCH_ARM64
+#include "mlx5_rxtx_vec_neon.h"
+#else
+#error "This should not be compiled if SIMD instructions are not supported."
+#endif
+
+/**
+ * Count the number of continuous single segment packets.
+ *
+ * @param pkts
+ * Pointer to array of packets.
+ * @param pkts_n
+ * Number of packets.
+ *
+ * @return
+ * Number of continuous single segment packets.
+ */
+static inline unsigned int
+txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ unsigned int pos;
+
+ if (!pkts_n)
+ return 0;
+ /* Count the number of continuous single segment packets. */
+ for (pos = 0; pos < pkts_n; ++pos)
+ if (NB_SEGS(pkts[pos]) > 1)
+ break;
+ return pos;
+}
+
+/**
+ * Count the number of packets having the same ol_flags and calculate cs_flags.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param pkts
+ * Pointer to array of packets.
+ * @param pkts_n
+ * Number of packets.
+ * @param cs_flags
+ * Pointer to the flags to be returned.
+ *
+ * @return
+ * Number of packets having the same ol_flags.
+ */
+static inline unsigned int
+txq_calc_offload(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n, uint8_t *cs_flags)
+{
+ unsigned int pos;
+ const uint64_t ol_mask =
+ PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
+ PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
+
+ if (!pkts_n)
+ return 0;
+ /* Count the number of packets having same ol_flags. */
+ for (pos = 1; pos < pkts_n; ++pos)
+ if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
+ break;
+ /* Should open another MPW session for the rest. */
+ if (pkts[0]->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
+ const uint64_t is_tunneled =
+ pkts[0]->ol_flags &
+ (PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN);
+
+ if (is_tunneled && txq->tunnel_en) {
+ *cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
+ MLX5_ETH_WQE_L4_INNER_CSUM;
+ if (pkts[0]->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+ *cs_flags |= MLX5_ETH_WQE_L3_CSUM;
+ } else {
+ *cs_flags = MLX5_ETH_WQE_L3_CSUM |
+ MLX5_ETH_WQE_L4_CSUM;
+ }
+ }
+ return pos;
+}
+
+/**
+ * DPDK callback for vectorized TX.
+ *
+ * @param dpdk_txq
+ * Generic pointer to TX queue structure.
+ * @param[in] pkts
+ * Packets to transmit.
+ * @param pkts_n
+ * Number of packets in array.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+uint16_t
+mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n)
+{
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
+ uint16_t nb_tx = 0;
+
+ while (pkts_n > nb_tx) {
+ uint16_t n;
+ uint16_t ret;
+
+ n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
+ ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
+ nb_tx += ret;
+ if (!ret)
+ break;
+ }
+ return nb_tx;
+}
+
+/**
+ * DPDK callback for vectorized TX with multi-seg packets and offload.
+ *
+ * @param dpdk_txq
+ * Generic pointer to TX queue structure.
+ * @param[in] pkts
+ * Packets to transmit.
+ * @param pkts_n
+ * Number of packets in array.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+uint16_t
+mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
+ uint16_t nb_tx = 0;
+
+ while (pkts_n > nb_tx) {
+ uint8_t cs_flags = 0;
+ uint16_t n;
+ uint16_t ret;
+
+ /* Transmit multi-seg packets in the head of pkts list. */
+ if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) &&
+ NB_SEGS(pkts[nb_tx]) > 1)
+ nb_tx += txq_scatter_v(txq,
+ &pkts[nb_tx],
+ pkts_n - nb_tx);
+ n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
+ if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS))
+ n = txq_check_multiseg(&pkts[nb_tx], n);
+ if (!(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
+ n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags);
+ ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
+ nb_tx += ret;
+ if (!ret)
+ break;
+ }
+ return nb_tx;
+}
+
+/**
+ * Skip error packets.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets successfully received (<= pkts_n).
+ */
+static uint16_t
+rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
+ uint16_t pkts_n)
+{
+ uint16_t n = 0;
+ unsigned int i;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint32_t err_bytes = 0;
+#endif
+
+ for (i = 0; i < pkts_n; ++i) {
+ struct rte_mbuf *pkt = pkts[i];
+
+ if (pkt->packet_type == RTE_PTYPE_ALL_MASK) {
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ err_bytes += PKT_LEN(pkt);
+#endif
+ rte_pktmbuf_free_seg(pkt);
+ } else {
+ pkts[n++] = pkt;
+ }
+ }
+ rxq->stats.idropped += (pkts_n - n);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Correct counters of errored completions. */
+ rxq->stats.ipackets -= (pkts_n - n);
+ rxq->stats.ibytes -= err_bytes;
+#endif
+ rxq->pending_err = 0;
+ return n;
+}
+
+/**
+ * DPDK callback for vectorized RX.
+ *
+ * @param dpdk_rxq
+ * Generic pointer to RX queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets successfully received (<= pkts_n).
+ */
+uint16_t
+mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ struct mlx5_rxq_data *rxq = dpdk_rxq;
+ uint16_t nb_rx;
+
+ nb_rx = rxq_burst_v(rxq, pkts, pkts_n);
+ if (unlikely(rxq->pending_err))
+ nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx);
+ return nb_rx;
+}
+
+/**
+ * Check whether Tx queue flags are set for raw vectorized Tx.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 1 if supported, negative errno value if not.
+ */
+int __attribute__((cold))
+priv_check_raw_vec_tx_support(struct priv *priv)
+{
+ uint16_t i;
+
+ /* All the configured Tx queues should support vectorized Tx. */
+ for (i = 0; i < priv->txqs_n; ++i) {
+ struct mlx5_txq_data *txq = (*priv->txqs)[i];
+
+ if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) ||
+ !(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
+ break;
+ }
+ if (i != priv->txqs_n)
+ return -ENOTSUP;
+ return 1;
+}
+
+/**
+ * Check whether a device can support vectorized TX.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 1 if supported, negative errno value if not.
+ */
+int __attribute__((cold))
+priv_check_vec_tx_support(struct priv *priv)
+{
+ if (!priv->tx_vec_en ||
+ priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
+ priv->mps != MLX5_MPW_ENHANCED ||
+ priv->tso)
+ return -ENOTSUP;
+ return 1;
+}
+
+/**
+ * Check whether an RX queue can support vectorized RX.
+ *
+ * @param rxq
+ * Pointer to RX queue.
+ *
+ * @return
+ * 1 if supported, negative errno value if not.
+ */
+int __attribute__((cold))
+rxq_check_vec_support(struct mlx5_rxq_data *rxq)
+{
+ struct mlx5_rxq_ctrl *ctrl =
+ container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+
+ if (!ctrl->priv->rx_vec_en || rxq->sges_n != 0)
+ return -ENOTSUP;
+ return 1;
+}
+
+/**
+ * Check whether a device can support vectorized RX.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 1 if supported, negative errno value if not.
+ */
+int __attribute__((cold))
+priv_check_vec_rx_support(struct priv *priv)
+{
+ uint16_t i;
+
+ if (!priv->rx_vec_en)
+ return -ENOTSUP;
+ /* All the configured Rx queues should support vectorized Rx. */
+ for (i = 0; i < priv->rxqs_n; ++i) {
+ struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
+
+ if (!rxq)
+ continue;
+ if (rxq_check_vec_support(rxq) < 0)
+ break;
+ }
+ if (i != priv->rxqs_n)
+ return -ENOTSUP;
+ return 1;
+}
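The capability checks in this file return 1 when the vectorized paths are usable and -ENOTSUP otherwise; the __attribute__((weak)) stubs in mlx5_rxtx.c stand in when this file is not compiled in. A minimal sketch of how such a check could gate the choice of the Rx burst callback is shown below; the selector is illustrative and does not reproduce the PMD's own selection code.

/* Sketch only: how the vectorized-Rx capability check might gate the Rx
 * burst callback. Not the PMD's actual selector. */
#include <rte_ethdev.h>

#include "mlx5_rxtx.h"

static eth_rx_burst_t
example_select_rx_function(struct priv *priv)
{
	/* The weak stub returns -ENOTSUP when vector code is not built. */
	if (priv_check_vec_rx_support(priv) > 0)
		return mlx5_rx_burst_vec;
	return mlx5_rx_burst;
}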
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h
new file mode 100644
index 00000000..1f08ed0b
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.h
@@ -0,0 +1,130 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX5_RXTX_VEC_H_
+#define RTE_PMD_MLX5_RXTX_VEC_H_
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+
+#include "mlx5_autoconf.h"
+#include "mlx5_prm.h"
+
+/*
+ * Compile time sanity check for vectorized functions.
+ */
+
+#define S_ASSERT_RTE_MBUF(s) \
+ static_assert(s, "A field of struct rte_mbuf is changed")
+#define S_ASSERT_MLX5_CQE(s) \
+ static_assert(s, "A field of struct mlx5_cqe is changed")
+
+/* rxq_cq_decompress_v() */
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, pkt_len) ==
+ offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, data_len) ==
+ offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, hash) ==
+ offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+/* rxq_cq_to_ptype_oflags_v() */
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, ol_flags) ==
+ offsetof(struct rte_mbuf, rearm_data) + 8);
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, rearm_data) ==
+ RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
+
+/* rxq_burst_v() */
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, pkt_len) ==
+ offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, data_len) ==
+ offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+#if (RTE_CACHE_LINE_SIZE == 128)
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, pkt_info) == 64);
+#else
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, pkt_info) == 0);
+#endif
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rx_hash_res) ==
+ offsetof(struct mlx5_cqe, pkt_info) + 12);
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rsvd1) +
+ sizeof(((struct mlx5_cqe *)0)->rsvd1) ==
+ offsetof(struct mlx5_cqe, hdr_type_etc));
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, vlan_info) ==
+ offsetof(struct mlx5_cqe, hdr_type_etc) + 2);
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rsvd2) +
+ sizeof(((struct mlx5_cqe *)0)->rsvd2) ==
+ offsetof(struct mlx5_cqe, byte_cnt));
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, sop_drop_qpn) ==
+ RTE_ALIGN(offsetof(struct mlx5_cqe, sop_drop_qpn), 8));
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, op_own) ==
+ offsetof(struct mlx5_cqe, sop_drop_qpn) + 7);
+
+/**
+ * Replenish buffers for RX in bulk.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param n
+ * Number of buffers to be replenished.
+ */
+static inline void
+mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
+{
+ const uint16_t q_n = 1 << rxq->elts_n;
+ const uint16_t q_mask = q_n - 1;
+ uint16_t elts_idx = rxq->rq_ci & q_mask;
+ struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+ volatile struct mlx5_wqe_data_seg *wq = &(*rxq->wqes)[elts_idx];
+ unsigned int i;
+
+ assert(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH);
+ assert(n <= (uint16_t)(q_n - (rxq->rq_ci - rxq->rq_pi)));
+ assert(MLX5_VPMD_RXQ_RPLNSH_THRESH > MLX5_VPMD_DESCS_PER_LOOP);
+ /* Not to cross queue end. */
+ n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
+ if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
+ rxq->stats.rx_nombuf += n;
+ return;
+ }
+ for (i = 0; i < n; ++i)
+ wq[i].addr = rte_cpu_to_be_64((uintptr_t)elts[i]->buf_addr +
+ RTE_PKTMBUF_HEADROOM);
+ rxq->rq_ci += n;
+ /* Prevent overflowing into consumed mbufs. */
+ elts_idx = rxq->rq_ci & q_mask;
+ for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+ (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
+ rte_io_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+}
+
+#endif /* RTE_PMD_MLX5_RXTX_VEC_H_ */
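The static_assert list above pins the exact rte_mbuf and mlx5_cqe layout that the vector shuffle masks in the SSE/NEON headers rely on; if a field moves, the build fails instead of silently corrupting descriptors. A standalone illustration of the same guard pattern on a made-up structure follows, assuming a C11 toolchain.

/* Sketch only: the static_assert layout-guard pattern applied to a made-up
 * descriptor, mirroring the checks above. */
#include <assert.h>   /* static_assert (C11) */
#include <stddef.h>   /* offsetof() */
#include <stdint.h>

struct example_desc {
	uint32_t len;      /* read by one vector lane */
	uint32_t rss_hash; /* must immediately follow len */
};

/* A single 8-byte vector load of (len, rss_hash) is only valid while this
 * holds; moving either field becomes a compile-time error. */
static_assert(offsetof(struct example_desc, rss_hash) ==
	      offsetof(struct example_desc, len) + 4,
	      "struct example_desc layout changed");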
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
new file mode 100644
index 00000000..c721d80e
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -0,0 +1,1039 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX5_RXTX_VEC_NEON_H_
+#define RTE_PMD_MLX5_RXTX_VEC_NEON_H_
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <arm_neon.h>
+
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_prefetch.h>
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_rxtx_vec.h"
+#include "mlx5_autoconf.h"
+#include "mlx5_defs.h"
+#include "mlx5_prm.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+/**
+ * Fill in buffer descriptors in a multi-packet send descriptor.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param dseg
+ * Pointer to buffer descriptor to be written.
+ * @param pkts
+ * Pointer to array of packets to be sent.
+ * @param n
+ * Number of packets to be filled.
+ */
+static inline void
+txq_wr_dseg_v(struct mlx5_txq_data *txq, uint8_t *dseg,
+ struct rte_mbuf **pkts, unsigned int n)
+{
+ unsigned int pos;
+ uintptr_t addr;
+ const uint8x16_t dseg_shuf_m = {
+ 3, 2, 1, 0, /* length, bswap32 */
+ 4, 5, 6, 7, /* lkey */
+ 15, 14, 13, 12, /* addr, bswap64 */
+ 11, 10, 9, 8
+ };
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint32_t tx_byte = 0;
+#endif
+
+ for (pos = 0; pos < n; ++pos, dseg += MLX5_WQE_DWORD_SIZE) {
+ uint8x16_t desc;
+ struct rte_mbuf *pkt = pkts[pos];
+
+ addr = rte_pktmbuf_mtod(pkt, uintptr_t);
+ desc = vreinterpretq_u8_u32((uint32x4_t) {
+ DATA_LEN(pkt),
+ mlx5_tx_mb2mr(txq, pkt),
+ addr,
+ addr >> 32 });
+ desc = vqtbl1q_u8(desc, dseg_shuf_m);
+ vst1q_u8(dseg, desc);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ tx_byte += DATA_LEN(pkt);
+#endif
+ }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ txq->stats.obytes += tx_byte;
+#endif
+}
+
+/**
+ * Send multi-segmented packets until a single-segment packet is encountered in
+ * the pkts list.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param pkts
+ * Pointer to array of packets to be sent.
+ * @param pkts_n
+ * Number of packets to be sent.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+static uint16_t
+txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n)
+{
+ uint16_t elts_head = txq->elts_head;
+ const uint16_t elts_n = 1 << txq->elts_n;
+ const uint16_t elts_m = elts_n - 1;
+ const uint16_t wq_n = 1 << txq->wqe_n;
+ const uint16_t wq_mask = wq_n - 1;
+ const unsigned int nb_dword_per_wqebb =
+ MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;
+ const unsigned int nb_dword_in_hdr =
+ sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
+ unsigned int n;
+ volatile struct mlx5_wqe *wqe = NULL;
+
+ assert(elts_n > pkts_n);
+ mlx5_tx_complete(txq);
+ if (unlikely(!pkts_n))
+ return 0;
+ for (n = 0; n < pkts_n; ++n) {
+ struct rte_mbuf *buf = pkts[n];
+ unsigned int segs_n = buf->nb_segs;
+ unsigned int ds = nb_dword_in_hdr;
+ unsigned int len = PKT_LEN(buf);
+ uint16_t wqe_ci = txq->wqe_ci;
+ const uint8x16_t ctrl_shuf_m = {
+ 3, 2, 1, 0, /* bswap32 */
+ 7, 6, 5, 4, /* bswap32 */
+ 11, 10, 9, 8, /* bswap32 */
+ 12, 13, 14, 15
+ };
+ uint8_t cs_flags = 0;
+ uint16_t max_elts;
+ uint16_t max_wqe;
+ uint8x16_t *t_wqe;
+ uint8_t *dseg;
+ uint8x16_t ctrl;
+
+ assert(segs_n);
+ max_elts = elts_n - (elts_head - txq->elts_tail);
+ max_wqe = wq_n - (txq->wqe_ci - txq->wqe_pi);
+ /*
+ * A MPW session consumes 2 WQEs at most to
+ * include MLX5_MPW_DSEG_MAX pointers.
+ */
+ if (segs_n == 1 ||
+ max_elts < segs_n || max_wqe < 2)
+ break;
+ wqe = &((volatile struct mlx5_wqe64 *)
+ txq->wqes)[wqe_ci & wq_mask].hdr;
+ if (buf->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
+ const uint64_t is_tunneled =
+ buf->ol_flags & (PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN);
+
+ if (is_tunneled && txq->tunnel_en) {
+ cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
+ MLX5_ETH_WQE_L4_INNER_CSUM;
+ if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+ cs_flags |= MLX5_ETH_WQE_L3_CSUM;
+ } else {
+ cs_flags = MLX5_ETH_WQE_L3_CSUM |
+ MLX5_ETH_WQE_L4_CSUM;
+ }
+ }
+ /* Title WQEBB pointer. */
+ t_wqe = (uint8x16_t *)wqe;
+ dseg = (uint8_t *)(wqe + 1);
+ do {
+ if (!(ds++ % nb_dword_per_wqebb)) {
+ dseg = (uint8_t *)
+ &((volatile struct mlx5_wqe64 *)
+ txq->wqes)[++wqe_ci & wq_mask];
+ }
+ txq_wr_dseg_v(txq, dseg, &buf, 1);
+ dseg += MLX5_WQE_DWORD_SIZE;
+ (*txq->elts)[elts_head++ & elts_m] = buf;
+ buf = buf->next;
+ } while (--segs_n);
+ ++wqe_ci;
+ /* Fill CTRL in the header. */
+ ctrl = vreinterpretq_u8_u32((uint32x4_t) {
+ MLX5_OPC_MOD_MPW << 24 |
+ txq->wqe_ci << 8 | MLX5_OPCODE_TSO,
+ txq->qp_num_8s | ds, 0, 0});
+ ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
+ vst1q_u8((void *)t_wqe, ctrl);
+ /* Fill ESEG in the header. */
+ vst1q_u16((void *)(t_wqe + 1),
+ (uint16x8_t) { 0, 0, cs_flags, rte_cpu_to_be_16(len),
+ 0, 0, 0, 0 });
+ txq->wqe_ci = wqe_ci;
+ }
+ if (!n)
+ return 0;
+ txq->elts_comp += (uint16_t)(elts_head - txq->elts_head);
+ txq->elts_head = elts_head;
+ if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
+ wqe->ctrl[2] = rte_cpu_to_be_32(8);
+ wqe->ctrl[3] = txq->elts_head;
+ txq->elts_comp = 0;
+ ++txq->cq_pi;
+ }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ txq->stats.opackets += n;
+#endif
+ mlx5_tx_dbrec(txq, wqe);
+ return n;
+}
+
+/**
+ * Send a burst of packets with Enhanced MPW. If a multi-segment packet is
+ * encountered, the function returns so that it can be processed by
+ * txq_scatter_v(). All the packets in the pkts list should be single-segment
+ * packets having the same offload flags. This must be checked beforehand by
+ * txq_check_multiseg() and txq_calc_offload().
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param pkts
+ * Pointer to array of packets to be sent.
+ * @param pkts_n
+ * Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
+ * @param cs_flags
+ * Checksum offload flags to be written in the descriptor.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+static inline uint16_t
+txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
+ uint8_t cs_flags)
+{
+ struct rte_mbuf **elts;
+ uint16_t elts_head = txq->elts_head;
+ const uint16_t elts_n = 1 << txq->elts_n;
+ const uint16_t elts_m = elts_n - 1;
+ const unsigned int nb_dword_per_wqebb =
+ MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;
+ const unsigned int nb_dword_in_hdr =
+ sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
+ unsigned int n = 0;
+ unsigned int pos;
+ uint16_t max_elts;
+ uint16_t max_wqe;
+ uint32_t comp_req = 0;
+ const uint16_t wq_n = 1 << txq->wqe_n;
+ const uint16_t wq_mask = wq_n - 1;
+ uint16_t wq_idx = txq->wqe_ci & wq_mask;
+ volatile struct mlx5_wqe64 *wq =
+ &((volatile struct mlx5_wqe64 *)txq->wqes)[wq_idx];
+ volatile struct mlx5_wqe *wqe = (volatile struct mlx5_wqe *)wq;
+ const uint8x16_t ctrl_shuf_m = {
+ 3, 2, 1, 0, /* bswap32 */
+ 7, 6, 5, 4, /* bswap32 */
+ 11, 10, 9, 8, /* bswap32 */
+ 12, 13, 14, 15
+ };
+ uint8x16_t *t_wqe;
+ uint8_t *dseg;
+ uint8x16_t ctrl;
+
+ /* Make sure all packets can fit into a single WQE. */
+ assert(elts_n > pkts_n);
+ mlx5_tx_complete(txq);
+ max_elts = (elts_n - (elts_head - txq->elts_tail));
+ max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
+ pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+ if (unlikely(!pkts_n))
+ return 0;
+ elts = &(*txq->elts)[elts_head & elts_m];
+ /* Loop for available tailroom first. */
+ n = RTE_MIN(elts_n - (elts_head & elts_m), pkts_n);
+ for (pos = 0; pos < (n & -2); pos += 2)
+ vst1q_u64((void *)&elts[pos], vld1q_u64((void *)&pkts[pos]));
+ if (n & 1)
+ elts[pos] = pkts[pos];
+ /* Check if it crosses the end of the queue. */
+ if (unlikely(n < pkts_n)) {
+ elts = &(*txq->elts)[0];
+ for (pos = 0; pos < pkts_n - n; ++pos)
+ elts[pos] = pkts[n + pos];
+ }
+ txq->elts_head += pkts_n;
+ /* Save title WQEBB pointer. */
+ t_wqe = (uint8x16_t *)wqe;
+ dseg = (uint8_t *)(wqe + 1);
+ /* Calculate the number of entries to the end. */
+ n = RTE_MIN(
+ (wq_n - wq_idx) * nb_dword_per_wqebb - nb_dword_in_hdr,
+ pkts_n);
+ /* Fill DSEGs. */
+ txq_wr_dseg_v(txq, dseg, pkts, n);
+ /* Check if it crosses the end of the queue. */
+ if (n < pkts_n) {
+ dseg = (uint8_t *)txq->wqes;
+ txq_wr_dseg_v(txq, dseg, &pkts[n], pkts_n - n);
+ }
+ if (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) {
+ txq->elts_comp += pkts_n;
+ } else {
+ /* Request a completion. */
+ txq->elts_comp = 0;
+ ++txq->cq_pi;
+ comp_req = 8;
+ }
+ /* Fill CTRL in the header. */
+ ctrl = vreinterpretq_u8_u32((uint32x4_t) {
+ MLX5_OPC_MOD_ENHANCED_MPSW << 24 |
+ txq->wqe_ci << 8 | MLX5_OPCODE_ENHANCED_MPSW,
+ txq->qp_num_8s | (pkts_n + 2),
+ comp_req,
+ txq->elts_head });
+ ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
+ vst1q_u8((void *)t_wqe, ctrl);
+ /* Fill ESEG in the header. */
+ vst1q_u8((void *)(t_wqe + 1),
+ (uint8x16_t) { 0, 0, 0, 0,
+ cs_flags, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0 });
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ txq->stats.opackets += pkts_n;
+#endif
+ txq->wqe_ci += (nb_dword_in_hdr + pkts_n + (nb_dword_per_wqebb - 1)) /
+ nb_dword_per_wqebb;
+ /* Ring QP doorbell. */
+ mlx5_tx_dbrec_cond_wmb(txq, wqe, pkts_n < MLX5_VPMD_TX_MAX_BURST);
+ return pkts_n;
+}
+
+/**
+ * Copy received mbufs from the RX SW ring to the array of packets to be returned.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param pkts
+ * Pointer to array of packets to be stored.
+ * @param n
+ * Number of packets to be stored.
+ */
+static inline void
+rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)
+{
+ const uint16_t q_mask = (1 << rxq->elts_n) - 1;
+ struct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];
+ unsigned int pos;
+ uint16_t p = n & -2;
+
+ for (pos = 0; pos < p; pos += 2) {
+ uint64x2_t mbp;
+
+ mbp = vld1q_u64((void *)&elts[pos]);
+ vst1q_u64((void *)&pkts[pos], mbp);
+ }
+ if (n & 1)
+ pkts[pos] = elts[pos];
+}
+
+/**
+ * Decompress a compressed completion and fill in mbufs in RX SW ring with data
+ * extracted from the title completion descriptor.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param cq
+ * Pointer to completion array having a compressed completion at first.
+ * @param elts
+ * Pointer to SW ring to be filled. The first mbuf has to be pre-built from
+ * the title completion descriptor to be copied to the rest of mbufs.
+ */
+static inline void
+rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
+ struct rte_mbuf **elts)
+{
+ volatile struct mlx5_mini_cqe8 *mcq = (void *)&(cq + 1)->pkt_info;
+ struct rte_mbuf *t_pkt = elts[0]; /* Title packet is pre-built. */
+ unsigned int pos;
+ unsigned int i;
+ unsigned int inv = 0;
+ /* Mask to shuffle from extracted mini CQE to mbuf. */
+ const uint8x16_t mcqe_shuf_m1 = {
+ -1, -1, -1, -1, /* skip packet_type */
+ 7, 6, -1, -1, /* pkt_len, bswap16 */
+ 7, 6, /* data_len, bswap16 */
+ -1, -1, /* skip vlan_tci */
+ 3, 2, 1, 0 /* hash.rss, bswap32 */
+ };
+ const uint8x16_t mcqe_shuf_m2 = {
+ -1, -1, -1, -1, /* skip packet_type */
+ 15, 14, -1, -1, /* pkt_len, bswap16 */
+ 15, 14, /* data_len, bswap16 */
+ -1, -1, /* skip vlan_tci */
+ 11, 10, 9, 8 /* hash.rss, bswap32 */
+ };
+ /* Restore the compressed count. Must be 16 bits. */
+ const uint16_t mcqe_n = t_pkt->data_len +
+ (rxq->crc_present * ETHER_CRC_LEN);
+ const uint64x2_t rearm =
+ vld1q_u64((void *)&t_pkt->rearm_data);
+ const uint32x4_t rxdf_mask = {
+ 0xffffffff, /* packet_type */
+ 0, /* skip pkt_len */
+ 0xffff0000, /* vlan_tci, skip data_len */
+ 0, /* skip hash.rss */
+ };
+ const uint8x16_t rxdf =
+ vandq_u8(vld1q_u8((void *)&t_pkt->rx_descriptor_fields1),
+ vreinterpretq_u8_u32(rxdf_mask));
+ const uint16x8_t crc_adj = {
+ 0, 0,
+ rxq->crc_present * ETHER_CRC_LEN, 0,
+ rxq->crc_present * ETHER_CRC_LEN, 0,
+ 0, 0
+ };
+ const uint32_t flow_tag = t_pkt->hash.fdir.hi;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint32_t rcvd_byte = 0;
+#endif
+ /* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
+ const uint8x8_t len_shuf_m = {
+ 7, 6, /* 1st mCQE */
+ 15, 14, /* 2nd mCQE */
+ 23, 22, /* 3rd mCQE */
+ 31, 30 /* 4th mCQE */
+ };
+
+ /*
+ * A. load mCQEs into a 128bit register.
+ * B. store rearm data to mbuf.
+ * C. combine data from mCQEs with rx_descriptor_fields1.
+ * D. store rx_descriptor_fields1.
+ * E. store flow tag (rte_flow mark).
+ */
+ for (pos = 0; pos < mcqe_n; ) {
+ uint8_t *p = (void *)&mcq[pos % 8];
+ uint8_t *e0 = (void *)&elts[pos]->rearm_data;
+ uint8_t *e1 = (void *)&elts[pos + 1]->rearm_data;
+ uint8_t *e2 = (void *)&elts[pos + 2]->rearm_data;
+ uint8_t *e3 = (void *)&elts[pos + 3]->rearm_data;
+ uint16x4_t byte_cnt;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint16x4_t invalid_mask =
+ vcreate_u16(mcqe_n - pos < MLX5_VPMD_DESCS_PER_LOOP ?
+ -1UL << ((mcqe_n - pos) *
+ sizeof(uint16_t) * 8) : 0);
+#endif
+
+ if (!(pos & 0x7) && pos + 8 < mcqe_n)
+ rte_prefetch0((void *)(cq + pos + 8));
+ __asm__ volatile (
+ /* A.1 load mCQEs into a 128bit register. */
+ "ld1 {v16.16b - v17.16b}, [%[mcq]] \n\t"
+ /* B.1 store rearm data to mbuf. */
+ "st1 {%[rearm].2d}, [%[e0]] \n\t"
+ "add %[e0], %[e0], #16 \n\t"
+ "st1 {%[rearm].2d}, [%[e1]] \n\t"
+ "add %[e1], %[e1], #16 \n\t"
+ /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
+ "tbl v18.16b, {v16.16b}, %[mcqe_shuf_m1].16b \n\t"
+ "tbl v19.16b, {v16.16b}, %[mcqe_shuf_m2].16b \n\t"
+ "sub v18.8h, v18.8h, %[crc_adj].8h \n\t"
+ "sub v19.8h, v19.8h, %[crc_adj].8h \n\t"
+ "orr v18.16b, v18.16b, %[rxdf].16b \n\t"
+ "orr v19.16b, v19.16b, %[rxdf].16b \n\t"
+ /* D.1 store rx_descriptor_fields1. */
+ "st1 {v18.2d}, [%[e0]] \n\t"
+ "st1 {v19.2d}, [%[e1]] \n\t"
+ /* B.1 store rearm data to mbuf. */
+ "st1 {%[rearm].2d}, [%[e2]] \n\t"
+ "add %[e2], %[e2], #16 \n\t"
+ "st1 {%[rearm].2d}, [%[e3]] \n\t"
+ "add %[e3], %[e3], #16 \n\t"
+ /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
+ "tbl v18.16b, {v17.16b}, %[mcqe_shuf_m1].16b \n\t"
+ "tbl v19.16b, {v17.16b}, %[mcqe_shuf_m2].16b \n\t"
+ "sub v18.8h, v18.8h, %[crc_adj].8h \n\t"
+ "sub v19.8h, v19.8h, %[crc_adj].8h \n\t"
+ "orr v18.16b, v18.16b, %[rxdf].16b \n\t"
+ "orr v19.16b, v19.16b, %[rxdf].16b \n\t"
+ /* D.1 store rx_descriptor_fields1. */
+ "st1 {v18.2d}, [%[e2]] \n\t"
+ "st1 {v19.2d}, [%[e3]] \n\t"
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ "tbl %[byte_cnt].8b, {v16.16b - v17.16b}, %[len_shuf_m].8b \n\t"
+#endif
+ :[byte_cnt]"=&w"(byte_cnt)
+ :[mcq]"r"(p),
+ [rxdf]"w"(rxdf),
+ [rearm]"w"(rearm),
+ [e3]"r"(e3), [e2]"r"(e2), [e1]"r"(e1), [e0]"r"(e0),
+ [mcqe_shuf_m1]"w"(mcqe_shuf_m1),
+ [mcqe_shuf_m2]"w"(mcqe_shuf_m2),
+ [crc_adj]"w"(crc_adj),
+ [len_shuf_m]"w"(len_shuf_m)
+ :"memory", "v16", "v17", "v18", "v19");
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ byte_cnt = vbic_u16(byte_cnt, invalid_mask);
+ rcvd_byte += vget_lane_u64(vpaddl_u32(vpaddl_u16(byte_cnt)), 0);
+#endif
+ if (rxq->mark) {
+ /* E.1 store flow tag (rte_flow mark). */
+ elts[pos]->hash.fdir.hi = flow_tag;
+ elts[pos + 1]->hash.fdir.hi = flow_tag;
+ elts[pos + 2]->hash.fdir.hi = flow_tag;
+ elts[pos + 3]->hash.fdir.hi = flow_tag;
+ }
+ pos += MLX5_VPMD_DESCS_PER_LOOP;
+ /* Move to next CQE and invalidate consumed CQEs. */
+ if (!(pos & 0x7) && pos < mcqe_n) {
+ mcq = (void *)&(cq + pos)->pkt_info;
+ for (i = 0; i < 8; ++i)
+ cq[inv++].op_own = MLX5_CQE_INVALIDATE;
+ }
+ }
+ /* Invalidate the rest of CQEs. */
+ for (; inv < mcqe_n; ++inv)
+ cq[inv].op_own = MLX5_CQE_INVALIDATE;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ rxq->stats.ipackets += mcqe_n;
+ rxq->stats.ibytes += rcvd_byte;
+#endif
+ rxq->cq_ci += mcqe_n;
+}
+
+/**
+ * Calculate packet type and offload flag for mbuf and store it.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param ptype_info
+ * Array of four 4bytes packet type info extracted from the original
+ * completion descriptor.
+ * @param flow_tag
+ * Array of four 4bytes flow ID extracted from the original completion
+ * descriptor.
+ * @param op_err
+ * Opcode vector having responder error status. Each field is 4B.
+ * @param pkts
+ * Pointer to array of packets to be filled.
+ */
+static inline void
+rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
+ uint32x4_t ptype_info, uint32x4_t flow_tag,
+ uint16x4_t op_err, struct rte_mbuf **pkts)
+{
+ uint16x4_t ptype;
+ uint32x4_t pinfo, cv_flags;
+ uint32x4_t ol_flags =
+ vdupq_n_u32(rxq->rss_hash * PKT_RX_RSS_HASH |
+ rxq->hw_timestamp * PKT_RX_TIMESTAMP);
+ const uint32x4_t ptype_ol_mask = { 0x106, 0x106, 0x106, 0x106 };
+ const uint8x16_t cv_flag_sel = {
+ 0,
+ (uint8_t)(PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
+ (uint8_t)(PKT_RX_IP_CKSUM_GOOD >> 1),
+ 0,
+ (uint8_t)(PKT_RX_L4_CKSUM_GOOD >> 1),
+ 0,
+ (uint8_t)((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ const uint32x4_t cv_mask =
+ vdupq_n_u32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
+ const uint64x1_t mbuf_init = vld1_u64(&rxq->mbuf_initializer);
+ const uint64x1_t r32_mask = vcreate_u64(0xffffffff);
+ uint64x2_t rearm0, rearm1, rearm2, rearm3;
+
+ if (rxq->mark) {
+ const uint32x4_t ft_def = vdupq_n_u32(MLX5_FLOW_MARK_DEFAULT);
+ const uint32x4_t fdir_flags = vdupq_n_u32(PKT_RX_FDIR);
+ const uint32x4_t fdir_id_flags = vdupq_n_u32(PKT_RX_FDIR_ID);
+
+ /* Check if flow tag is non-zero then set PKT_RX_FDIR. */
+ ol_flags = vorrq_u32(ol_flags, vbicq_u32(fdir_flags,
+ vceqzq_u32(flow_tag)));
+ /* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
+ ol_flags = vorrq_u32(ol_flags,
+ vbicq_u32(fdir_id_flags,
+ vceqq_u32(flow_tag, ft_def)));
+ }
+ /*
+ * ptype_info has the following:
+ * bit[1] = l3_ok
+ * bit[2] = l4_ok
+ * bit[8] = cv
+ * bit[11:10] = l3_hdr_type
+ * bit[14:12] = l4_hdr_type
+ * bit[15] = ip_frag
+ * bit[16] = tunneled
+ * bit[17] = outer_l3_type
+ */
+ ptype = vshrn_n_u32(ptype_info, 10);
+ /* Errored packets will have RTE_PTYPE_ALL_MASK. */
+ ptype = vorr_u16(ptype, op_err);
+ pkts[0]->packet_type =
+ mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 6)];
+ pkts[1]->packet_type =
+ mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 4)];
+ pkts[2]->packet_type =
+ mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 2)];
+ pkts[3]->packet_type =
+ mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 0)];
+ /* Fill flags for checksum and VLAN. */
+ pinfo = vandq_u32(ptype_info, ptype_ol_mask);
+ pinfo = vreinterpretq_u32_u8(
+ vqtbl1q_u8(cv_flag_sel, vreinterpretq_u8_u32(pinfo)));
+ /* Locate checksum flags at byte[2:1] and merge with VLAN flags. */
+ cv_flags = vshlq_n_u32(pinfo, 9);
+ cv_flags = vorrq_u32(pinfo, cv_flags);
+ /* Move back flags to start from byte[0]. */
+ cv_flags = vshrq_n_u32(cv_flags, 8);
+ /* Mask out garbage bits. */
+ cv_flags = vandq_u32(cv_flags, cv_mask);
+ /* Merge to ol_flags. */
+ ol_flags = vorrq_u32(ol_flags, cv_flags);
+ /* Merge mbuf_init and ol_flags, and store. */
+ rearm0 = vcombine_u64(mbuf_init,
+ vshr_n_u64(vget_high_u64(vreinterpretq_u64_u32(
+ ol_flags)), 32));
+ rearm1 = vcombine_u64(mbuf_init,
+ vand_u64(vget_high_u64(vreinterpretq_u64_u32(
+ ol_flags)), r32_mask));
+ rearm2 = vcombine_u64(mbuf_init,
+ vshr_n_u64(vget_low_u64(vreinterpretq_u64_u32(
+ ol_flags)), 32));
+ rearm3 = vcombine_u64(mbuf_init,
+ vand_u64(vget_low_u64(vreinterpretq_u64_u32(
+ ol_flags)), r32_mask));
+ vst1q_u64((void *)&pkts[0]->rearm_data, rearm0);
+ vst1q_u64((void *)&pkts[1]->rearm_data, rearm1);
+ vst1q_u64((void *)&pkts[2]->rearm_data, rearm2);
+ vst1q_u64((void *)&pkts[3]->rearm_data, rearm3);
+}
+
+/**
+ * Receive a burst of packets. An errored completion also consumes an mbuf, but
+ * the packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
+ * before returning to the application.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets received including errors (<= pkts_n).
+ */
+static inline uint16_t
+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ const uint16_t q_n = 1 << rxq->cqe_n;
+ const uint16_t q_mask = q_n - 1;
+ volatile struct mlx5_cqe *cq;
+ struct rte_mbuf **elts;
+ unsigned int pos;
+ uint64_t n;
+ uint16_t repl_n;
+ uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
+ uint16_t nocmp_n = 0;
+ uint16_t rcvd_pkt = 0;
+ unsigned int cq_idx = rxq->cq_ci & q_mask;
+ unsigned int elts_idx;
+ const uint16x4_t ownership = vdup_n_u16(!(rxq->cq_ci & (q_mask + 1)));
+ const uint16x4_t owner_check = vcreate_u16(0x0001000100010001);
+ const uint16x4_t opcode_check = vcreate_u16(0x00f000f000f000f0);
+ const uint16x4_t format_check = vcreate_u16(0x000c000c000c000c);
+ const uint16x4_t resp_err_check = vcreate_u16(0x00e000e000e000e0);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint32_t rcvd_byte = 0;
+#endif
+ /* Mask to generate 16B length vector. */
+ const uint8x8_t len_shuf_m = {
+ 52, 53, /* 4th CQE */
+ 36, 37, /* 3rd CQE */
+ 20, 21, /* 2nd CQE */
+ 4, 5 /* 1st CQE */
+ };
+ /* Mask to extract 16B data from a 64B CQE. */
+ const uint8x16_t cqe_shuf_m = {
+ 28, 29, /* hdr_type_etc */
+ 0, /* pkt_info */
+ -1, /* null */
+ 47, 46, /* byte_cnt, bswap16 */
+ 31, 30, /* vlan_info, bswap16 */
+ 15, 14, 13, 12, /* rx_hash_res, bswap32 */
+ 57, 58, 59, /* flow_tag */
+ 63 /* op_own */
+ };
+ /* Mask to generate 16B data for mbuf. */
+ const uint8x16_t mb_shuf_m = {
+ 4, 5, -1, -1, /* pkt_len */
+ 4, 5, /* data_len */
+ 6, 7, /* vlan_tci */
+ 8, 9, 10, 11, /* hash.rss */
+ 12, 13, 14, -1 /* hash.fdir.hi */
+ };
+ /* Mask to generate 16B owner vector. */
+ const uint8x8_t owner_shuf_m = {
+ 63, -1, /* 4th CQE */
+ 47, -1, /* 3rd CQE */
+ 31, -1, /* 2nd CQE */
+ 15, -1 /* 1st CQE */
+ };
+ /* Mask to generate a vector having packet_type/ol_flags. */
+ const uint8x16_t ptype_shuf_m = {
+ 48, 49, 50, -1, /* 4th CQE */
+ 32, 33, 34, -1, /* 3rd CQE */
+ 16, 17, 18, -1, /* 2nd CQE */
+ 0, 1, 2, -1 /* 1st CQE */
+ };
+ /* Mask to generate a vector having flow tags. */
+ const uint8x16_t ftag_shuf_m = {
+ 60, 61, 62, -1, /* 4th CQE */
+ 44, 45, 46, -1, /* 3rd CQE */
+ 28, 29, 30, -1, /* 2nd CQE */
+ 12, 13, 14, -1 /* 1st CQE */
+ };
+ const uint16x8_t crc_adj = {
+ 0, 0, rxq->crc_present * ETHER_CRC_LEN, 0, 0, 0, 0, 0
+ };
+ const uint32x4_t flow_mark_adj = { 0, 0, 0, rxq->mark * (-1) };
+
+ assert(rxq->sges_n == 0);
+ assert(rxq->cqe_n == rxq->elts_n);
+ cq = &(*rxq->cqes)[cq_idx];
+ rte_prefetch_non_temporal(cq);
+ rte_prefetch_non_temporal(cq + 1);
+ rte_prefetch_non_temporal(cq + 2);
+ rte_prefetch_non_temporal(cq + 3);
+ pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
+ /*
+ * Order of indexes:
+ * rq_ci >= cq_ci >= rq_pi
+ * Definition of indexes:
+ * rq_ci - cq_ci := # of buffers owned by HW (posted).
+ * cq_ci - rq_pi := # of buffers not returned to app (decompressed).
+ * N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished).
+ */
+ repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
+ if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH)
+ mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
+ /* See if there're unreturned mbufs from compressed CQE. */
+ rcvd_pkt = rxq->cq_ci - rxq->rq_pi;
+ if (rcvd_pkt > 0) {
+ rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
+ rxq_copy_mbuf_v(rxq, pkts, rcvd_pkt);
+ rxq->rq_pi += rcvd_pkt;
+ pkts += rcvd_pkt;
+ }
+ elts_idx = rxq->rq_pi & q_mask;
+ elts = &(*rxq->elts)[elts_idx];
+ /* Not to overflow pkts array. */
+ pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
+ /* Not to cross queue end. */
+ pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
+ if (!pkts_n)
+ return rcvd_pkt;
+ /* At this point, there shouldn't be any remaining packets. */
+ assert(rxq->rq_pi == rxq->cq_ci);
+ /*
+ * Note that vectors have reverse order - {v3, v2, v1, v0}, because
+ * there's no instruction to count trailing zeros. __builtin_clzl() is
+ * used instead.
+ *
+ * A. copy 4 mbuf pointers from elts ring to returning pkts.
+ * B. load 64B CQE and extract necessary fields
+ * The final 16-byte cqes[] extracted from the original 64-byte CQE has the
+ * following structure:
+ * struct {
+ * uint16_t hdr_type_etc;
+ * uint8_t pkt_info;
+ * uint8_t rsvd;
+ * uint16_t byte_cnt;
+ * uint16_t vlan_info;
+ * uint32_t rx_hash_res;
+ * uint8_t flow_tag[3];
+ * uint8_t op_own;
+ * } c;
+ * C. fill in mbuf.
+ * D. get valid CQEs.
+ * E. find compressed CQE.
+ */
+ for (pos = 0;
+ pos < pkts_n;
+ pos += MLX5_VPMD_DESCS_PER_LOOP) {
+ uint16x4_t op_own;
+ uint16x4_t opcode, owner_mask, invalid_mask;
+ uint16x4_t comp_mask;
+ uint16x4_t mask;
+ uint16x4_t byte_cnt;
+ uint32x4_t ptype_info, flow_tag;
+ uint8_t *p0, *p1, *p2, *p3;
+ uint8_t *e0 = (void *)&elts[pos]->pkt_len;
+ uint8_t *e1 = (void *)&elts[pos + 1]->pkt_len;
+ uint8_t *e2 = (void *)&elts[pos + 2]->pkt_len;
+ uint8_t *e3 = (void *)&elts[pos + 3]->pkt_len;
+ void *elts_p = (void *)&elts[pos];
+ void *pkts_p = (void *)&pkts[pos];
+
+ /* A.0 do not cross the end of CQ. */
+ mask = vcreate_u16(pkts_n - pos < MLX5_VPMD_DESCS_PER_LOOP ?
+ -1UL >> ((pkts_n - pos) *
+ sizeof(uint16_t) * 8) : 0);
+ p0 = (void *)&cq[pos].pkt_info;
+ p1 = p0 + (pkts_n - pos > 1) * sizeof(struct mlx5_cqe);
+ p2 = p1 + (pkts_n - pos > 2) * sizeof(struct mlx5_cqe);
+ p3 = p2 + (pkts_n - pos > 3) * sizeof(struct mlx5_cqe);
+ /* Prefetch next 4 CQEs. */
+ if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
+ unsigned int next = pos + MLX5_VPMD_DESCS_PER_LOOP;
+ rte_prefetch_non_temporal(&cq[next]);
+ rte_prefetch_non_temporal(&cq[next + 1]);
+ rte_prefetch_non_temporal(&cq[next + 2]);
+ rte_prefetch_non_temporal(&cq[next + 3]);
+ }
+ __asm__ volatile (
+ /* B.1 (CQE 3) load a block having op_own. */
+ "ld1 {v19.16b}, [%[p3]] \n\t"
+ "sub %[p3], %[p3], #48 \n\t"
+ /* B.2 (CQE 3) load the rest blocks. */
+ "ld1 {v16.16b - v18.16b}, [%[p3]] \n\t"
+ /* B.3 (CQE 3) extract 16B fields. */
+ "tbl v23.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+ /* B.4 (CQE 3) adjust CRC length. */
+ "sub v23.8h, v23.8h, %[crc_adj].8h \n\t"
+ /* B.1 (CQE 2) load a block having op_own. */
+ "ld1 {v19.16b}, [%[p2]] \n\t"
+ "sub %[p2], %[p2], #48 \n\t"
+ /* C.1 (CQE 3) generate final structure for mbuf. */
+ "tbl v15.16b, {v23.16b}, %[mb_shuf_m].16b \n\t"
+ /* B.2 (CQE 2) load the rest blocks. */
+ "ld1 {v16.16b - v18.16b}, [%[p2]] \n\t"
+ /* B.3 (CQE 2) extract 16B fields. */
+ "tbl v22.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+ /* B.4 (CQE 2) adjust CRC length. */
+ "sub v22.8h, v22.8h, %[crc_adj].8h \n\t"
+ /* B.1 (CQE 1) load a block having op_own. */
+ "ld1 {v19.16b}, [%[p1]] \n\t"
+ "sub %[p1], %[p1], #48 \n\t"
+ /* C.1 (CQE 2) generate final structure for mbuf. */
+ "tbl v14.16b, {v22.16b}, %[mb_shuf_m].16b \n\t"
+ /* B.2 (CQE 1) load the rest blocks. */
+ "ld1 {v16.16b - v18.16b}, [%[p1]] \n\t"
+ /* B.3 (CQE 1) extract 16B fields. */
+ "tbl v21.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+ /* B.4 (CQE 1) adjust CRC length. */
+ "sub v21.8h, v21.8h, %[crc_adj].8h \n\t"
+ /* B.1 (CQE 0) load a block having op_own. */
+ "ld1 {v19.16b}, [%[p0]] \n\t"
+ "sub %[p0], %[p0], #48 \n\t"
+ /* C.1 (CQE 1) generate final structure for mbuf. */
+ "tbl v13.16b, {v21.16b}, %[mb_shuf_m].16b \n\t"
+ /* B.2 (CQE 0) load the rest blocks. */
+ "ld1 {v16.16b - v18.16b}, [%[p0]] \n\t"
+ /* B.3 (CQE 0) extract 16B fields. */
+ "tbl v20.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+ /* B.4 (CQE 0) adjust CRC length. */
+ "sub v20.8h, v20.8h, %[crc_adj].8h \n\t"
+ /* A.1 load mbuf pointers. */
+ "ld1 {v24.2d - v25.2d}, [%[elts_p]] \n\t"
+ /* D.1 extract op_own byte. */
+ "tbl %[op_own].8b, {v20.16b - v23.16b}, %[owner_shuf_m].8b \n\t"
+ /* C.2 (CQE 3) adjust flow mark. */
+ "add v15.4s, v15.4s, %[flow_mark_adj].4s \n\t"
+ /* C.3 (CQE 3) fill in mbuf - rx_descriptor_fields1. */
+ "st1 {v15.2d}, [%[e3]] \n\t"
+ /* C.2 (CQE 2) adjust flow mark. */
+ "add v14.4s, v14.4s, %[flow_mark_adj].4s \n\t"
+ /* C.3 (CQE 2) fill in mbuf - rx_descriptor_fields1. */
+ "st1 {v14.2d}, [%[e2]] \n\t"
+ /* C.1 (CQE 0) generate final structure for mbuf. */
+ "tbl v12.16b, {v20.16b}, %[mb_shuf_m].16b \n\t"
+ /* C.2 (CQE 1) adjust flow mark. */
+ "add v13.4s, v13.4s, %[flow_mark_adj].4s \n\t"
+ /* C.3 (CQE 1) fill in mbuf - rx_descriptor_fields1. */
+ "st1 {v13.2d}, [%[e1]] \n\t"
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Extract byte_cnt. */
+ "tbl %[byte_cnt].8b, {v20.16b - v23.16b}, %[len_shuf_m].8b \n\t"
+#endif
+ /* Extract ptype_info. */
+ "tbl %[ptype_info].16b, {v20.16b - v23.16b}, %[ptype_shuf_m].16b \n\t"
+ /* Extract flow_tag. */
+ "tbl %[flow_tag].16b, {v20.16b - v23.16b}, %[ftag_shuf_m].16b \n\t"
+ /* A.2 copy mbuf pointers. */
+ "st1 {v24.2d - v25.2d}, [%[pkts_p]] \n\t"
+ /* C.2 (CQE 0) adjust flow mark. */
+ "add v12.4s, v12.4s, %[flow_mark_adj].4s \n\t"
+ /* C.3 (CQE 0) fill in mbuf - rx_descriptor_fields1. */
+ "st1 {v12.2d}, [%[e0]] \n\t"
+ :[op_own]"=&w"(op_own),
+ [byte_cnt]"=&w"(byte_cnt),
+ [ptype_info]"=&w"(ptype_info),
+ [flow_tag]"=&w"(flow_tag)
+ :[p3]"r"(p3 + 48), [p2]"r"(p2 + 48),
+ [p1]"r"(p1 + 48), [p0]"r"(p0 + 48),
+ [e3]"r"(e3), [e2]"r"(e2), [e1]"r"(e1), [e0]"r"(e0),
+ [elts_p]"r"(elts_p),
+ [pkts_p]"r"(pkts_p),
+ [cqe_shuf_m]"w"(cqe_shuf_m),
+ [mb_shuf_m]"w"(mb_shuf_m),
+ [owner_shuf_m]"w"(owner_shuf_m),
+ [len_shuf_m]"w"(len_shuf_m),
+ [ptype_shuf_m]"w"(ptype_shuf_m),
+ [ftag_shuf_m]"w"(ftag_shuf_m),
+ [crc_adj]"w"(crc_adj),
+ [flow_mark_adj]"w"(flow_mark_adj)
+ :"memory",
+ "v12", "v13", "v14", "v15",
+ "v16", "v17", "v18", "v19",
+ "v20", "v21", "v22", "v23",
+ "v24", "v25");
+ /* D.2 flip owner bit to mark CQEs from last round. */
+ owner_mask = vand_u16(op_own, owner_check);
+ owner_mask = vceq_u16(owner_mask, ownership);
+ /* D.3 get mask for invalidated CQEs. */
+ opcode = vand_u16(op_own, opcode_check);
+ invalid_mask = vceq_u16(opcode_check, opcode);
+ /* E.1 find compressed CQE format. */
+ comp_mask = vand_u16(op_own, format_check);
+ comp_mask = vceq_u16(comp_mask, format_check);
+ /* D.4 mask out beyond boundary. */
+ invalid_mask = vorr_u16(invalid_mask, mask);
+ /* D.5 merge invalid_mask with invalid owner. */
+ invalid_mask = vorr_u16(invalid_mask, owner_mask);
+ /* E.2 mask out invalid entries. */
+ comp_mask = vbic_u16(comp_mask, invalid_mask);
+ /* E.3 get the first compressed CQE. */
+ comp_idx = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16(
+ comp_mask), 0)) /
+ (sizeof(uint16_t) * 8);
+ /* D.6 mask out entries after the compressed CQE. */
+ mask = vcreate_u16(comp_idx < MLX5_VPMD_DESCS_PER_LOOP ?
+ -1UL >> (comp_idx * sizeof(uint16_t) * 8) :
+ 0);
+ invalid_mask = vorr_u16(invalid_mask, mask);
+ /* D.7 count non-compressed valid CQEs. */
+ n = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16(
+ invalid_mask), 0)) / (sizeof(uint16_t) * 8);
+ nocmp_n += n;
+ /* D.2 get the final invalid mask. */
+ mask = vcreate_u16(n < MLX5_VPMD_DESCS_PER_LOOP ?
+ -1UL >> (n * sizeof(uint16_t) * 8) : 0);
+ invalid_mask = vorr_u16(invalid_mask, mask);
+ /* D.3 check error in opcode. */
+ opcode = vceq_u16(resp_err_check, opcode);
+ opcode = vbic_u16(opcode, invalid_mask);
+ /* D.4 mark if any error is set */
+ rxq->pending_err |=
+ !!vget_lane_u64(vreinterpret_u64_u16(opcode), 0);
+ /* C.4 fill in mbuf - rearm_data and packet_type. */
+ rxq_cq_to_ptype_oflags_v(rxq, ptype_info, flow_tag,
+ opcode, &elts[pos]);
+ if (rxq->hw_timestamp) {
+ elts[pos]->timestamp =
+ rte_be_to_cpu_64(
+ container_of(p0, struct mlx5_cqe,
+ pkt_info)->timestamp);
+ elts[pos + 1]->timestamp =
+ rte_be_to_cpu_64(
+ container_of(p1, struct mlx5_cqe,
+ pkt_info)->timestamp);
+ elts[pos + 2]->timestamp =
+ rte_be_to_cpu_64(
+ container_of(p2, struct mlx5_cqe,
+ pkt_info)->timestamp);
+ elts[pos + 3]->timestamp =
+ rte_be_to_cpu_64(
+ container_of(p3, struct mlx5_cqe,
+ pkt_info)->timestamp);
+ }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Add up received bytes count. */
+ byte_cnt = vbic_u16(byte_cnt, invalid_mask);
+ rcvd_byte += vget_lane_u64(vpaddl_u32(vpaddl_u16(byte_cnt)), 0);
+#endif
+ /*
+ * Break the loop unless more valid CQEs are expected, or if
+ * there's a compressed CQE.
+ */
+ if (n != MLX5_VPMD_DESCS_PER_LOOP)
+ break;
+ }
+ /* If no new CQE seen, return without updating cq_db. */
+ if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP))
+ return rcvd_pkt;
+ /* Update the consumer indexes for non-compressed CQEs. */
+ assert(nocmp_n <= pkts_n);
+ rxq->cq_ci += nocmp_n;
+ rxq->rq_pi += nocmp_n;
+ rcvd_pkt += nocmp_n;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ rxq->stats.ipackets += nocmp_n;
+ rxq->stats.ibytes += rcvd_byte;
+#endif
+ /* Decompress the last CQE if compressed. */
+ if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) {
+ assert(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
+ rxq_cq_decompress_v(rxq, &cq[nocmp_n], &elts[nocmp_n]);
+ /* Return more packets if needed. */
+ if (nocmp_n < pkts_n) {
+ uint16_t n = rxq->cq_ci - rxq->rq_pi;
+
+ n = RTE_MIN(n, pkts_n - nocmp_n);
+ rxq_copy_mbuf_v(rxq, &pkts[nocmp_n], n);
+ rxq->rq_pi += n;
+ rcvd_pkt += n;
+ }
+ }
+ rte_compiler_barrier();
+ *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+ return rcvd_pkt;
+}
+
+#endif /* RTE_PMD_MLX5_RXTX_VEC_NEON_H_ */
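
/*
 * Editorial sketch (not part of the patch): rxq_burst_v() packs the per-CQE
 * 16-bit flags with the 1st CQE in the most significant lane, so the index of
 * the first flagged CQE (first compressed or first invalid one) is recovered
 * with a count-leading-zeros, exactly as comp_idx and n are derived above.
 * Plain C, no NEON, for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int
first_flagged_cqe(uint64_t mask)
{
	if (!mask)
		return 4; /* MLX5_VPMD_DESCS_PER_LOOP: no lane flagged */
	return __builtin_clzll(mask) / (sizeof(uint16_t) * 8);
}

int main(void)
{
	/* Only the 2nd CQE flagged: its all-ones lane sits in bits [47:32]. */
	uint64_t mask = 0x0000ffff00000000ULL;

	printf("first flagged CQE = %u\n", first_flagged_cqe(mask)); /* 1 */
	return 0;
}
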
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 8560f745..2b9f1601 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -31,38 +31,23 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef RTE_PMD_MLX5_RXTX_VEC_SSE_H_
+#define RTE_PMD_MLX5_RXTX_VEC_SSE_H_
+
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <smmintrin.h>
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#include <infiniband/mlx5_hw.h>
-#include <infiniband/arch.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
+#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
@@ -77,14 +62,14 @@
* @param txq
* Pointer to TX queue structure.
* @param dseg
- * Pointer to buffer descriptor to be writen.
+ * Pointer to buffer descriptor to be written.
* @param pkts
* Pointer to array of packets to be sent.
* @param n
* Number of packets to be filled.
*/
static inline void
-txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
+txq_wr_dseg_v(struct mlx5_txq_data *txq, __m128i *dseg,
struct rte_mbuf **pkts, unsigned int n)
{
unsigned int pos;
@@ -119,85 +104,6 @@ txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
}
/**
- * Count the number of continuous single segment packets. The first packet must
- * be a single segment packet.
- *
- * @param pkts
- * Pointer to array of packets.
- * @param pkts_n
- * Number of packets.
- *
- * @return
- * Number of continuous single segment packets.
- */
-static inline unsigned int
-txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- unsigned int pos;
-
- if (!pkts_n)
- return 0;
- assert(NB_SEGS(pkts[0]) == 1);
- /* Count the number of continuous single segment packets. */
- for (pos = 1; pos < pkts_n; ++pos)
- if (NB_SEGS(pkts[pos]) > 1)
- break;
- return pos;
-}
-
-/**
- * Count the number of packets having same ol_flags and calculate cs_flags.
- *
- * @param txq
- * Pointer to TX queue structure.
- * @param pkts
- * Pointer to array of packets.
- * @param pkts_n
- * Number of packets.
- * @param cs_flags
- * Pointer of flags to be returned.
- *
- * @return
- * Number of packets having same ol_flags.
- */
-static inline unsigned int
-txq_calc_offload(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
- uint8_t *cs_flags)
-{
- unsigned int pos;
- const uint64_t ol_mask =
- PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
- PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
- PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
-
- if (!pkts_n)
- return 0;
- /* Count the number of packets having same ol_flags. */
- for (pos = 1; pos < pkts_n; ++pos)
- if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
- break;
- /* Should open another MPW session for the rest. */
- if (pkts[0]->ol_flags &
- (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
- const uint64_t is_tunneled =
- pkts[0]->ol_flags &
- (PKT_TX_TUNNEL_GRE |
- PKT_TX_TUNNEL_VXLAN);
-
- if (is_tunneled && txq->tunnel_en) {
- *cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
- MLX5_ETH_WQE_L4_INNER_CSUM;
- if (pkts[0]->ol_flags & PKT_TX_OUTER_IP_CKSUM)
- *cs_flags |= MLX5_ETH_WQE_L3_CSUM;
- } else {
- *cs_flags = MLX5_ETH_WQE_L3_CSUM |
- MLX5_ETH_WQE_L4_CSUM;
- }
- }
- return pos;
-}
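
/*
 * Editorial sketch (not part of the patch): the checksum-flag selection that
 * the removed txq_calc_offload() performed, kept here in scalar form for
 * reference. The flag values are local stand-ins for the PKT_TX_* and
 * MLX5_ETH_WQE_* constants; the logic mirrors the lines deleted above.
 */
#include <stdint.h>
#include <stdio.h>

#define TX_L3L4_CKSUM     (1u << 0) /* stand-in for PKT_TX_IP/TCP/UDP_CKSUM */
#define TX_TUNNEL         (1u << 1) /* stand-in for PKT_TX_TUNNEL_GRE/VXLAN */
#define TX_OUTER_IP_CKSUM (1u << 2) /* stand-in for PKT_TX_OUTER_IP_CKSUM */
#define WQE_L3_CSUM       0x40      /* stand-in values */
#define WQE_L4_CSUM       0x80
#define WQE_L3_INNER_CSUM 0x10
#define WQE_L4_INNER_CSUM 0x20

static uint8_t
calc_cs_flags(uint64_t ol_flags, int tunnel_en)
{
	uint8_t cs = 0;

	if (!(ol_flags & TX_L3L4_CKSUM))
		return 0;
	if ((ol_flags & TX_TUNNEL) && tunnel_en) {
		/* Inner checksums, plus the outer L3 one when requested. */
		cs = WQE_L3_INNER_CSUM | WQE_L4_INNER_CSUM;
		if (ol_flags & TX_OUTER_IP_CKSUM)
			cs |= WQE_L3_CSUM;
	} else {
		cs = WQE_L3_CSUM | WQE_L4_CSUM;
	}
	return cs;
}

int main(void)
{
	printf("cs_flags=%#x\n", calc_cs_flags(TX_L3L4_CKSUM | TX_TUNNEL, 1));
	return 0;
}
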
-
-/**
* Send multi-segmented packets until it encounters a single segment packet in
* the pkts list.
*
@@ -212,7 +118,8 @@ txq_calc_offload(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
* Number of packets successfully transmitted (<= pkts_n).
*/
static uint16_t
-txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n)
{
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
@@ -257,13 +164,17 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (segs_n == 1 ||
max_elts < segs_n || max_wqe < 2)
break;
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
+ break;
+ }
wqe = &((volatile struct mlx5_wqe64 *)
txq->wqes)[wqe_ci & wq_mask].hdr;
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
- const uint64_t is_tunneled = buf->ol_flags &
- (PKT_TX_TUNNEL_GRE |
- PKT_TX_TUNNEL_VXLAN);
+ const uint64_t is_tunneled =
+ buf->ol_flags & (PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN);
if (is_tunneled && txq->tunnel_en) {
cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
@@ -298,7 +209,7 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
/* Fill ESEG in the header. */
_mm_store_si128(t_wqe + 1,
_mm_set_epi16(0, 0, 0, 0,
- htons(len), cs_flags,
+ rte_cpu_to_be_16(len), cs_flags,
0, 0));
txq->wqe_ci = wqe_ci;
}
@@ -307,7 +218,7 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
txq->elts_comp += (uint16_t)(elts_head - txq->elts_head);
txq->elts_head = elts_head;
if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
- wqe->ctrl[2] = htonl(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(8);
wqe->ctrl[3] = txq->elts_head;
txq->elts_comp = 0;
++txq->cq_pi;
@@ -338,7 +249,7 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
* Number of packets successfully transmitted (<= pkts_n).
*/
static inline uint16_t
-txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
+txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
uint8_t cs_flags)
{
struct rte_mbuf **elts;
@@ -374,6 +285,7 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+ assert(pkts_n <= MLX5_DSEG_MAX - nb_dword_in_hdr);
if (unlikely(!pkts_n))
return 0;
elts = &(*txq->elts)[elts_head & elts_m];
@@ -432,87 +344,11 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
txq->wqe_ci += (nb_dword_in_hdr + pkts_n + (nb_dword_per_wqebb - 1)) /
nb_dword_per_wqebb;
/* Ring QP doorbell. */
- mlx5_tx_dbrec(txq, wqe);
+ mlx5_tx_dbrec_cond_wmb(txq, wqe, pkts_n < MLX5_VPMD_TX_MAX_BURST);
return pkts_n;
}
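
/*
 * Editorial note (not part of the patch): the wqe_ci advance above is a
 * round-up division: header descriptor words plus one per packet, expressed
 * in whole WQE basic blocks (WQEBBs). Worked example with illustrative sizes:
 */
#include <stdio.h>

int main(void)
{
	const unsigned int nb_dword_per_wqebb = 4; /* illustrative */
	const unsigned int nb_dword_in_hdr = 3;    /* illustrative */
	unsigned int pkts_n = 9;
	unsigned int wqebbs = (nb_dword_in_hdr + pkts_n +
			       nb_dword_per_wqebb - 1) / nb_dword_per_wqebb;

	printf("%u packets -> %u WQEBBs\n", pkts_n, wqebbs); /* 9 -> 3 */
	return 0;
}
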
/**
- * DPDK callback for vectorized TX.
- *
- * @param dpdk_txq
- * Generic pointer to TX queue structure.
- * @param[in] pkts
- * Packets to transmit.
- * @param pkts_n
- * Number of packets in array.
- *
- * @return
- * Number of packets successfully transmitted (<= pkts_n).
- */
-uint16_t
-mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
-{
- struct txq *txq = (struct txq *)dpdk_txq;
- uint16_t nb_tx = 0;
-
- while (pkts_n > nb_tx) {
- uint16_t n;
- uint16_t ret;
-
- n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
- ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
- nb_tx += ret;
- if (!ret)
- break;
- }
- return nb_tx;
-}
-
-/**
- * DPDK callback for vectorized TX with multi-seg packets and offload.
- *
- * @param dpdk_txq
- * Generic pointer to TX queue structure.
- * @param[in] pkts
- * Packets to transmit.
- * @param pkts_n
- * Number of packets in array.
- *
- * @return
- * Number of packets successfully transmitted (<= pkts_n).
- */
-uint16_t
-mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- struct txq *txq = (struct txq *)dpdk_txq;
- uint16_t nb_tx = 0;
-
- while (pkts_n > nb_tx) {
- uint8_t cs_flags = 0;
- uint16_t n;
- uint16_t ret;
-
- /* Transmit multi-seg packets in the head of pkts list. */
- if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) &&
- NB_SEGS(pkts[nb_tx]) > 1)
- nb_tx += txq_scatter_v(txq,
- &pkts[nb_tx],
- pkts_n - nb_tx);
- n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
- if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS))
- n = txq_check_multiseg(&pkts[nb_tx], n);
- if (!(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
- n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags);
- ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
- nb_tx += ret;
- if (!ret)
- break;
- }
- return nb_tx;
-}
-
-/**
* Store free buffers to RX SW ring.
*
* @param rxq
@@ -523,7 +359,7 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
* Number of packets to be stored.
*/
static inline void
-rxq_copy_mbuf_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t n)
+rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)
{
const uint16_t q_mask = (1 << rxq->elts_n) - 1;
struct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];
@@ -541,41 +377,6 @@ rxq_copy_mbuf_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t n)
}
/**
- * Replenish buffers for RX in bulk.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param n
- * Number of buffers to be replenished.
- */
-static inline void
-rxq_replenish_bulk_mbuf(struct rxq *rxq, uint16_t n)
-{
- const uint16_t q_n = 1 << rxq->elts_n;
- const uint16_t q_mask = q_n - 1;
- const uint16_t elts_idx = rxq->rq_ci & q_mask;
- struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
- volatile struct mlx5_wqe_data_seg *wq = &(*rxq->wqes)[elts_idx];
- unsigned int i;
-
- assert(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH);
- assert(n <= (uint16_t)(q_n - (rxq->rq_ci - rxq->rq_pi)));
- assert(MLX5_VPMD_RXQ_RPLNSH_THRESH > MLX5_VPMD_DESCS_PER_LOOP);
- /* Not to cross queue end. */
- n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
- if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
- rxq->stats.rx_nombuf += n;
- return;
- }
- for (i = 0; i < n; ++i)
- wq[i].addr = htonll((uintptr_t)elts[i]->buf_addr +
- RTE_PKTMBUF_HEADROOM);
- rxq->rq_ci += n;
- rte_wmb();
- *rxq->rq_db = htonl(rxq->rq_ci);
-}
-
-/**
* Decompress a compressed completion and fill in mbufs in RX SW ring with data
* extracted from the title completion descriptor.
*
@@ -588,8 +389,7 @@ rxq_replenish_bulk_mbuf(struct rxq *rxq, uint16_t n)
* the title completion descriptor to be copied to the rest of mbufs.
*/
static inline void
-rxq_cq_decompress_v(struct rxq *rxq,
- volatile struct mlx5_cqe *cq,
+rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
struct rte_mbuf **elts)
{
volatile struct mlx5_mini_cqe8 *mcq = (void *)(cq + 1);
@@ -636,13 +436,6 @@ rxq_cq_decompress_v(struct rxq *rxq,
10, 11, 2, 3);
#endif
- /* Compile time sanity check for this function. */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
- offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
- offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
- offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
/*
* A. load mCQEs into a 128bit register.
* B. store rearm data to mbuf.
@@ -747,12 +540,13 @@ rxq_cq_decompress_v(struct rxq *rxq,
* Pointer to array of packets to be filled.
*/
static inline void
-rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
- struct rte_mbuf **pkts)
+rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
+ __m128i op_err, struct rte_mbuf **pkts)
{
__m128i pinfo0, pinfo1;
__m128i pinfo, ptype;
- __m128i ol_flags = _mm_set1_epi32(rxq->rss_hash * PKT_RX_RSS_HASH);
+ __m128i ol_flags = _mm_set1_epi32(rxq->rss_hash * PKT_RX_RSS_HASH |
+ rxq->hw_timestamp * PKT_RX_TIMESTAMP);
__m128i cv_flags;
const __m128i zero = _mm_setzero_si128();
const __m128i ptype_mask =
@@ -769,17 +563,17 @@ rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
(uint8_t)(PKT_RX_L4_CKSUM_GOOD >> 1),
0,
(uint8_t)(PKT_RX_IP_CKSUM_GOOD >> 1),
- (uint8_t)(PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED),
+ (uint8_t)(PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
0);
const __m128i cv_mask =
_mm_set_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
const __m128i mbuf_init =
_mm_loadl_epi64((__m128i *)&rxq->mbuf_initializer);
__m128i rearm0, rearm1, rearm2, rearm3;
@@ -853,15 +647,11 @@ rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
/* Merge to ol_flags. */
ol_flags = _mm_or_si128(ol_flags, cv_flags);
/* Merge mbuf_init and ol_flags. */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
- offsetof(struct rte_mbuf, rearm_data) + 8);
rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(ol_flags, 8), 0x30);
rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(ol_flags, 4), 0x30);
rearm2 = _mm_blend_epi16(mbuf_init, ol_flags, 0x30);
rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(ol_flags, 4), 0x30);
/* Write 8B rearm_data and 8B ol_flags. */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
- RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
_mm_store_si128((__m128i *)&pkts[0]->rearm_data, rearm0);
_mm_store_si128((__m128i *)&pkts[1]->rearm_data, rearm1);
_mm_store_si128((__m128i *)&pkts[2]->rearm_data, rearm2);
@@ -869,51 +659,6 @@ rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
}
/**
- * Skip error packets.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param[out] pkts
- * Array to store received packets.
- * @param pkts_n
- * Maximum number of packets in array.
- *
- * @return
- * Number of packets successfully received (<= pkts_n).
- */
-static uint16_t
-rxq_handle_pending_error(struct rxq *rxq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
-{
- uint16_t n = 0;
- unsigned int i;
-#ifdef MLX5_PMD_SOFT_COUNTERS
- uint32_t err_bytes = 0;
-#endif
-
- for (i = 0; i < pkts_n; ++i) {
- struct rte_mbuf *pkt = pkts[i];
-
- if (pkt->packet_type == RTE_PTYPE_ALL_MASK) {
-#ifdef MLX5_PMD_SOFT_COUNTERS
- err_bytes += PKT_LEN(pkt);
-#endif
- rte_pktmbuf_free_seg(pkt);
- } else {
- pkts[n++] = pkt;
- }
- }
- rxq->stats.idropped += (pkts_n - n);
-#ifdef MLX5_PMD_SOFT_COUNTERS
- /* Correct counters of errored completions. */
- rxq->stats.ipackets -= (pkts_n - n);
- rxq->stats.ibytes -= err_bytes;
-#endif
- rxq->pending_err = 0;
- return n;
-}
-
-/**
* Receive burst of packets. An errored completion also consumes a mbuf, but the
* packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
* before returning to application.
@@ -929,7 +674,7 @@ rxq_handle_pending_error(struct rxq *rxq, struct rte_mbuf **pkts,
* Number of packets received including errors (<= pkts_n).
*/
static inline uint16_t
-rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
const uint16_t q_n = 1 << rxq->cqe_n;
const uint16_t q_mask = q_n - 1;
@@ -984,26 +729,6 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
rxq->crc_present * ETHER_CRC_LEN);
const __m128i flow_mark_adj = _mm_set_epi32(rxq->mark * (-1), 0, 0, 0);
- /* Compile time sanity check for this function. */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
- offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
- offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, pkt_info) != 0);
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, rx_hash_res) !=
- offsetof(struct mlx5_cqe, pkt_info) + 12);
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, rsvd1) +
- sizeof(((struct mlx5_cqe *)0)->rsvd1) !=
- offsetof(struct mlx5_cqe, hdr_type_etc));
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, vlan_info) !=
- offsetof(struct mlx5_cqe, hdr_type_etc) + 2);
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, rsvd2) +
- sizeof(((struct mlx5_cqe *)0)->rsvd2) !=
- offsetof(struct mlx5_cqe, byte_cnt));
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, sop_drop_qpn) !=
- RTE_ALIGN(offsetof(struct mlx5_cqe, sop_drop_qpn), 8));
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, op_own) !=
- offsetof(struct mlx5_cqe, sop_drop_qpn) + 7);
assert(rxq->sges_n == 0);
assert(rxq->cqe_n == rxq->elts_n);
cq = &(*rxq->cqes)[cq_idx];
@@ -1022,7 +747,7 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
*/
repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH)
- rxq_replenish_bulk_mbuf(rxq, repl_n);
+ mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
/* See if there're unreturned mbufs from compressed CQE. */
rcvd_pkt = rxq->cq_ci - rxq->rq_pi;
if (rcvd_pkt > 0) {
@@ -1214,6 +939,16 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
rxq->pending_err |= !!_mm_cvtsi128_si64(opcode);
/* D.5 fill in mbuf - rearm_data and packet_type. */
rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]);
+ if (rxq->hw_timestamp) {
+ pkts[pos]->timestamp =
+ rte_be_to_cpu_64(cq[pos].timestamp);
+ pkts[pos + 1]->timestamp =
+ rte_be_to_cpu_64(cq[pos + p1].timestamp);
+ pkts[pos + 2]->timestamp =
+ rte_be_to_cpu_64(cq[pos + p2].timestamp);
+ pkts[pos + 3]->timestamp =
+ rte_be_to_cpu_64(cq[pos + p3].timestamp);
+ }
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Add up received bytes count. */
byte_cnt = _mm_shuffle_epi8(op_own, len_shuf_mask);
@@ -1254,164 +989,9 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
rcvd_pkt += n;
}
}
- rte_wmb();
- *rxq->cq_db = htonl(rxq->cq_ci);
+ rte_compiler_barrier();
+ *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
return rcvd_pkt;
}
-/**
- * DPDK callback for vectorized RX.
- *
- * @param dpdk_rxq
- * Generic pointer to RX queue structure.
- * @param[out] pkts
- * Array to store received packets.
- * @param pkts_n
- * Maximum number of packets in array.
- *
- * @return
- * Number of packets successfully received (<= pkts_n).
- */
-uint16_t
-mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- struct rxq *rxq = dpdk_rxq;
- uint16_t nb_rx;
-
- nb_rx = rxq_burst_v(rxq, pkts, pkts_n);
- if (unlikely(rxq->pending_err))
- nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx);
- return nb_rx;
-}
-
-/**
- * Check Tx queue flags are set for raw vectorized Tx.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 1 if supported, negative errno value if not.
- */
-int __attribute__((cold))
-priv_check_raw_vec_tx_support(struct priv *priv)
-{
- uint16_t i;
-
- /* All the configured queues should support. */
- for (i = 0; i < priv->txqs_n; ++i) {
- struct txq *txq = (*priv->txqs)[i];
-
- if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) ||
- !(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
- break;
- }
- if (i != priv->txqs_n)
- return -ENOTSUP;
- return 1;
-}
-
-/**
- * Check a device can support vectorized TX.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 1 if supported, negative errno value if not.
- */
-int __attribute__((cold))
-priv_check_vec_tx_support(struct priv *priv)
-{
- if (!priv->tx_vec_en ||
- priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
- priv->mps != MLX5_MPW_ENHANCED ||
- priv->tso)
- return -ENOTSUP;
- return 1;
-}
-
-/**
- * Check a RX queue can support vectorized RX.
- *
- * @param rxq
- * Pointer to RX queue.
- *
- * @return
- * 1 if supported, negative errno value if not.
- */
-int __attribute__((cold))
-rxq_check_vec_support(struct rxq *rxq)
-{
- struct rxq_ctrl *ctrl = container_of(rxq, struct rxq_ctrl, rxq);
-
- if (!ctrl->priv->rx_vec_en || rxq->sges_n != 0)
- return -ENOTSUP;
- return 1;
-}
-
-/**
- * Check a device can support vectorized RX.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 1 if supported, negative errno value if not.
- */
-int __attribute__((cold))
-priv_check_vec_rx_support(struct priv *priv)
-{
- uint16_t i;
-
- if (!priv->rx_vec_en)
- return -ENOTSUP;
- /* All the configured queues should support. */
- for (i = 0; i < priv->rxqs_n; ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
-
- if (rxq_check_vec_support(rxq) < 0)
- break;
- }
- if (i != priv->rxqs_n)
- return -ENOTSUP;
- return 1;
-}
-
-/**
- * Prepare for vectorized RX.
- *
- * @param priv
- * Pointer to private structure.
- */
-void
-priv_prep_vec_rx_function(struct priv *priv)
-{
- uint16_t i;
-
- for (i = 0; i < priv->rxqs_n; ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
- struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
- const uint16_t desc = 1 << rxq->elts_n;
- int j;
-
- assert(rxq->elts_n == rxq->cqe_n);
- /* Initialize default rearm_data for vPMD. */
- mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
- rte_mbuf_refcnt_set(mbuf_init, 1);
- mbuf_init->nb_segs = 1;
- mbuf_init->port = rxq->port_id;
- /*
- * prevent compiler reordering:
- * rearm_data covers previous fields.
- */
- rte_compiler_barrier();
- rxq->mbuf_initializer =
- *(uint64_t *)&mbuf_init->rearm_data;
- /* Padding with a fake mbuf for vectorized Rx. */
- for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
- (*rxq->elts)[desc + j] = &rxq->fake_mbuf;
- /* Mark that it need to be cleaned up for rxq_alloc_elts(). */
- rxq->trim_elts = 1;
- }
-}
+#endif /* RTE_PMD_MLX5_RXTX_VEC_SSE_H_ */
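
/*
 * Editorial sketch (not part of the patch): the rxq_handle_pending_error()
 * helper removed above (presumably consolidated into the shared mlx5_rxtx_vec
 * code) compacts the burst in place, dropping packets whose packet_type was
 * marked RTE_PTYPE_ALL_MASK by the vector path. A scalar, stand-alone
 * rendering of that contract; the real code frees the mbuf and updates the
 * queue counters.
 */
#include <stdint.h>
#include <stdio.h>

struct pkt {                       /* stand-in for struct rte_mbuf */
	uint32_t packet_type;
};
#define PTYPE_ALL_MASK 0xffffffffu /* stand-in for RTE_PTYPE_ALL_MASK */

static uint16_t
drop_errored(struct pkt **pkts, uint16_t pkts_n)
{
	uint16_t n = 0;
	uint16_t i;

	for (i = 0; i < pkts_n; ++i) {
		if (pkts[i]->packet_type == PTYPE_ALL_MASK)
			continue; /* real code: rte_pktmbuf_free_seg() */
		pkts[n++] = pkts[i];
	}
	return n;
}

int main(void)
{
	struct pkt a = { 0x1 }, b = { PTYPE_ALL_MASK }, c = { 0x2 };
	struct pkt *pkts[] = { &a, &b, &c };

	printf("kept %u of 3\n", drop_errored(pkts, 3)); /* kept 2 of 3 */
	return 0;
}
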
diff --git a/drivers/net/mlx5/mlx5_socket.c b/drivers/net/mlx5/mlx5_socket.c
new file mode 100644
index 00000000..5cd1ab80
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_socket.c
@@ -0,0 +1,294 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+
+/**
+ * Initialise the socket to communicate with the secondary process
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+priv_socket_init(struct priv *priv)
+{
+ struct sockaddr_un sun = {
+ .sun_family = AF_UNIX,
+ };
+ int ret;
+ int flags;
+ struct stat file_stat;
+
+ /*
+ * Initialise the socket to communicate with the secondary
+ * process.
+ */
+ ret = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (ret < 0) {
+ WARN("secondary process not supported: %s", strerror(errno));
+ return ret;
+ }
+ priv->primary_socket = ret;
+ flags = fcntl(priv->primary_socket, F_GETFL, 0);
+ if (flags == -1)
+ goto out;
+ ret = fcntl(priv->primary_socket, F_SETFL, flags | O_NONBLOCK);
+ if (ret < 0)
+ goto out;
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+ MLX5_DRIVER_NAME, priv->primary_socket);
+ ret = stat(sun.sun_path, &file_stat);
+ if (!ret)
+ claim_zero(remove(sun.sun_path));
+ ret = bind(priv->primary_socket, (const struct sockaddr *)&sun,
+ sizeof(sun));
+ if (ret < 0) {
+ WARN("cannot bind socket, secondary process not supported: %s",
+ strerror(errno));
+ goto close;
+ }
+ ret = listen(priv->primary_socket, 0);
+ if (ret < 0) {
+ WARN("Secondary process not supported: %s", strerror(errno));
+ goto close;
+ }
+ return ret;
+close:
+ remove(sun.sun_path);
+out:
+ claim_zero(close(priv->primary_socket));
+ priv->primary_socket = 0;
+ return -(ret);
+}
+
+/**
+ * Uninitialise the socket used to communicate with the secondary process.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+priv_socket_uninit(struct priv *priv)
+{
+ MKSTR(path, "/var/tmp/%s_%d", MLX5_DRIVER_NAME, priv->primary_socket);
+ claim_zero(close(priv->primary_socket));
+ priv->primary_socket = 0;
+ claim_zero(remove(path));
+ return 0;
+}
+
+/**
+ * Handle socket interrupts.
+ *
+ * @param priv
+ * Pointer to private structure.
+ */
+void
+priv_socket_handle(struct priv *priv)
+{
+ int conn_sock;
+ int ret = 0;
+ struct cmsghdr *cmsg = NULL;
+ struct ucred *cred = NULL;
+ char buf[CMSG_SPACE(sizeof(struct ucred))] = { 0 };
+ char vbuf[1024] = { 0 };
+ struct iovec io = {
+ .iov_base = vbuf,
+ .iov_len = sizeof(vbuf),
+ };
+ struct msghdr msg = {
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ };
+ int *fd;
+
+ /* Accept the connection from the client. */
+ conn_sock = accept(priv->primary_socket, NULL, NULL);
+ if (conn_sock < 0) {
+ WARN("connection failed: %s", strerror(errno));
+ return;
+ }
+ ret = setsockopt(conn_sock, SOL_SOCKET, SO_PASSCRED, &(int){1},
+ sizeof(int));
+ if (ret < 0) {
+ WARN("cannot change socket options");
+ goto out;
+ }
+ ret = recvmsg(conn_sock, &msg, MSG_WAITALL);
+ if (ret < 0) {
+ WARN("received an empty message: %s", strerror(errno));
+ goto out;
+ }
+ /* Expect to receive credentials only. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ WARN("no message");
+ goto out;
+ }
+ if ((cmsg->cmsg_type == SCM_CREDENTIALS) &&
+ (cmsg->cmsg_len >= sizeof(*cred))) {
+ cred = (struct ucred *)CMSG_DATA(cmsg);
+ assert(cred != NULL);
+ }
+ cmsg = CMSG_NXTHDR(&msg, cmsg);
+ if (cmsg != NULL) {
+ WARN("Message wrongly formatted");
+ goto out;
+ }
+ /* Make sure all the ancillary data was received and valid. */
+ if ((cred == NULL) || (cred->uid != getuid()) ||
+ (cred->gid != getgid())) {
+ WARN("wrong credentials");
+ goto out;
+ }
+ /* Set-up the ancillary data. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ assert(cmsg != NULL);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(priv->ctx->cmd_fd));
+ fd = (int *)CMSG_DATA(cmsg);
+ *fd = priv->ctx->cmd_fd;
+ ret = sendmsg(conn_sock, &msg, 0);
+ if (ret < 0)
+ WARN("cannot send response");
+out:
+ close(conn_sock);
+}
+
+/**
+ * Connect to the primary process.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * fd on success, negative errno value on failure.
+ */
+int
+priv_socket_connect(struct priv *priv)
+{
+ struct sockaddr_un sun = {
+ .sun_family = AF_UNIX,
+ };
+ int socket_fd;
+ int *fd = NULL;
+ int ret;
+ struct ucred *cred;
+ char buf[CMSG_SPACE(sizeof(*cred))] = { 0 };
+ char vbuf[1024] = { 0 };
+ struct iovec io = {
+ .iov_base = vbuf,
+ .iov_len = sizeof(vbuf),
+ };
+ struct msghdr msg = {
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+
+ ret = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (ret < 0) {
+ WARN("cannot connect to primary");
+ return ret;
+ }
+ socket_fd = ret;
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+ MLX5_DRIVER_NAME, priv->primary_socket);
+ ret = connect(socket_fd, (const struct sockaddr *)&sun, sizeof(sun));
+ if (ret < 0) {
+ WARN("cannot connect to primary");
+ goto out;
+ }
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ DEBUG("cannot get first message");
+ goto out;
+ }
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(*cred));
+ cred = (struct ucred *)CMSG_DATA(cmsg);
+ if (cred == NULL) {
+ DEBUG("no credentials received");
+ goto out;
+ }
+ cred->pid = getpid();
+ cred->uid = getuid();
+ cred->gid = getgid();
+ ret = sendmsg(socket_fd, &msg, MSG_DONTWAIT);
+ if (ret < 0) {
+ WARN("cannot send credentials to primary: %s",
+ strerror(errno));
+ goto out;
+ }
+ ret = recvmsg(socket_fd, &msg, MSG_WAITALL);
+ if (ret <= 0) {
+ WARN("no message from primary: %s", strerror(errno));
+ goto out;
+ }
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ WARN("No file descriptor received");
+ goto out;
+ }
+ fd = (int *)CMSG_DATA(cmsg);
+ if (*fd <= 0) {
+ WARN("no file descriptor received: %s", strerror(errno));
+ ret = *fd;
+ goto out;
+ }
+ ret = *fd;
+out:
+ close(socket_fd);
+ return ret;
+}
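
/*
 * Editorial sketch (not part of the patch): a stand-alone illustration of the
 * SCM_RIGHTS ancillary-data mechanism that priv_socket_handle() and
 * priv_socket_connect() use to hand the Verbs command file descriptor from
 * the primary to a secondary process. A socketpair() replaces the AF_UNIX
 * server socket so the demo runs in a single process; credentials checking
 * is omitted.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

static int
send_fd(int sock, int fd)
{
	char data = 'F';
	struct iovec io = { .iov_base = &data, .iov_len = 1 };
	char buf[CMSG_SPACE(sizeof(int))] = { 0 };
	struct msghdr msg = {
		.msg_iov = &io, .msg_iovlen = 1,
		.msg_control = buf, .msg_controllen = sizeof(buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
	return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
}

static int
recv_fd(int sock)
{
	char data;
	struct iovec io = { .iov_base = &data, .iov_len = 1 };
	char buf[CMSG_SPACE(sizeof(int))] = { 0 };
	struct msghdr msg = {
		.msg_iov = &io, .msg_iovlen = 1,
		.msg_control = buf, .msg_controllen = sizeof(buf),
	};
	int fd = -1;

	if (recvmsg(sock, &msg, 0) <= 0)
		return -1;
	memcpy(&fd, CMSG_DATA(CMSG_FIRSTHDR(&msg)), sizeof(int));
	return fd;
}

int main(void)
{
	int sp[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp))
		return 1;
	send_fd(sp[0], open("/dev/null", O_RDONLY));
	printf("received fd %d\n", recv_fd(sp[1]));
	return 0;
}
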
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 703f48c3..5e225d37 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -34,16 +34,9 @@
#include <linux/sockios.h>
#include <linux/ethtool.h>
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ethdev.h>
#include <rte_common.h>
#include <rte_malloc.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_rxtx.h"
@@ -325,7 +318,7 @@ priv_xstats_reset(struct priv *priv)
* @param[out] stats
* Stats structure output buffer.
*/
-void
+int
mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct priv *priv = mlx5_get_priv(dev);
@@ -336,7 +329,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
priv_lock(priv);
/* Add software counters. */
for (i = 0; (i != priv->rxqs_n); ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
+ struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
if (rxq == NULL)
continue;
@@ -357,7 +350,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
tmp.rx_nombuf += rxq->stats.rx_nombuf;
}
for (i = 0; (i != priv->txqs_n); ++i) {
- struct txq *txq = (*priv->txqs)[i];
+ struct mlx5_txq_data *txq = (*priv->txqs)[i];
if (txq == NULL)
continue;
@@ -367,19 +360,20 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
tmp.q_opackets[idx] += txq->stats.opackets;
tmp.q_obytes[idx] += txq->stats.obytes;
#endif
- tmp.q_errors[idx] += txq->stats.odropped;
+ tmp.q_errors[idx] += txq->stats.oerrors;
}
#ifdef MLX5_PMD_SOFT_COUNTERS
tmp.opackets += txq->stats.opackets;
tmp.obytes += txq->stats.obytes;
#endif
- tmp.oerrors += txq->stats.odropped;
+ tmp.oerrors += txq->stats.oerrors;
}
#ifndef MLX5_PMD_SOFT_COUNTERS
/* FIXME: retrieve and add hardware counters. */
#endif
*stats = tmp;
priv_unlock(priv);
+ return 0;
}
/**
@@ -442,8 +436,10 @@ mlx5_xstats_get(struct rte_eth_dev *dev,
priv_lock(priv);
stats_n = priv_ethtool_get_stats_n(priv);
- if (stats_n < 0)
+ if (stats_n < 0) {
+ priv_unlock(priv);
return -1;
+ }
if (xstats_ctrl->stats_n != stats_n)
priv_xstats_init(priv);
ret = priv_xstats_get(priv, stats);
@@ -468,10 +464,11 @@ mlx5_xstats_reset(struct rte_eth_dev *dev)
priv_lock(priv);
stats_n = priv_ethtool_get_stats_n(priv);
if (stats_n < 0)
- return;
+ goto unlock;
if (xstats_ctrl->stats_n != stats_n)
priv_xstats_init(priv);
priv_xstats_reset(priv);
+unlock:
priv_unlock(priv);
}
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 595a9e06..5de2d026 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -30,23 +30,90 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <unistd.h>
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
+static void
+priv_txq_stop(struct priv *priv)
+{
+ unsigned int i;
+
+ for (i = 0; i != priv->txqs_n; ++i)
+ mlx5_priv_txq_release(priv, i);
+}
+
+static int
+priv_txq_start(struct priv *priv)
+{
+ unsigned int i;
+ int ret = 0;
+
+ /* Add memory regions to Tx queues. */
+ for (i = 0; i != priv->txqs_n; ++i) {
+ unsigned int idx = 0;
+ struct mlx5_mr *mr;
+ struct mlx5_txq_ctrl *txq_ctrl = mlx5_priv_txq_get(priv, i);
+
+ if (!txq_ctrl)
+ continue;
+ LIST_FOREACH(mr, &priv->mr, next)
+ priv_txq_mp2mr_reg(priv, &txq_ctrl->txq, mr->mp, idx++);
+ txq_alloc_elts(txq_ctrl);
+ txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, i);
+ if (!txq_ctrl->ibv) {
+ ret = ENOMEM;
+ goto error;
+ }
+ }
+ return -ret;
+error:
+ priv_txq_stop(priv);
+ return -ret;
+}
+
+static void
+priv_rxq_stop(struct priv *priv)
+{
+ unsigned int i;
+
+ for (i = 0; i != priv->rxqs_n; ++i)
+ mlx5_priv_rxq_release(priv, i);
+}
+
+static int
+priv_rxq_start(struct priv *priv)
+{
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i != priv->rxqs_n; ++i) {
+ struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_priv_rxq_get(priv, i);
+
+ if (!rxq_ctrl)
+ continue;
+ ret = rxq_alloc_elts(rxq_ctrl);
+ if (ret)
+ goto error;
+ rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, i);
+ if (!rxq_ctrl->ibv) {
+ ret = ENOMEM;
+ goto error;
+ }
+ }
+ return -ret;
+error:
+ priv_rxq_stop(priv);
+ return -ret;
+}
+
/**
* DPDK callback to start the device.
*
@@ -62,36 +129,47 @@ int
mlx5_dev_start(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
+ struct mlx5_mr *mr = NULL;
int err;
if (mlx5_is_secondary())
return -E_RTE_SECONDARY;
+ dev->data->dev_started = 1;
priv_lock(priv);
- if (priv->started) {
- priv_unlock(priv);
- return 0;
+ err = priv_flow_create_drop_queue(priv);
+ if (err) {
+ ERROR("%p: Drop queue allocation failed: %s",
+ (void *)dev, strerror(err));
+ goto error;
}
- /* Update Rx/Tx callback. */
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
- err = priv_create_hash_rxqs(priv);
- if (!err)
- err = priv_rehash_flows(priv);
- if (!err)
- priv->started = 1;
- else {
- ERROR("%p: an error occurred while configuring hash RX queues:"
+ rte_mempool_walk(mlx5_mp2mr_iter, priv);
+ err = priv_txq_start(priv);
+ if (err) {
+ ERROR("%p: TXQ allocation failed: %s",
+ (void *)dev, strerror(err));
+ goto error;
+ }
+ /* Update send callback. */
+ priv_dev_select_tx_function(priv, dev);
+ err = priv_rxq_start(priv);
+ if (err) {
+ ERROR("%p: RXQ allocation failed: %s",
+ (void *)dev, strerror(err));
+ goto error;
+ }
+ /* Update receive callback. */
+ priv_dev_select_rx_function(priv, dev);
+ err = priv_dev_traffic_enable(priv, dev);
+ if (err) {
+ ERROR("%p: an error occurred while configuring control flows:"
" %s",
(void *)priv, strerror(err));
goto error;
}
- if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE)
- priv_fdir_enable(priv);
- err = priv_flow_start(priv);
+ err = priv_flow_start(priv, &priv->flows);
if (err) {
- priv->started = 0;
ERROR("%p: an error occurred while configuring flows:"
" %s",
(void *)priv, strerror(err));
@@ -109,10 +187,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
return 0;
error:
/* Rollback. */
- priv_special_flow_disable_all(priv);
- priv_mac_addrs_disable(priv);
- priv_destroy_hash_rxqs(priv);
- priv_flow_stop(priv);
+ dev->data->dev_started = 0;
+ for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
+ priv_mr_release(priv, mr);
+ priv_flow_stop(priv, &priv->flows);
+ priv_dev_traffic_disable(priv, dev);
+ priv_txq_stop(priv);
+ priv_rxq_stop(priv);
+ priv_flow_delete_drop_queue(priv);
priv_unlock(priv);
return -err;
}
@@ -129,23 +211,215 @@ void
mlx5_dev_stop(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
+ struct mlx5_mr *mr;
if (mlx5_is_secondary())
return;
priv_lock(priv);
- if (!priv->started) {
- priv_unlock(priv);
- return;
- }
+ dev->data->dev_started = 0;
+ /* Prevent crashes when queues are still in use. */
+ dev->rx_pkt_burst = removed_rx_burst;
+ dev->tx_pkt_burst = removed_tx_burst;
+ rte_wmb();
+ usleep(1000 * priv->rxqs_n);
DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
- priv_special_flow_disable_all(priv);
- priv_mac_addrs_disable(priv);
- priv_destroy_hash_rxqs(priv);
- priv_fdir_disable(priv);
- priv_flow_stop(priv);
+ priv_flow_stop(priv, &priv->flows);
+ priv_dev_traffic_disable(priv, dev);
priv_rx_intr_vec_disable(priv);
priv_dev_interrupt_handler_uninstall(priv, dev);
- priv->started = 0;
+ priv_txq_stop(priv);
+ priv_rxq_stop(priv);
+ for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
+ priv_mr_release(priv, mr);
+ priv_flow_delete_drop_queue(priv);
+ priv_unlock(priv);
+}
+
+/**
+ * Enable traffic flows configured by the control plane.
+ *
+ * @param priv
+ * Pointer to Ethernet device private data.
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev)
+{
+ struct rte_flow_item_eth bcast = {
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ };
+ struct rte_flow_item_eth ipv6_multi_spec = {
+ .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
+ };
+ struct rte_flow_item_eth ipv6_multi_mask = {
+ .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
+ };
+ struct rte_flow_item_eth unicast = {
+ .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ };
+ struct rte_flow_item_eth unicast_mask = {
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ };
+ const unsigned int vlan_filter_n = priv->vlan_filter_n;
+ const struct ether_addr cmp = {
+ .addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ };
+ unsigned int i;
+ unsigned int j;
+ int ret;
+
+ if (priv->isolated)
+ return 0;
+ if (dev->data->promiscuous) {
+ struct rte_flow_item_eth promisc = {
+ .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ .type = 0,
+ };
+
+ claim_zero(mlx5_ctrl_flow(dev, &promisc, &promisc));
+ return 0;
+ }
+ if (dev->data->all_multicast) {
+ struct rte_flow_item_eth multicast = {
+ .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+ .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ .type = 0,
+ };
+
+ claim_zero(mlx5_ctrl_flow(dev, &multicast, &multicast));
+ } else {
+ /* Add broadcast/multicast flows. */
+ for (i = 0; i != vlan_filter_n; ++i) {
+ uint16_t vlan = priv->vlan_filter[i];
+
+ struct rte_flow_item_vlan vlan_spec = {
+ .tci = rte_cpu_to_be_16(vlan),
+ };
+ struct rte_flow_item_vlan vlan_mask = {
+ .tci = 0xffff,
+ };
+
+ ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
+ &vlan_spec, &vlan_mask);
+ if (ret)
+ goto error;
+ ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
+ &ipv6_multi_mask,
+ &vlan_spec, &vlan_mask);
+ if (ret)
+ goto error;
+ }
+ if (!vlan_filter_n) {
+ ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
+ if (ret)
+ goto error;
+ ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
+ &ipv6_multi_mask);
+ if (ret)
+ goto error;
+ }
+ }
+ /* Add MAC address flows. */
+ for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
+ struct ether_addr *mac = &dev->data->mac_addrs[i];
+
+ if (!memcmp(mac, &cmp, sizeof(*mac)))
+ continue;
+ memcpy(&unicast.dst.addr_bytes,
+ mac->addr_bytes,
+ ETHER_ADDR_LEN);
+ for (j = 0; j != vlan_filter_n; ++j) {
+ uint16_t vlan = priv->vlan_filter[j];
+
+ struct rte_flow_item_vlan vlan_spec = {
+ .tci = rte_cpu_to_be_16(vlan),
+ };
+ struct rte_flow_item_vlan vlan_mask = {
+ .tci = 0xffff,
+ };
+
+ ret = mlx5_ctrl_flow_vlan(dev, &unicast,
+ &unicast_mask,
+ &vlan_spec,
+ &vlan_mask);
+ if (ret)
+ goto error;
+ }
+ if (!vlan_filter_n) {
+ ret = mlx5_ctrl_flow(dev, &unicast,
+ &unicast_mask);
+ if (ret)
+ goto error;
+ }
+ }
+ return 0;
+error:
+ return rte_errno;
+}
+
+
+/**
+ * Disable traffic flows configured by the control plane.
+ *
+ * @param priv
+ * Pointer to Ethernet device private data.
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+priv_dev_traffic_disable(struct priv *priv, struct rte_eth_dev *dev)
+{
+ (void)dev;
+ priv_flow_flush(priv, &priv->ctrl_flows);
+ return 0;
+}
+
+/**
+ * Restart traffic flows configured by the control plane.
+ *
+ * @param priv
+ * Pointer to Ethernet device private data.
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+priv_dev_traffic_restart(struct priv *priv, struct rte_eth_dev *dev)
+{
+ if (dev->data->dev_started) {
+ priv_dev_traffic_disable(priv, dev);
+ priv_dev_traffic_enable(priv, dev);
+ }
+ return 0;
+}
+
+/**
+ * Restart traffic flows configured by the control plane.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+mlx5_traffic_restart(struct rte_eth_dev *dev)
+{
+ struct priv *priv = dev->data->dev_private;
+
+ priv_lock(priv);
+ priv_dev_traffic_restart(priv, dev);
priv_unlock(priv);
+ return 0;
}
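
/*
 * Editorial sketch (not part of the patch): mlx5_traffic_restart() above is
 * meant to be called after a configuration change so the control-plane flows
 * (broadcast, multicast, MAC, VLAN) are dropped and re-installed. The
 * callback below and its name are hypothetical; only the
 * mlx5_traffic_restart() prototype is taken from the patch.
 */
#include <stdint.h>
#include <rte_ethdev.h>

int mlx5_traffic_restart(struct rte_eth_dev *dev); /* from mlx5_trigger.c */

int
example_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
	/* ... update the private VLAN filter table here (omitted) ... */
	(void)vlan_id;
	(void)on;
	return mlx5_traffic_restart(dev);
}
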
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 98aaa7ca..9c5860ff 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -36,6 +36,8 @@
#include <errno.h>
#include <string.h>
#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -47,17 +49,10 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5_utils.h"
#include "mlx5_defs.h"
@@ -70,23 +65,15 @@
*
* @param txq_ctrl
* Pointer to TX queue structure.
- * @param elts_n
- * Number of elements to allocate.
*/
-static void
-txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
+void
+txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
+ const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
unsigned int i;
for (i = 0; (i != elts_n); ++i)
(*txq_ctrl->txq.elts)[i] = NULL;
- for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) {
- volatile struct mlx5_wqe64 *wqe =
- (volatile struct mlx5_wqe64 *)
- txq_ctrl->txq.wqes + i;
-
- memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
- }
DEBUG("%p: allocated and configured %u WRs", (void *)txq_ctrl, elts_n);
txq_ctrl->txq.elts_head = 0;
txq_ctrl->txq.elts_tail = 0;
@@ -100,7 +87,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
* Pointer to TX queue structure.
*/
static void
-txq_free_elts(struct txq_ctrl *txq_ctrl)
+txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -129,155 +116,231 @@ txq_free_elts(struct txq_ctrl *txq_ctrl)
}
/**
- * Clean up a TX queue.
+ * DPDK callback to configure a TX queue.
*
- * Destroy objects, free allocated memory and reset the structure for reuse.
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param idx
+ * TX queue index.
+ * @param desc
+ * Number of descriptors to configure in queue.
+ * @param socket
+ * NUMA socket on which memory must be allocated.
+ * @param[in] conf
+ * Thresholds parameters.
*
- * @param txq_ctrl
- * Pointer to TX queue structure.
+ * @return
+ * 0 on success, negative errno value on failure.
+ */
+int
+mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+ unsigned int socket, const struct rte_eth_txconf *conf)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_txq_data *txq = (*priv->txqs)[idx];
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq, struct mlx5_txq_ctrl, txq);
+ int ret = 0;
+
+ if (mlx5_is_secondary())
+ return -E_RTE_SECONDARY;
+
+ priv_lock(priv);
+ if (desc <= MLX5_TX_COMP_THRESH) {
+ WARN("%p: number of descriptors requested for TX queue %u"
+ " must be higher than MLX5_TX_COMP_THRESH, using"
+ " %u instead of %u",
+ (void *)dev, idx, MLX5_TX_COMP_THRESH + 1, desc);
+ desc = MLX5_TX_COMP_THRESH + 1;
+ }
+ if (!rte_is_power_of_2(desc)) {
+ desc = 1 << log2above(desc);
+ WARN("%p: increased number of descriptors in TX queue %u"
+ " to the next power of two (%d)",
+ (void *)dev, idx, desc);
+ }
+ DEBUG("%p: configuring queue %u for %u descriptors",
+ (void *)dev, idx, desc);
+ if (idx >= priv->txqs_n) {
+ ERROR("%p: queue index out of range (%u >= %u)",
+ (void *)dev, idx, priv->txqs_n);
+ priv_unlock(priv);
+ return -EOVERFLOW;
+ }
+ if (!mlx5_priv_txq_releasable(priv, idx)) {
+ ret = EBUSY;
+ ERROR("%p: unable to release queue index %u",
+ (void *)dev, idx);
+ goto out;
+ }
+ mlx5_priv_txq_release(priv, idx);
+ txq_ctrl = mlx5_priv_txq_new(priv, idx, desc, socket, conf);
+ if (!txq_ctrl) {
+ ERROR("%p: unable to allocate queue index %u",
+ (void *)dev, idx);
+ ret = ENOMEM;
+ goto out;
+ }
+ DEBUG("%p: adding TX queue %p to list",
+ (void *)dev, (void *)txq_ctrl);
+ (*priv->txqs)[idx] = &txq_ctrl->txq;
+out:
+ priv_unlock(priv);
+ return -ret;
+}
+
+/**
+ * DPDK callback to release a TX queue.
+ *
+ * @param dpdk_txq
+ * Generic TX queue pointer.
*/
void
-txq_cleanup(struct txq_ctrl *txq_ctrl)
+mlx5_tx_queue_release(void *dpdk_txq)
{
- size_t i;
-
- DEBUG("cleaning up %p", (void *)txq_ctrl);
- txq_free_elts(txq_ctrl);
- if (txq_ctrl->qp != NULL)
- claim_zero(ibv_destroy_qp(txq_ctrl->qp));
- if (txq_ctrl->cq != NULL)
- claim_zero(ibv_destroy_cq(txq_ctrl->cq));
- for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
- if (txq_ctrl->txq.mp2mr[i].mr == NULL)
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
+ struct mlx5_txq_ctrl *txq_ctrl;
+ struct priv *priv;
+ unsigned int i;
+
+ if (mlx5_is_secondary())
+ return;
+
+ if (txq == NULL)
+ return;
+ txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+ priv = txq_ctrl->priv;
+ priv_lock(priv);
+ for (i = 0; (i != priv->txqs_n); ++i)
+ if ((*priv->txqs)[i] == txq) {
+ DEBUG("%p: removing TX queue %p from list",
+ (void *)priv->dev, (void *)txq_ctrl);
+ mlx5_priv_txq_release(priv, i);
break;
- claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[i].mr));
- }
- memset(txq_ctrl, 0, sizeof(*txq_ctrl));
+ }
+ priv_unlock(priv);
}
+
/**
- * Initialize TX queue.
+ * Locally map the UAR used in Tx queues for the BlueFlame doorbell.
*
- * @param tmpl
- * Pointer to TX queue control template.
- * @param txq_ctrl
- * Pointer to TX queue control.
+ * @param[in] priv
+ * Pointer to private structure.
+ * @param fd
+ * Verbs file descriptor to map UAR pages.
*
* @return
* 0 on success, errno value on failure.
*/
-static inline int
-txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
+int
+priv_tx_uar_remap(struct priv *priv, int fd)
{
- struct mlx5_qp *qp = to_mqp(tmpl->qp);
- struct ibv_cq *ibcq = tmpl->cq;
- struct ibv_mlx5_cq_info cq_info;
+ unsigned int i, j;
+ uintptr_t pages[priv->txqs_n];
+ unsigned int pages_n = 0;
+ uintptr_t uar_va;
+ void *addr;
+ struct mlx5_txq_data *txq;
+ struct mlx5_txq_ctrl *txq_ctrl;
+ int already_mapped;
+ size_t page_size = sysconf(_SC_PAGESIZE);
- if (ibv_mlx5_exp_get_cq_info(ibcq, &cq_info)) {
- ERROR("Unable to query CQ info. check your OFED.");
- return ENOTSUP;
- }
- if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
- ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
- "it should be set to %u", RTE_CACHE_LINE_SIZE);
- return EINVAL;
+ memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
+ /*
+ * As in rdma-core, UARs are mapped at OS page size granularity.
+ * Use the page-aligned address to avoid duplicate mmap() calls.
+ * See the libmlx5 function mlx5_init_context().
+ */
+ for (i = 0; i != priv->txqs_n; ++i) {
+ txq = (*priv->txqs)[i];
+ txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+ uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
+ uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
+ already_mapped = 0;
+ for (j = 0; j != pages_n; ++j) {
+ if (pages[j] == uar_va) {
+ already_mapped = 1;
+ break;
+ }
+ }
+ if (already_mapped)
+ continue;
+ pages[pages_n++] = uar_va;
+ addr = mmap((void *)uar_va, page_size,
+ PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+ txq_ctrl->uar_mmap_offset);
+ if (addr != (void *)uar_va) {
+ ERROR("call to mmap failed on UAR for txq %d\n", i);
+ return -1;
+ }
}
- tmpl->txq.cqe_n = log2above(cq_info.cqe_cnt);
- tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8;
- tmpl->txq.wqes = qp->gen_data.sqstart;
- tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt);
- tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
- tmpl->txq.bf_reg = qp->gen_data.bf->reg;
- tmpl->txq.cq_db = cq_info.dbrec;
- tmpl->txq.cqes =
- (volatile struct mlx5_cqe (*)[])
- (uintptr_t)cq_info.buf;
- tmpl->txq.elts =
- (struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
- ((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
return 0;
}
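The loop above deduplicates UAR mappings: several Tx queues may share one BlueFlame register page, so each bf_reg address is rounded down to a page boundary and compared against the pages already mapped before calling mmap(). As an illustration only (not part of the patch), the following standalone sketch reproduces that deduplication with two hypothetical register addresses and an assumed 4 KiB page size:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096u  /* assumed OS page size */
    /* round an address down to its page, same idea as RTE_ALIGN_FLOOR() */
    #define PAGE_FLOOR(v) ((v) & ~((uintptr_t)PAGE_SIZE - 1))

    int main(void)
    {
        /* hypothetical BlueFlame register addresses of two Tx queues */
        uintptr_t bf_reg[2] = { 0x7f0000001800u, 0x7f0000001a00u };
        uintptr_t pages[2];
        unsigned int pages_n = 0, i, j;

        for (i = 0; i != 2; ++i) {
            uintptr_t uar_va = PAGE_FLOOR(bf_reg[i]);
            int already_mapped = 0;

            for (j = 0; j != pages_n; ++j)
                if (pages[j] == uar_va)
                    already_mapped = 1;
            if (already_mapped)
                continue; /* same page as an earlier queue: map it only once */
            pages[pages_n++] = uar_va;
            printf("would mmap page at %#lx\n", (unsigned long)uar_va);
        }
        return 0;
    }

Both hypothetical addresses fall into the page at 0x7f0000001000, so only one mmap() call would be issued.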
/**
- * Configure a TX queue.
+ * Create the Tx queue Verbs object.
*
- * @param dev
- * Pointer to Ethernet device structure.
- * @param txq_ctrl
- * Pointer to TX queue structure.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param socket
- * NUMA socket on which memory must be allocated.
- * @param[in] conf
- * Thresholds parameters.
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * Queue index in DPDK Tx queue array.
*
* @return
- * 0 on success, errno value on failure.
+ * The Verbs object initialised on success, NULL otherwise.
*/
-int
-txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
- uint16_t desc, unsigned int socket,
- const struct rte_eth_txconf *conf)
+struct mlx5_txq_ibv*
+mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
{
- struct priv *priv = mlx5_get_priv(dev);
- struct txq_ctrl tmpl = {
- .priv = priv,
- .socket = socket,
- };
+ struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq_data, struct mlx5_txq_ctrl, txq);
+ struct mlx5_txq_ibv tmpl;
+ struct mlx5_txq_ibv *txq_ibv;
union {
- struct ibv_exp_qp_init_attr init;
- struct ibv_exp_cq_init_attr cq;
- struct ibv_exp_qp_attr mod;
- struct ibv_exp_cq_attr cq_attr;
+ struct ibv_qp_init_attr_ex init;
+ struct ibv_cq_init_attr_ex cq;
+ struct ibv_qp_attr mod;
+ struct ibv_cq_ex cq_attr;
} attr;
unsigned int cqe_n;
- const unsigned int max_tso_inline = ((MLX5_MAX_TSO_HEADER +
- (RTE_CACHE_LINE_SIZE - 1)) /
- RTE_CACHE_LINE_SIZE);
+ struct mlx5dv_qp qp = { .comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET };
+ struct mlx5dv_cq cq_info;
+ struct mlx5dv_obj obj;
+ const int desc = 1 << txq_data->elts_n;
int ret = 0;
+ assert(txq_data);
if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
- ret = ENOTSUP;
ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
goto error;
}
- tmpl.txq.flags = conf->txq_flags;
- assert(desc > MLX5_TX_COMP_THRESH);
- tmpl.txq.elts_n = log2above(desc);
- if (priv->mps == MLX5_MPW_ENHANCED)
- tmpl.txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
+ memset(&tmpl, 0, sizeof(struct mlx5_txq_ibv));
/* MRs will be registered in mp2mr[] later. */
- attr.cq = (struct ibv_exp_cq_init_attr){
+ attr.cq = (struct ibv_cq_init_attr_ex){
.comp_mask = 0,
};
cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
if (priv->mps == MLX5_MPW_ENHANCED)
cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
- tmpl.cq = ibv_exp_create_cq(priv->ctx,
- cqe_n,
- NULL, NULL, 0, &attr.cq);
+ tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, NULL, 0);
if (tmpl.cq == NULL) {
- ret = ENOMEM;
- ERROR("%p: CQ creation failure: %s",
- (void *)dev, strerror(ret));
+ ERROR("%p: CQ creation failure", (void *)txq_ctrl);
goto error;
}
- DEBUG("priv->device_attr.max_qp_wr is %d",
- priv->device_attr.max_qp_wr);
- DEBUG("priv->device_attr.max_sge is %d",
- priv->device_attr.max_sge);
- attr.init = (struct ibv_exp_qp_init_attr){
+ attr.init = (struct ibv_qp_init_attr_ex){
/* CQ to be associated with the send queue. */
.send_cq = tmpl.cq,
/* CQ to be associated with the receive queue. */
.recv_cq = tmpl.cq,
.cap = {
/* Max number of outstanding WRs. */
- .max_send_wr = ((priv->device_attr.max_qp_wr < desc) ?
- priv->device_attr.max_qp_wr :
- desc),
+ .max_send_wr =
+ ((priv->device_attr.orig_attr.max_qp_wr <
+ desc) ?
+ priv->device_attr.orig_attr.max_qp_wr :
+ desc),
/*
* Max number of scatter/gather elements in a WR,
* must be 1 to prevent libmlx5 from trying to affect
@@ -288,124 +351,204 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
.max_send_sge = 1,
},
.qp_type = IBV_QPT_RAW_PACKET,
- /* Do *NOT* enable this, completions events are managed per
- * TX burst. */
+ /*
+ * Do *NOT* enable this, completions events are managed per
+ * Tx burst.
+ */
.sq_sig_all = 0,
.pd = priv->pd,
- .comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
+ .comp_mask = IBV_QP_INIT_ATTR_PD,
};
- if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
- tmpl.txq.max_inline =
- ((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
- RTE_CACHE_LINE_SIZE);
- tmpl.txq.inline_en = 1;
- /* TSO and MPS can't be enabled concurrently. */
- assert(!priv->tso || !priv->mps);
- if (priv->mps == MLX5_MPW_ENHANCED) {
- tmpl.txq.inline_max_packet_sz =
- priv->inline_max_packet_sz;
- /* To minimize the size of data set, avoid requesting
- * too large WQ.
- */
- attr.init.cap.max_inline_data =
- ((RTE_MIN(priv->txq_inline,
- priv->inline_max_packet_sz) +
- (RTE_CACHE_LINE_SIZE - 1)) /
- RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
- } else if (priv->tso) {
- int inline_diff = tmpl.txq.max_inline - max_tso_inline;
-
- /*
- * Adjust inline value as Verbs aggregates
- * tso_inline and txq_inline fields.
- */
- attr.init.cap.max_inline_data = inline_diff > 0 ?
- inline_diff *
- RTE_CACHE_LINE_SIZE :
- 0;
- } else {
- attr.init.cap.max_inline_data =
- tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
- }
+ if (txq_data->inline_en)
+ attr.init.cap.max_inline_data = txq_ctrl->max_inline_data;
+ if (txq_data->tso_en) {
+ attr.init.max_tso_header = txq_ctrl->max_tso_header;
+ attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
}
- if (priv->tso) {
- attr.init.max_tso_header =
- max_tso_inline * RTE_CACHE_LINE_SIZE;
- attr.init.comp_mask |= IBV_EXP_QP_INIT_ATTR_MAX_TSO_HEADER;
- tmpl.txq.max_inline = RTE_MAX(tmpl.txq.max_inline,
- max_tso_inline);
- tmpl.txq.tso_en = 1;
- }
- if (priv->tunnel_en)
- tmpl.txq.tunnel_en = 1;
- tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
+ tmpl.qp = ibv_create_qp_ex(priv->ctx, &attr.init);
if (tmpl.qp == NULL) {
- ret = (errno ? errno : EINVAL);
- ERROR("%p: QP creation failure: %s",
- (void *)dev, strerror(ret));
+ ERROR("%p: QP creation failure", (void *)txq_ctrl);
goto error;
}
- DEBUG("TX queue capabilities: max_send_wr=%u, max_send_sge=%u,"
- " max_inline_data=%u",
- attr.init.cap.max_send_wr,
- attr.init.cap.max_send_sge,
- attr.init.cap.max_inline_data);
- attr.mod = (struct ibv_exp_qp_attr){
+ attr.mod = (struct ibv_qp_attr){
/* Move the QP to this state. */
.qp_state = IBV_QPS_INIT,
/* Primary port number. */
.port_num = priv->port
};
- ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod,
- (IBV_EXP_QP_STATE | IBV_EXP_QP_PORT));
+ ret = ibv_modify_qp(tmpl.qp, &attr.mod, (IBV_QP_STATE | IBV_QP_PORT));
if (ret) {
- ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
- (void *)dev, strerror(ret));
+ ERROR("%p: QP state to IBV_QPS_INIT failed", (void *)txq_ctrl);
goto error;
}
- ret = txq_setup(&tmpl, txq_ctrl);
- if (ret) {
- ERROR("%p: cannot initialize TX queue structure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- txq_alloc_elts(&tmpl, desc);
- attr.mod = (struct ibv_exp_qp_attr){
+ attr.mod = (struct ibv_qp_attr){
.qp_state = IBV_QPS_RTR
};
- ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
+ ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
if (ret) {
- ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
- (void *)dev, strerror(ret));
+ ERROR("%p: QP state to IBV_QPS_RTR failed", (void *)txq_ctrl);
goto error;
}
attr.mod.qp_state = IBV_QPS_RTS;
- ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
+ ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
if (ret) {
- ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
- (void *)dev, strerror(ret));
+ ERROR("%p: QP state to IBV_QPS_RTS failed", (void *)txq_ctrl);
goto error;
}
- /* Clean up txq in case we're reinitializing it. */
- DEBUG("%p: cleaning-up old txq just in case", (void *)txq_ctrl);
- txq_cleanup(txq_ctrl);
- *txq_ctrl = tmpl;
- DEBUG("%p: txq updated with %p", (void *)txq_ctrl, (void *)&tmpl);
- /* Pre-register known mempools. */
- rte_mempool_walk(txq_mp2mr_iter, txq_ctrl);
- assert(ret == 0);
- return 0;
+ txq_ibv = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_txq_ibv), 0,
+ txq_ctrl->socket);
+ if (!txq_ibv) {
+ ERROR("%p: cannot allocate memory", (void *)txq_ctrl);
+ goto error;
+ }
+ obj.cq.in = tmpl.cq;
+ obj.cq.out = &cq_info;
+ obj.qp.in = tmpl.qp;
+ obj.qp.out = &qp;
+ ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
+ if (ret != 0)
+ goto error;
+ if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+ ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+ "it should be set to %u", RTE_CACHE_LINE_SIZE);
+ goto error;
+ }
+ txq_data->cqe_n = log2above(cq_info.cqe_cnt);
+ txq_data->qp_num_8s = tmpl.qp->qp_num << 8;
+ txq_data->wqes = qp.sq.buf;
+ txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
+ txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
+ txq_data->bf_reg = qp.bf.reg;
+ txq_data->cq_db = cq_info.dbrec;
+ txq_data->cqes =
+ (volatile struct mlx5_cqe (*)[])
+ (uintptr_t)cq_info.buf;
+ txq_data->cq_ci = 0;
+ txq_data->cq_pi = 0;
+ txq_data->wqe_ci = 0;
+ txq_data->wqe_pi = 0;
+ txq_ibv->qp = tmpl.qp;
+ txq_ibv->cq = tmpl.cq;
+ rte_atomic32_inc(&txq_ibv->refcnt);
+ if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
+ txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
+ } else {
+ ERROR("Failed to retrieve UAR info, invalid libmlx5.so version");
+ goto error;
+ }
+ DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+ (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
+ LIST_INSERT_HEAD(&priv->txqsibv, txq_ibv, next);
+ return txq_ibv;
error:
- txq_cleanup(&tmpl);
- assert(ret > 0);
+ if (tmpl.cq)
+ claim_zero(ibv_destroy_cq(tmpl.cq));
+ if (tmpl.qp)
+ claim_zero(ibv_destroy_qp(tmpl.qp));
+ return NULL;
+}
+
+/**
+ * Get a Tx queue Verbs object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * Queue index in DPDK Tx queue array.
+ *
+ * @return
+ * The Verbs object if it exists.
+ */
+struct mlx5_txq_ibv*
+mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_txq_ctrl *txq_ctrl;
+
+ if (idx >= priv->txqs_n)
+ return NULL;
+ if (!(*priv->txqs)[idx])
+ return NULL;
+ txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+ if (txq_ctrl->ibv) {
+ rte_atomic32_inc(&txq_ctrl->ibv->refcnt);
+ DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+ (void *)txq_ctrl->ibv,
+ rte_atomic32_read(&txq_ctrl->ibv->refcnt));
+ }
+ return txq_ctrl->ibv;
+}
+
+/**
+ * Release a Tx Verbs queue object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param txq_ibv
+ * Verbs Tx queue object.
+ *
+ * @return
+ * 0 on success, errno on failure.
+ */
+int
+mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
+{
+ (void)priv;
+ assert(txq_ibv);
+ DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+ (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
+ if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
+ claim_zero(ibv_destroy_qp(txq_ibv->qp));
+ claim_zero(ibv_destroy_cq(txq_ibv->cq));
+ LIST_REMOVE(txq_ibv, next);
+ rte_free(txq_ibv);
+ return 0;
+ }
+ return EBUSY;
+}
+
+/**
+ * Return true if a single reference exists on the object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param txq_ibv
+ * Verbs Tx queue object.
+ */
+int
+mlx5_priv_txq_ibv_releasable(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
+{
+ (void)priv;
+ assert(txq_ibv);
+ return (rte_atomic32_read(&txq_ibv->refcnt) == 1);
+}
+
+/**
+ * Verify that the Verbs Tx queue list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_txq_ibv_verify(struct priv *priv)
+{
+ int ret = 0;
+ struct mlx5_txq_ibv *txq_ibv;
+
+ LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
+ DEBUG("%p: Verbs Tx queue %p still referenced", (void *)priv,
+ (void *)txq_ibv);
+ ++ret;
+ }
return ret;
}
/**
- * DPDK callback to configure a TX queue.
+ * Create a DPDK Tx queue.
*
- * @param dev
- * Pointer to Ethernet device structure.
+ * @param priv
+ * Pointer to private structure.
* @param idx
* TX queue index.
* @param desc
@@ -413,164 +556,236 @@ error:
* @param socket
* NUMA socket on which memory must be allocated.
* @param[in] conf
- * Thresholds parameters.
+ * Thresholds parameters.
*
* @return
- * 0 on success, negative errno value on failure.
+ * A DPDK Tx queue object on success, NULL otherwise.
*/
-int
-mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
- unsigned int socket, const struct rte_eth_txconf *conf)
+struct mlx5_txq_ctrl*
+mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
+ unsigned int socket,
+ const struct rte_eth_txconf *conf)
{
- struct priv *priv = dev->data->dev_private;
- struct txq *txq = (*priv->txqs)[idx];
- struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
- int ret;
+ const unsigned int max_tso_inline =
+ ((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) /
+ RTE_CACHE_LINE_SIZE);
+ struct mlx5_txq_ctrl *tmpl;
- if (mlx5_is_secondary())
- return -E_RTE_SECONDARY;
+ tmpl = rte_calloc_socket("TXQ", 1,
+ sizeof(*tmpl) +
+ desc * sizeof(struct rte_mbuf *),
+ 0, socket);
+ if (!tmpl)
+ return NULL;
+ assert(desc > MLX5_TX_COMP_THRESH);
+ tmpl->txq.flags = conf->txq_flags;
+ tmpl->priv = priv;
+ tmpl->socket = socket;
+ tmpl->txq.elts_n = log2above(desc);
+ if (priv->mps == MLX5_MPW_ENHANCED)
+ tmpl->txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
+ /* MRs will be registered in mp2mr[] later. */
+ DEBUG("priv->device_attr.max_qp_wr is %d",
+ priv->device_attr.orig_attr.max_qp_wr);
+ DEBUG("priv->device_attr.max_sge is %d",
+ priv->device_attr.orig_attr.max_sge);
+ if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+ unsigned int ds_cnt;
- priv_lock(priv);
- if (desc <= MLX5_TX_COMP_THRESH) {
- WARN("%p: number of descriptors requested for TX queue %u"
- " must be higher than MLX5_TX_COMP_THRESH, using"
- " %u instead of %u",
- (void *)dev, idx, MLX5_TX_COMP_THRESH + 1, desc);
- desc = MLX5_TX_COMP_THRESH + 1;
- }
- if (!rte_is_power_of_2(desc)) {
- desc = 1 << log2above(desc);
- WARN("%p: increased number of descriptors in TX queue %u"
- " to the next power of two (%d)",
- (void *)dev, idx, desc);
- }
- DEBUG("%p: configuring queue %u for %u descriptors",
- (void *)dev, idx, desc);
- if (idx >= priv->txqs_n) {
- ERROR("%p: queue index out of range (%u >= %u)",
- (void *)dev, idx, priv->txqs_n);
- priv_unlock(priv);
- return -EOVERFLOW;
- }
- if (txq != NULL) {
- DEBUG("%p: reusing already allocated queue index %u (%p)",
- (void *)dev, idx, (void *)txq);
- if (priv->started) {
- priv_unlock(priv);
- return -EEXIST;
- }
- (*priv->txqs)[idx] = NULL;
- txq_cleanup(txq_ctrl);
- /* Resize if txq size is changed. */
- if (txq_ctrl->txq.elts_n != log2above(desc)) {
- txq_ctrl = rte_realloc(txq_ctrl,
- sizeof(*txq_ctrl) +
- desc * sizeof(struct rte_mbuf *),
- RTE_CACHE_LINE_SIZE);
- if (!txq_ctrl) {
- ERROR("%p: unable to reallocate queue index %u",
- (void *)dev, idx);
- priv_unlock(priv);
- return -ENOMEM;
- }
+ tmpl->txq.max_inline =
+ ((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
+ RTE_CACHE_LINE_SIZE);
+ tmpl->txq.inline_en = 1;
+ /* TSO and MPS can't be enabled concurrently. */
+ assert(!priv->tso || !priv->mps);
+ if (priv->mps == MLX5_MPW_ENHANCED) {
+ tmpl->txq.inline_max_packet_sz =
+ priv->inline_max_packet_sz;
+ /* To minimize the size of the data set, avoid requesting
+ * too large a WQ.
+ */
+ tmpl->max_inline_data =
+ ((RTE_MIN(priv->txq_inline,
+ priv->inline_max_packet_sz) +
+ (RTE_CACHE_LINE_SIZE - 1)) /
+ RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
+ } else if (priv->tso) {
+ int inline_diff = tmpl->txq.max_inline - max_tso_inline;
+
+ /*
+ * Adjust inline value as Verbs aggregates
+ * tso_inline and txq_inline fields.
+ */
+ tmpl->max_inline_data = inline_diff > 0 ?
+ inline_diff *
+ RTE_CACHE_LINE_SIZE :
+ 0;
+ } else {
+ tmpl->max_inline_data =
+ tmpl->txq.max_inline * RTE_CACHE_LINE_SIZE;
}
- } else {
- txq_ctrl =
- rte_calloc_socket("TXQ", 1,
- sizeof(*txq_ctrl) +
- desc * sizeof(struct rte_mbuf *),
- 0, socket);
- if (txq_ctrl == NULL) {
- ERROR("%p: unable to allocate queue index %u",
- (void *)dev, idx);
- priv_unlock(priv);
- return -ENOMEM;
+ /*
+ * Check whether the inline size is so large that it could
+ * make the WQE DS count overflow.
+ * The calculation accounts for:
+ * WQE CTRL (1 DS)
+ * WQE ETH (1 DS)
+ * Inline part (N DS)
+ */
+ ds_cnt = 2 + (tmpl->txq.max_inline / MLX5_WQE_DWORD_SIZE);
+ if (ds_cnt > MLX5_DSEG_MAX) {
+ unsigned int max_inline = (MLX5_DSEG_MAX - 2) *
+ MLX5_WQE_DWORD_SIZE;
+
+ max_inline = max_inline - (max_inline %
+ RTE_CACHE_LINE_SIZE);
+ WARN("txq inline is too large (%d) setting it to "
+ "the maximum possible: %d\n",
+ priv->txq_inline, max_inline);
+ tmpl->txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
}
}
- ret = txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
- if (ret)
- rte_free(txq_ctrl);
- else {
- txq_ctrl->txq.stats.idx = idx;
- DEBUG("%p: adding TX queue %p to list",
- (void *)dev, (void *)txq_ctrl);
- (*priv->txqs)[idx] = &txq_ctrl->txq;
+ if (priv->tso) {
+ tmpl->max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
+ tmpl->txq.max_inline = RTE_MAX(tmpl->txq.max_inline,
+ max_tso_inline);
+ tmpl->txq.tso_en = 1;
}
- priv_unlock(priv);
- return -ret;
+ if (priv->tunnel_en)
+ tmpl->txq.tunnel_en = 1;
+ tmpl->txq.elts =
+ (struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
+ tmpl->txq.stats.idx = idx;
+ rte_atomic32_inc(&tmpl->refcnt);
+ DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+ (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+ LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
+ return tmpl;
}
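To see what the DS overflow check above bounds, take the usual constant values (assumed here, as they are defined elsewhere in the driver): MLX5_DSEG_MAX = 63, MLX5_WQE_DWORD_SIZE = 16 and a 64-byte cache line. A WQE spends 1 DS on the CTRL segment and 1 DS on the ETH segment, leaving 63 - 2 = 61 DS for inline data, i.e. 61 * 16 = 976 bytes, rounded down to a cache-line multiple: 976 - (976 % 64) = 960 bytes. A txq_inline request of, say, 1024 bytes would therefore be capped to 960 bytes, i.e. txq.max_inline = 960 / 64 = 15 cache lines.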
/**
- * DPDK callback to release a TX queue.
+ * Get a Tx queue.
*
- * @param dpdk_txq
- * Generic TX queue pointer.
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * TX queue index.
+ *
+ * @return
+ * A pointer to the queue if it exists.
*/
-void
-mlx5_tx_queue_release(void *dpdk_txq)
+struct mlx5_txq_ctrl*
+mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
{
- struct txq *txq = (struct txq *)dpdk_txq;
- struct txq_ctrl *txq_ctrl;
- struct priv *priv;
- unsigned int i;
+ struct mlx5_txq_ctrl *ctrl = NULL;
- if (mlx5_is_secondary())
- return;
+ if ((*priv->txqs)[idx]) {
+ ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
+ txq);
+ unsigned int i;
- if (txq == NULL)
- return;
- txq_ctrl = container_of(txq, struct txq_ctrl, txq);
- priv = txq_ctrl->priv;
- priv_lock(priv);
- for (i = 0; (i != priv->txqs_n); ++i)
- if ((*priv->txqs)[i] == txq) {
- DEBUG("%p: removing TX queue %p from list",
- (void *)priv->dev, (void *)txq_ctrl);
- (*priv->txqs)[i] = NULL;
- break;
+ mlx5_priv_txq_ibv_get(priv, idx);
+ for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
+ struct mlx5_mr *mr = NULL;
+
+ (void)mr;
+ if (ctrl->txq.mp2mr[i]) {
+ mr = priv_mr_get(priv, ctrl->txq.mp2mr[i]->mp);
+ assert(mr);
+ }
}
- txq_cleanup(txq_ctrl);
- rte_free(txq_ctrl);
- priv_unlock(priv);
+ rte_atomic32_inc(&ctrl->refcnt);
+ DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+ (void *)ctrl, rte_atomic32_read(&ctrl->refcnt));
+ }
+ return ctrl;
}
/**
- * DPDK callback for TX in secondary processes.
- *
- * This function configures all queues from primary process information
- * if necessary before reverting to the normal TX burst callback.
+ * Release a Tx queue.
*
- * @param dpdk_txq
- * Generic pointer to TX queue structure.
- * @param[in] pkts
- * Packets to transmit.
- * @param pkts_n
- * Number of packets in array.
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * TX queue index.
*
* @return
- * Number of packets successfully transmitted (<= pkts_n).
+ * 0 on success, errno on failure.
*/
-uint16_t
-mlx5_tx_burst_secondary_setup(void *dpdk_txq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
+int
+mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
{
- struct txq *txq = dpdk_txq;
- struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
- struct priv *priv = mlx5_secondary_data_setup(txq_ctrl->priv);
- struct priv *primary_priv;
- unsigned int index;
+ unsigned int i;
+ struct mlx5_txq_ctrl *txq;
- if (priv == NULL)
+ if (!(*priv->txqs)[idx])
return 0;
- primary_priv =
- mlx5_secondary_data[priv->dev->data->port_id].primary_priv;
- /* Look for queue index in both private structures. */
- for (index = 0; index != priv->txqs_n; ++index)
- if (((*primary_priv->txqs)[index] == txq) ||
- ((*priv->txqs)[index] == txq))
- break;
- if (index == priv->txqs_n)
+ txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+ DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+ (void *)txq, rte_atomic32_read(&txq->refcnt));
+ if (txq->ibv) {
+ int ret;
+
+ ret = mlx5_priv_txq_ibv_release(priv, txq->ibv);
+ if (!ret)
+ txq->ibv = NULL;
+ }
+ for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
+ if (txq->txq.mp2mr[i]) {
+ priv_mr_release(priv, txq->txq.mp2mr[i]);
+ txq->txq.mp2mr[i] = NULL;
+ }
+ }
+ if (rte_atomic32_dec_and_test(&txq->refcnt)) {
+ txq_free_elts(txq);
+ LIST_REMOVE(txq, next);
+ rte_free(txq);
+ (*priv->txqs)[idx] = NULL;
return 0;
- txq = (*priv->txqs)[index];
- return priv->dev->tx_pkt_burst(txq, pkts, pkts_n);
+ }
+ return EBUSY;
+}
+
+/**
+ * Verify if the queue can be released.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * TX queue index.
+ *
+ * @return
+ * 1 if the queue can be released.
+ */
+int
+mlx5_priv_txq_releasable(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_txq_ctrl *txq;
+
+ if (!(*priv->txqs)[idx])
+ return -1;
+ txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+ return (rte_atomic32_read(&txq->refcnt) == 1);
+}
+
+/**
+ * Verify that the Tx queue list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_txq_verify(struct priv *priv)
+{
+ struct mlx5_txq_ctrl *txq;
+ int ret = 0;
+
+ LIST_FOREACH(txq, &priv->txqsctrl, next) {
+ DEBUG("%p: Tx Queue %p still referenced", (void *)priv,
+ (void *)txq);
+ ++ret;
+ }
+ return ret;
}
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index a824787f..218ae831 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -128,11 +128,13 @@ pmd_drv_log_basename(const char *s)
#define DEBUG(...) PMD_DRV_LOG(DEBUG, __VA_ARGS__)
#define claim_zero(...) assert((__VA_ARGS__) == 0)
+#define claim_nonzero(...) assert((__VA_ARGS__) != 0)
#else /* NDEBUG */
#define DEBUG(...) (void)0
#define claim_zero(...) (__VA_ARGS__)
+#define claim_nonzero(...) (__VA_ARGS__)
#endif /* NDEBUG */
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 1b0fa40a..6fc315ef 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -36,22 +36,15 @@
#include <assert.h>
#include <stdint.h>
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ethdev.h>
#include <rte_common.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5_utils.h"
#include "mlx5.h"
#include "mlx5_autoconf.h"
/**
- * Configure a VLAN filter.
+ * DPDK callback to configure a VLAN filter.
*
* @param dev
* Pointer to Ethernet device structure.
@@ -61,14 +54,16 @@
* Toggle filter.
*
* @return
- * 0 on success, errno value on failure.
+ * 0 on success, negative errno value on failure.
*/
-static int
-vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+int
+mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
struct priv *priv = dev->data->dev_private;
unsigned int i;
+ int ret = 0;
+ priv_lock(priv);
DEBUG("%p: %s VLAN filter ID %" PRIu16,
(void *)dev, (on ? "enable" : "disable"), vlan_id);
assert(priv->vlan_filter_n <= RTE_DIM(priv->vlan_filter));
@@ -76,13 +71,15 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
if (priv->vlan_filter[i] == vlan_id)
break;
/* Check if there's room for another VLAN filter. */
- if (i == RTE_DIM(priv->vlan_filter))
- return ENOMEM;
+ if (i == RTE_DIM(priv->vlan_filter)) {
+ ret = -ENOMEM;
+ goto out;
+ }
if (i < priv->vlan_filter_n) {
assert(priv->vlan_filter_n != 0);
/* Enabling an existing VLAN filter has no effect. */
if (on)
- return 0;
+ goto out;
/* Remove VLAN filter from list. */
--priv->vlan_filter_n;
memmove(&priv->vlan_filter[i],
@@ -94,41 +91,16 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
assert(i == priv->vlan_filter_n);
/* Disabling an unknown VLAN filter has no effect. */
if (!on)
- return 0;
+ goto out;
/* Add new VLAN filter. */
priv->vlan_filter[priv->vlan_filter_n] = vlan_id;
++priv->vlan_filter_n;
}
- /* Rehash flows in all hash RX queues. */
- priv_mac_addrs_disable(priv);
- priv_special_flow_disable_all(priv);
- return priv_rehash_flows(priv);
-}
-
-/**
- * DPDK callback to configure a VLAN filter.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param vlan_id
- * VLAN ID to filter.
- * @param on
- * Toggle filter.
- *
- * @return
- * 0 on success, negative errno value on failure.
- */
-int
-mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
-{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
- priv_lock(priv);
- ret = vlan_filter_set(dev, vlan_id, on);
+ if (dev->data->dev_started)
+ priv_dev_traffic_restart(priv, dev);
+out:
priv_unlock(priv);
- assert(ret >= 0);
- return -ret;
+ return ret;
}
/**
@@ -144,22 +116,24 @@ mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
static void
priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
{
- struct rxq *rxq = (*priv->rxqs)[idx];
- struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- struct ibv_exp_wq_attr mod;
+ struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+ struct ibv_wq_attr mod;
uint16_t vlan_offloads =
- (on ? IBV_EXP_RECEIVE_WQ_CVLAN_STRIP : 0) |
+ (on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
0;
int err;
DEBUG("set VLAN offloads 0x%x for port %d queue %d",
vlan_offloads, rxq->port_id, idx);
- mod = (struct ibv_exp_wq_attr){
- .attr_mask = IBV_EXP_WQ_ATTR_VLAN_OFFLOADS,
- .vlan_offloads = vlan_offloads,
+ mod = (struct ibv_wq_attr){
+ .attr_mask = IBV_WQ_ATTR_FLAGS,
+ .flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
+ .flags = vlan_offloads,
};
- err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
+ err = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod);
if (err) {
ERROR("%p: failed to modified stripping mode: %s",
(void *)priv, strerror(err));
@@ -210,7 +184,7 @@ mlx5_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
* @param mask
* VLAN offload bit mask.
*/
-void
+int
mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
struct priv *priv = dev->data->dev_private;
@@ -221,7 +195,7 @@ mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask)
if (!priv->hw_vlan_strip) {
ERROR("VLAN stripping is not supported");
- return;
+ return 0;
}
/* Run on every RX queue and set/reset VLAN stripping. */
@@ -230,4 +204,6 @@ mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask)
priv_vlan_strip_queue_set(priv, i, hw_vlan_strip);
priv_unlock(priv);
}
+
+ return 0;
}
diff --git a/drivers/net/mrvl/Makefile b/drivers/net/mrvl/Makefile
new file mode 100644
index 00000000..815c3bae
--- /dev/null
+++ b/drivers/net/mrvl/Makefile
@@ -0,0 +1,68 @@
+# BSD LICENSE
+#
+# Copyright(c) 2017 Marvell International Ltd.
+# Copyright(c) 2017 Semihalf.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(MAKECMDGOALS),clean)
+ifneq ($(MAKECMDGOALS),config)
+ifeq ($(LIBMUSDK_PATH),)
+$(error "Please define LIBMUSDK_PATH environment variable")
+endif
+endif
+endif
+
+# library name
+LIB = librte_pmd_mrvl.a
+
+# library version
+LIBABIVER := 1
+
+# versioning export map
+EXPORT_MAP := rte_pmd_mrvl_version.map
+
+# external library dependencies
+CFLAGS += -I$(LIBMUSDK_PATH)/include
+CFLAGS += -DMVCONF_ARCH_DMA_ADDR_T_64BIT
+CFLAGS += -DCONF_PP2_BPOOL_COOKIE_SIZE=32
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -O3
+LDLIBS += -L$(LIBMUSDK_PATH)/lib
+LDLIBS += -lmusdk
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_cfgfile
+LDLIBS += -lrte_bus_vdev
+
+# library source files
+SRCS-$(CONFIG_RTE_LIBRTE_MRVL_PMD) += mrvl_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_MRVL_PMD) += mrvl_qos.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/mrvl/mrvl_ethdev.c b/drivers/net/mrvl/mrvl_ethdev.c
new file mode 100644
index 00000000..29361652
--- /dev/null
+++ b/drivers/net/mrvl/mrvl_ethdev.c
@@ -0,0 +1,2294 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Marvell International Ltd.
+ * Copyright(c) 2017 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Semihalf nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_ethdev.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_bus_vdev.h>
+
+/* Unfortunately, container_of() is defined by both DPDK and MUSDK,
+ * so keep only one definition.
+ *
+ * Note that it is not used in this PMD anyway.
+ */
+#ifdef container_of
+#undef container_of
+#endif
+
+#include <drivers/mv_pp2.h>
+#include <drivers/mv_pp2_bpool.h>
+#include <drivers/mv_pp2_hif.h>
+
+#include <fcntl.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "mrvl_ethdev.h"
+#include "mrvl_qos.h"
+
+/* bitmask with reserved hifs */
+#define MRVL_MUSDK_HIFS_RESERVED 0x0F
+/* bitmask with reserved bpools */
+#define MRVL_MUSDK_BPOOLS_RESERVED 0x07
+/* bitmask with reserved kernel RSS tables */
+#define MRVL_MUSDK_RSS_RESERVED 0x01
+/* maximum number of available hifs */
+#define MRVL_MUSDK_HIFS_MAX 9
+
+/* prefetch shift */
+#define MRVL_MUSDK_PREFETCH_SHIFT 2
+
+/* TCAM has 25 entries reserved for uc/mc filter entries */
+#define MRVL_MAC_ADDRS_MAX 25
+#define MRVL_MATCH_LEN 16
+#define MRVL_PKT_EFFEC_OFFS (MRVL_PKT_OFFS + MV_MH_SIZE)
+/* Maximum allowable packet size */
+#define MRVL_PKT_SIZE_MAX (10240 - MV_MH_SIZE)
+
+#define MRVL_IFACE_NAME_ARG "iface"
+#define MRVL_CFG_ARG "cfg"
+
+#define MRVL_BURST_SIZE 64
+
+#define MRVL_ARP_LENGTH 28
+
+#define MRVL_COOKIE_ADDR_INVALID ~0ULL
+
+#define MRVL_COOKIE_HIGH_ADDR_SHIFT (sizeof(pp2_cookie_t) * 8)
+#define MRVL_COOKIE_HIGH_ADDR_MASK (~0ULL << MRVL_COOKIE_HIGH_ADDR_SHIFT)
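These two macros exist because a pp2 cookie is narrower than a 64-bit virtual address: only the low bits of an mbuf pointer fit in the cookie, while the common upper bits are kept in cookie_addr_high and the pointer is later rebuilt as cookie_addr_high | cookie (see mrvl_flush_tx_shadow_queues() and mrvl_flush_bpool() below). A minimal sketch of that split, assuming a 32-bit cookie type and using hypothetical helper names:

    #include <stdint.h>

    typedef uint32_t cookie_t;                    /* assumed cookie width */
    #define COOKIE_HIGH_SHIFT (sizeof(cookie_t) * 8)
    #define COOKIE_HIGH_MASK  (~0ULL << COOKIE_HIGH_SHIFT)

    static uint64_t cookie_high;                  /* shared upper address bits */

    /* keep only the low bits of an mbuf address, remembering the high part */
    static inline cookie_t addr_to_cookie(uint64_t mbuf_addr)
    {
        cookie_high = mbuf_addr & COOKIE_HIGH_MASK;
        return (cookie_t)mbuf_addr;
    }

    /* rebuild the full pointer, mirroring "cookie_addr_high | inf.cookie" */
    static inline uint64_t cookie_to_addr(cookie_t cookie)
    {
        return cookie_high | cookie;
    }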
+
+/* Memory size (in bytes) for MUSDK dma buffers */
+#define MRVL_MUSDK_DMA_MEMSIZE 41943040
+
+static const char * const valid_args[] = {
+ MRVL_IFACE_NAME_ARG,
+ MRVL_CFG_ARG,
+ NULL
+};
+
+static int used_hifs = MRVL_MUSDK_HIFS_RESERVED;
+static struct pp2_hif *hifs[RTE_MAX_LCORE];
+static int used_bpools[PP2_NUM_PKT_PROC] = {
+ MRVL_MUSDK_BPOOLS_RESERVED,
+ MRVL_MUSDK_BPOOLS_RESERVED
+};
+
+struct pp2_bpool *mrvl_port_to_bpool_lookup[RTE_MAX_ETHPORTS];
+int mrvl_port_bpool_size[PP2_NUM_PKT_PROC][PP2_BPOOL_NUM_POOLS][RTE_MAX_LCORE];
+uint64_t cookie_addr_high = MRVL_COOKIE_ADDR_INVALID;
+
+/*
+ * To use buffer harvesting based on the loopback port, a shadow queue
+ * structure was introduced for buffer information bookkeeping.
+ *
+ * Before a packet is sent, its buffer information (pp2_buff_inf) is
+ * stored in the shadow queue. Once the packet has been transmitted, the
+ * no longer used buffer is released back to its original hardware pool,
+ * provided it originated from an interface.
+ * If it was generated by the application itself, i.e. the mbuf->port
+ * field is 0xff, it is released to the software mempool instead.
+ */
+struct mrvl_shadow_txq {
+ int head; /* write index - used when sending buffers */
+ int tail; /* read index - used when releasing buffers */
+ u16 size; /* queue occupied size */
+ u16 num_to_release; /* number of buffers sent, that can be released */
+ struct buff_release_entry ent[MRVL_PP2_TX_SHADOWQ_SIZE]; /* q entries */
+};
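head advances as packets are sent and tail as their buffers are released, both wrapping with MRVL_PP2_TX_SHADOWQ_MASK (used in mrvl_flush_tx_shadow_queues() below), which implies MRVL_PP2_TX_SHADOWQ_SIZE is a power of two. A simplified, self-contained sketch of that ring bookkeeping (the type and helper names are stand-ins, not driver API):

    #include <stdint.h>

    #define SHADOWQ_SIZE 512u                 /* assumed power-of-two size */
    #define SHADOWQ_MASK (SHADOWQ_SIZE - 1)

    struct entry { uint64_t cookie; uint64_t addr; }; /* stand-in for buff_release_entry */

    struct shadow_txq {
        unsigned int head;                    /* write index, advanced on send */
        unsigned int tail;                    /* read index, advanced on release */
        unsigned int size;                    /* entries currently queued */
        struct entry ent[SHADOWQ_SIZE];
    };

    /* remember buffer info just before the descriptor is handed to hardware */
    static inline void shadowq_push(struct shadow_txq *sq, struct entry e)
    {
        sq->ent[sq->head] = e;
        sq->head = (sq->head + 1) & SHADOWQ_MASK;
        sq->size++;
    }

    /* pop the oldest transmitted buffer so it can go back to its pool */
    static inline struct entry shadowq_pop(struct shadow_txq *sq)
    {
        struct entry e = sq->ent[sq->tail];

        sq->tail = (sq->tail + 1) & SHADOWQ_MASK;
        sq->size--;
        return e;
    }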
+
+struct mrvl_rxq {
+ struct mrvl_priv *priv;
+ struct rte_mempool *mp;
+ int queue_id;
+ int port_id;
+ int cksum_enabled;
+ uint64_t bytes_recv;
+ uint64_t drop_mac;
+};
+
+struct mrvl_txq {
+ struct mrvl_priv *priv;
+ int queue_id;
+ int port_id;
+ uint64_t bytes_sent;
+};
+
+/*
+ * Every Tx queue should have a dedicated shadow Tx queue.
+ *
+ * Ports assigned by DPDK might not start at zero or be contiguous, so
+ * as a workaround define shadow queues for every possible port to make
+ * sure each one fits somewhere.
+ */
+struct mrvl_shadow_txq shadow_txqs[RTE_MAX_ETHPORTS][RTE_MAX_LCORE];
+
+/** Number of ports configured. */
+int mrvl_ports_nb;
+static int mrvl_lcore_first;
+static int mrvl_lcore_last;
+
+static inline int
+mrvl_get_bpool_size(int pp2_id, int pool_id)
+{
+ int i;
+ int size = 0;
+
+ for (i = mrvl_lcore_first; i <= mrvl_lcore_last; i++)
+ size += mrvl_port_bpool_size[pp2_id][pool_id][i];
+
+ return size;
+}
+
+static inline int
+mrvl_reserve_bit(int *bitmap, int max)
+{
+ int n = sizeof(*bitmap) * 8 - __builtin_clz(*bitmap);
+
+ if (n >= max)
+ return -1;
+
+ *bitmap |= 1 << n;
+
+ return n;
+}
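mrvl_reserve_bit() allocates the bit just above the highest bit already set. For example, with a 32-bit int and used_bpools initialised to MRVL_MUSDK_BPOOLS_RESERVED (0x07), __builtin_clz(0x07) is 29, so n = 32 - 29 = 3: bit 3 is marked used and id 3 is returned. The next call sees 0x0f and returns 4, and so on until n reaches the max argument, at which point -1 is returned.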
+
+/**
+ * Configure RSS based on the DPDK RSS configuration.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param rss_conf
+ * Pointer to RSS configuration.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_configure_rss(struct mrvl_priv *priv, struct rte_eth_rss_conf *rss_conf)
+{
+ if (rss_conf->rss_key)
+ RTE_LOG(WARNING, PMD, "Changing hash key is not supported\n");
+
+ if (rss_conf->rss_hf == 0) {
+ priv->ppio_params.inqs_params.hash_type = PP2_PPIO_HASH_T_NONE;
+ } else if (rss_conf->rss_hf & ETH_RSS_IPV4) {
+ priv->ppio_params.inqs_params.hash_type =
+ PP2_PPIO_HASH_T_2_TUPLE;
+ } else if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) {
+ priv->ppio_params.inqs_params.hash_type =
+ PP2_PPIO_HASH_T_5_TUPLE;
+ priv->rss_hf_tcp = 1;
+ } else if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
+ priv->ppio_params.inqs_params.hash_type =
+ PP2_PPIO_HASH_T_5_TUPLE;
+ priv->rss_hf_tcp = 0;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * Ethernet device configuration.
+ *
+ * Prepare the driver for a given number of TX and RX queues and
+ * configure RSS.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_dev_configure(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ int ret;
+
+ if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_NONE &&
+ dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS) {
+ RTE_LOG(INFO, PMD, "Unsupported rx multi queue mode %d\n",
+ dev->data->dev_conf.rxmode.mq_mode);
+ return -EINVAL;
+ }
+
+ if (!dev->data->dev_conf.rxmode.hw_strip_crc) {
+ RTE_LOG(INFO, PMD,
+ "L2 CRC stripping is always enabled in hw\n");
+ dev->data->dev_conf.rxmode.hw_strip_crc = 1;
+ }
+
+ if (dev->data->dev_conf.rxmode.hw_vlan_strip) {
+ RTE_LOG(INFO, PMD, "VLAN stripping not supported\n");
+ return -EINVAL;
+ }
+
+ if (dev->data->dev_conf.rxmode.split_hdr_size) {
+ RTE_LOG(INFO, PMD, "Split headers not supported\n");
+ return -EINVAL;
+ }
+
+ if (dev->data->dev_conf.rxmode.enable_scatter) {
+ RTE_LOG(INFO, PMD, "RX Scatter/Gather not supported\n");
+ return -EINVAL;
+ }
+
+ if (dev->data->dev_conf.rxmode.enable_lro) {
+ RTE_LOG(INFO, PMD, "LRO not supported\n");
+ return -EINVAL;
+ }
+
+ if (dev->data->dev_conf.rxmode.jumbo_frame)
+ dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
+ ETHER_HDR_LEN - ETHER_CRC_LEN;
+
+ ret = mrvl_configure_rxqs(priv, dev->data->port_id,
+ dev->data->nb_rx_queues);
+ if (ret < 0)
+ return ret;
+
+ priv->ppio_params.outqs_params.num_outqs = dev->data->nb_tx_queues;
+ priv->ppio_params.maintain_stats = 1;
+ priv->nb_rx_queues = dev->data->nb_rx_queues;
+
+ if (dev->data->nb_rx_queues == 1 &&
+ dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) {
+ RTE_LOG(WARNING, PMD, "Disabling hash for 1 rx queue\n");
+ priv->ppio_params.inqs_params.hash_type = PP2_PPIO_HASH_T_NONE;
+
+ return 0;
+ }
+
+ return mrvl_configure_rss(priv,
+ &dev->data->dev_conf.rx_adv_conf.rss_conf);
+}
+
+/**
+ * DPDK callback to change the MTU.
+ *
+ * Setting the MTU affects hardware MRU (packets larger than the MRU
+ * will be dropped).
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param mtu
+ * New MTU.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ /* extra MV_MH_SIZE bytes are required for Marvell tag */
+ uint16_t mru = mtu + MV_MH_SIZE + ETHER_HDR_LEN + ETHER_CRC_LEN;
+ int ret;
+
+ if (mtu < ETHER_MIN_MTU || mru > MRVL_PKT_SIZE_MAX)
+ return -EINVAL;
+
+ ret = pp2_ppio_set_mru(priv->ppio, mru);
+ if (ret)
+ return ret;
+
+ return pp2_ppio_set_mtu(priv->ppio, mtu);
+}
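As a quick sanity check of the MRU formula, take the usual header sizes (assumed here: MV_MH_SIZE = 2 for the Marvell header, ETHER_HDR_LEN = 14, ETHER_CRC_LEN = 4): a standard MTU of 1500 gives an MRU of 1500 + 2 + 14 + 4 = 1520 bytes, comfortably below MRVL_PKT_SIZE_MAX (10240 - MV_MH_SIZE = 10238).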
+
+/**
+ * DPDK callback to bring the link up.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_dev_set_link_up(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ int ret;
+
+ ret = pp2_ppio_enable(priv->ppio);
+ if (ret)
+ return ret;
+
+ /*
+ * The MTU/MRU can only be updated once pp2_ppio_enable() has been
+ * called, as pp2_ppio_enable() changes port->t_mode from the default 0
+ * to PP2_TRAFFIC_INGRESS_EGRESS.
+ *
+ * Set the MTU to the default DPDK value here.
+ */
+ ret = mrvl_mtu_set(dev, dev->data->mtu);
+ if (ret)
+ pp2_ppio_disable(priv->ppio);
+
+ dev->data->dev_link.link_status = ETH_LINK_UP;
+
+ return ret;
+}
+
+/**
+ * DPDK callback to bring the link down.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_dev_set_link_down(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ int ret;
+
+ ret = pp2_ppio_disable(priv->ppio);
+ if (ret)
+ return ret;
+
+ dev->data->dev_link.link_status = ETH_LINK_DOWN;
+
+ return ret;
+}
+
+/**
+ * DPDK callback to start the device.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success, negative errno value on failure.
+ */
+static int
+mrvl_dev_start(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ char match[MRVL_MATCH_LEN];
+ int ret;
+
+ snprintf(match, sizeof(match), "ppio-%d:%d",
+ priv->pp_id, priv->ppio_id);
+ priv->ppio_params.match = match;
+
+ /*
+ * Calculate the maximum bpool size for the refill feature as 1.5x the
+ * configured size. If the bpool size exceeds this value, superfluous
+ * buffers are removed.
+ */
+ priv->bpool_max_size = priv->bpool_init_size +
+ (priv->bpool_init_size >> 1);
+ /*
+ * Calculate the minimum bpool size for the refill feature as follows:
+ * two default burst sizes multiplied by the number of Rx queues.
+ * If the bpool size drops below this value, new buffers are added
+ * to the pool.
+ */
+ priv->bpool_min_size = priv->nb_rx_queues * MRVL_BURST_SIZE * 2;
+
+ ret = pp2_ppio_init(&priv->ppio_params, &priv->ppio);
+ if (ret)
+ return ret;
+
+ /*
+ * In case there are some stale uc/mc MAC addresses, flush them here.
+ * This cannot be done during mrvl_dev_close() as port information
+ * is already gone at that point (due to pp2_ppio_deinit() in
+ * mrvl_dev_stop()).
+ */
+ if (!priv->uc_mc_flushed) {
+ ret = pp2_ppio_flush_mac_addrs(priv->ppio, 1, 1);
+ if (ret) {
+ RTE_LOG(ERR, PMD,
+ "Failed to flush uc/mc filter list\n");
+ goto out;
+ }
+ priv->uc_mc_flushed = 1;
+ }
+
+ if (!priv->vlan_flushed) {
+ ret = pp2_ppio_flush_vlan(priv->ppio);
+ if (ret) {
+ RTE_LOG(ERR, PMD, "Failed to flush vlan list\n");
+ /*
+ * TODO
+ * once pp2_ppio_flush_vlan() is supported jump to out
+ * goto out;
+ */
+ }
+ priv->vlan_flushed = 1;
+ }
+
+ /* For default QoS config, don't start classifier. */
+ if (mrvl_qos_cfg) {
+ ret = mrvl_start_qos_mapping(priv);
+ if (ret) {
+ pp2_ppio_deinit(priv->ppio);
+ return ret;
+ }
+ }
+
+ ret = mrvl_dev_set_link_up(dev);
+ if (ret)
+ goto out;
+
+ return 0;
+out:
+ pp2_ppio_deinit(priv->ppio);
+ return ret;
+}
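A worked example of the refill thresholds computed above, with a hypothetical bpool_init_size of 2048 buffers and 4 Rx queues: bpool_max_size = 2048 + (2048 >> 1) = 3072, and bpool_min_size = 4 * MRVL_BURST_SIZE * 2 = 4 * 64 * 2 = 512, so the refill logic keeps the pool between 512 and 3072 buffers.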
+
+/**
+ * Flush receive queues.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mrvl_flush_rx_queues(struct rte_eth_dev *dev)
+{
+ int i;
+
+ RTE_LOG(INFO, PMD, "Flushing rx queues\n");
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ int ret, num;
+
+ do {
+ struct mrvl_rxq *q = dev->data->rx_queues[i];
+ struct pp2_ppio_desc descs[MRVL_PP2_RXD_MAX];
+
+ num = MRVL_PP2_RXD_MAX;
+ ret = pp2_ppio_recv(q->priv->ppio,
+ q->priv->rxq_map[q->queue_id].tc,
+ q->priv->rxq_map[q->queue_id].inq,
+ descs, (uint16_t *)&num);
+ } while (ret == 0 && num);
+ }
+}
+
+/**
+ * Flush transmit shadow queues.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mrvl_flush_tx_shadow_queues(struct rte_eth_dev *dev)
+{
+ int i;
+
+ RTE_LOG(INFO, PMD, "Flushing tx shadow queues\n");
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ struct mrvl_shadow_txq *sq =
+ &shadow_txqs[dev->data->port_id][i];
+
+ while (sq->tail != sq->head) {
+ uint64_t addr = cookie_addr_high |
+ sq->ent[sq->tail].buff.cookie;
+ rte_pktmbuf_free((struct rte_mbuf *)addr);
+ sq->tail = (sq->tail + 1) & MRVL_PP2_TX_SHADOWQ_MASK;
+ }
+
+ memset(sq, 0, sizeof(*sq));
+ }
+}
+
+/**
+ * Flush hardware bpool (buffer-pool).
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mrvl_flush_bpool(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ uint32_t num;
+ int ret;
+
+ ret = pp2_bpool_get_num_buffs(priv->bpool, &num);
+ if (ret) {
+ RTE_LOG(ERR, PMD, "Failed to get bpool buffers number\n");
+ return;
+ }
+
+ while (num--) {
+ struct pp2_buff_inf inf;
+ uint64_t addr;
+
+ ret = pp2_bpool_get_buff(hifs[rte_lcore_id()], priv->bpool,
+ &inf);
+ if (ret)
+ break;
+
+ addr = cookie_addr_high | inf.cookie;
+ rte_pktmbuf_free((struct rte_mbuf *)addr);
+ }
+}
+
+/**
+ * DPDK callback to stop the device.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mrvl_dev_stop(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+
+ mrvl_dev_set_link_down(dev);
+ mrvl_flush_rx_queues(dev);
+ mrvl_flush_tx_shadow_queues(dev);
+ if (priv->qos_tbl)
+ pp2_cls_qos_tbl_deinit(priv->qos_tbl);
+ pp2_ppio_deinit(priv->ppio);
+ priv->ppio = NULL;
+}
+
+/**
+ * DPDK callback to close the device.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mrvl_dev_close(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ size_t i;
+
+ for (i = 0; i < priv->ppio_params.inqs_params.num_tcs; ++i) {
+ struct pp2_ppio_tc_params *tc_params =
+ &priv->ppio_params.inqs_params.tcs_params[i];
+
+ if (tc_params->inqs_params) {
+ rte_free(tc_params->inqs_params);
+ tc_params->inqs_params = NULL;
+ }
+ }
+
+ mrvl_flush_bpool(dev);
+}
+
+/**
+ * DPDK callback to retrieve physical link information.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param wait_to_complete
+ * Wait for request completion (ignored).
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
+{
+ /*
+ * TODO
+ * once MUSDK provides the necessary API, use it here
+ */
+ struct ethtool_cmd edata;
+ struct ifreq req;
+ int ret, fd;
+
+ edata.cmd = ETHTOOL_GSET;
+
+ strcpy(req.ifr_name, dev->data->name);
+ req.ifr_data = (void *)&edata;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ return -EFAULT;
+
+ ret = ioctl(fd, SIOCETHTOOL, &req);
+ if (ret == -1) {
+ close(fd);
+ return -EFAULT;
+ }
+
+ close(fd);
+
+ switch (ethtool_cmd_speed(&edata)) {
+ case SPEED_10:
+ dev->data->dev_link.link_speed = ETH_SPEED_NUM_10M;
+ break;
+ case SPEED_100:
+ dev->data->dev_link.link_speed = ETH_SPEED_NUM_100M;
+ break;
+ case SPEED_1000:
+ dev->data->dev_link.link_speed = ETH_SPEED_NUM_1G;
+ break;
+ case SPEED_10000:
+ dev->data->dev_link.link_speed = ETH_SPEED_NUM_10G;
+ break;
+ default:
+ dev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
+ }
+
+ dev->data->dev_link.link_duplex = edata.duplex ? ETH_LINK_FULL_DUPLEX :
+ ETH_LINK_HALF_DUPLEX;
+ dev->data->dev_link.link_autoneg = edata.autoneg ? ETH_LINK_AUTONEG :
+ ETH_LINK_FIXED;
+
+ return 0;
+}
+
+/**
+ * DPDK callback to enable promiscuous mode.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mrvl_promiscuous_enable(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ int ret;
+
+ ret = pp2_ppio_set_uc_promisc(priv->ppio, 1);
+ if (ret)
+ RTE_LOG(ERR, PMD, "Failed to enable promiscuous mode\n");
+}
+
+/**
+ * DPDK callback to enable allmulticast mode.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mrvl_allmulticast_enable(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ int ret;
+
+ ret = pp2_ppio_set_mc_promisc(priv->ppio, 1);
+ if (ret)
+ RTE_LOG(ERR, PMD, "Failed to enable all-multicast mode\n");
+}
+
+/**
+ * DPDK callback to disable promiscuous mode.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mrvl_promiscuous_disable(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ int ret;
+
+ ret = pp2_ppio_set_uc_promisc(priv->ppio, 0);
+ if (ret)
+ RTE_LOG(ERR, PMD, "Failed to disable promiscuous mode\n");
+}
+
+/**
+ * DPDK callback to disable allmulticast mode.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mrvl_allmulticast_disable(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ int ret;
+
+ ret = pp2_ppio_set_mc_promisc(priv->ppio, 0);
+ if (ret)
+ RTE_LOG(ERR, PMD, "Failed to disable all-multicast mode\n");
+}
+
+/**
+ * DPDK callback to remove a MAC address.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param index
+ * MAC address index.
+ */
+static void
+mrvl_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ char buf[ETHER_ADDR_FMT_SIZE];
+ int ret;
+
+ ret = pp2_ppio_remove_mac_addr(priv->ppio,
+ dev->data->mac_addrs[index].addr_bytes);
+ if (ret) {
+ ether_format_addr(buf, sizeof(buf),
+ &dev->data->mac_addrs[index]);
+ RTE_LOG(ERR, PMD, "Failed to remove mac %s\n", buf);
+ }
+}
+
+/**
+ * DPDK callback to add a MAC address.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param mac_addr
+ * MAC address to register.
+ * @param index
+ * MAC address index.
+ * @param vmdq
+ * VMDq pool index to associate address with (unused).
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+ uint32_t index, uint32_t vmdq __rte_unused)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ char buf[ETHER_ADDR_FMT_SIZE];
+ int ret;
+
+ if (index == 0)
+ /* For setting index 0, mrvl_mac_addr_set() should be used. */
+ return -1;
+
+ /*
+ * The maximum number of uc addresses can be tuned via the mvpp2x kernel
+ * module parameter uc_filter_max. The maximum number of mc addresses is
+ * then MRVL_MAC_ADDRS_MAX - uc_filter_max. Currently they default to 4
+ * and 21 respectively.
+ *
+ * If more than uc_filter_max uc addresses are added to the filter list,
+ * the NIC switches to promiscuous mode automatically.
+ *
+ * If more than MRVL_MAC_ADDRS_MAX - uc_filter_max mc addresses are added
+ * to the filter list, the NIC switches to all-multicast mode
+ * automatically.
+ */
+ ret = pp2_ppio_add_mac_addr(priv->ppio, mac_addr->addr_bytes);
+ if (ret) {
+ ether_format_addr(buf, sizeof(buf), mac_addr);
+ RTE_LOG(ERR, PMD, "Failed to add mac %s\n", buf);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * DPDK callback to set the primary MAC address.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param mac_addr
+ * MAC address to register.
+ */
+static void
+mrvl_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+
+ pp2_ppio_set_mac_addr(priv->ppio, mac_addr->addr_bytes);
+ /*
+ * TODO
+ * Port stops sending packets if pp2_ppio_set_mac_addr()
+ * was called after pp2_ppio_enable(). As a quick fix,
+ * enable the port once again.
+ */
+ pp2_ppio_enable(priv->ppio);
+}
+
+/**
+ * DPDK callback to get device statistics.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param stats
+ * Stats structure output buffer.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ struct pp2_ppio_statistics ppio_stats;
+ uint64_t drop_mac = 0;
+ unsigned int i, idx, ret;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct mrvl_rxq *rxq = dev->data->rx_queues[i];
+ struct pp2_ppio_inq_statistics rx_stats;
+
+ if (!rxq)
+ continue;
+
+ idx = rxq->queue_id;
+ if (unlikely(idx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)) {
+ RTE_LOG(ERR, PMD,
+ "rx queue %d stats out of range (0 - %d)\n",
+ idx, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1);
+ continue;
+ }
+
+ ret = pp2_ppio_inq_get_statistics(priv->ppio,
+ priv->rxq_map[idx].tc,
+ priv->rxq_map[idx].inq,
+ &rx_stats, 0);
+ if (unlikely(ret)) {
+ RTE_LOG(ERR, PMD,
+ "Failed to update rx queue %d stats\n", idx);
+ break;
+ }
+
+ stats->q_ibytes[idx] = rxq->bytes_recv;
+ stats->q_ipackets[idx] = rx_stats.enq_desc - rxq->drop_mac;
+ stats->q_errors[idx] = rx_stats.drop_early +
+ rx_stats.drop_fullq +
+ rx_stats.drop_bm +
+ rxq->drop_mac;
+ stats->ibytes += rxq->bytes_recv;
+ drop_mac += rxq->drop_mac;
+ }
+
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct mrvl_txq *txq = dev->data->tx_queues[i];
+ struct pp2_ppio_outq_statistics tx_stats;
+
+ if (!txq)
+ continue;
+
+ idx = txq->queue_id;
+ if (unlikely(idx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)) {
+ RTE_LOG(ERR, PMD,
+ "tx queue %d stats out of range (0 - %d)\n",
+ idx, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1);
+ }
+
+ ret = pp2_ppio_outq_get_statistics(priv->ppio, idx,
+ &tx_stats, 0);
+ if (unlikely(ret)) {
+ RTE_LOG(ERR, PMD,
+ "Failed to update tx queue %d stats\n", idx);
+ break;
+ }
+
+ stats->q_opackets[idx] = tx_stats.deq_desc;
+ stats->q_obytes[idx] = txq->bytes_sent;
+ stats->obytes += txq->bytes_sent;
+ }
+
+ ret = pp2_ppio_get_statistics(priv->ppio, &ppio_stats, 0);
+ if (unlikely(ret)) {
+ RTE_LOG(ERR, PMD, "Failed to update port statistics\n");
+ return ret;
+ }
+
+ stats->ipackets += ppio_stats.rx_packets - drop_mac;
+ stats->opackets += ppio_stats.tx_packets;
+ stats->imissed += ppio_stats.rx_fullq_dropped +
+ ppio_stats.rx_bm_dropped +
+ ppio_stats.rx_early_dropped +
+ ppio_stats.rx_fifo_dropped +
+ ppio_stats.rx_cls_dropped;
+ stats->ierrors = drop_mac;
+
+ return 0;
+}
+
+/**
+ * DPDK callback to clear device statistics.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+static void
+mrvl_stats_reset(struct rte_eth_dev *dev)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ int i;
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct mrvl_rxq *rxq = dev->data->rx_queues[i];
+
+ pp2_ppio_inq_get_statistics(priv->ppio, priv->rxq_map[i].tc,
+ priv->rxq_map[i].inq, NULL, 1);
+ rxq->bytes_recv = 0;
+ rxq->drop_mac = 0;
+ }
+
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct mrvl_txq *txq = dev->data->tx_queues[i];
+
+ pp2_ppio_outq_get_statistics(priv->ppio, i, NULL, 1);
+ txq->bytes_sent = 0;
+ }
+
+ pp2_ppio_get_statistics(priv->ppio, NULL, 1);
+}
+
+/**
+ * DPDK callback to get information about the device.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure (unused).
+ * @param info
+ * Info structure output buffer.
+ */
+static void
+mrvl_dev_infos_get(struct rte_eth_dev *dev __rte_unused,
+ struct rte_eth_dev_info *info)
+{
+ info->speed_capa = ETH_LINK_SPEED_10M |
+ ETH_LINK_SPEED_100M |
+ ETH_LINK_SPEED_1G |
+ ETH_LINK_SPEED_10G;
+
+ info->max_rx_queues = MRVL_PP2_RXQ_MAX;
+ info->max_tx_queues = MRVL_PP2_TXQ_MAX;
+ info->max_mac_addrs = MRVL_MAC_ADDRS_MAX;
+
+ info->rx_desc_lim.nb_max = MRVL_PP2_RXD_MAX;
+ info->rx_desc_lim.nb_min = MRVL_PP2_RXD_MIN;
+ info->rx_desc_lim.nb_align = MRVL_PP2_RXD_ALIGN;
+
+ info->tx_desc_lim.nb_max = MRVL_PP2_TXD_MAX;
+ info->tx_desc_lim.nb_min = MRVL_PP2_TXD_MIN;
+ info->tx_desc_lim.nb_align = MRVL_PP2_TXD_ALIGN;
+
+ info->rx_offload_capa = DEV_RX_OFFLOAD_JUMBO_FRAME |
+ DEV_RX_OFFLOAD_VLAN_FILTER |
+ DEV_RX_OFFLOAD_IPV4_CKSUM |
+ DEV_RX_OFFLOAD_UDP_CKSUM |
+ DEV_RX_OFFLOAD_TCP_CKSUM;
+
+ info->tx_offload_capa = DEV_TX_OFFLOAD_IPV4_CKSUM |
+ DEV_TX_OFFLOAD_UDP_CKSUM |
+ DEV_TX_OFFLOAD_TCP_CKSUM;
+
+ info->flow_type_rss_offloads = ETH_RSS_IPV4 |
+ ETH_RSS_NONFRAG_IPV4_TCP |
+ ETH_RSS_NONFRAG_IPV4_UDP;
+
+ /* By default packets are dropped if no descriptors are available */
+ info->default_rxconf.rx_drop_en = 1;
+
+ info->max_rx_pktlen = MRVL_PKT_SIZE_MAX;
+}
+
+/**
+ * Return supported packet types.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure (unused).
+ *
+ * @return
+ * Const pointer to the table with supported packet types.
+ */
+static const uint32_t *
+mrvl_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
+{
+ static const uint32_t ptypes[] = {
+ RTE_PTYPE_L2_ETHER,
+ RTE_PTYPE_L3_IPV4,
+ RTE_PTYPE_L3_IPV4_EXT,
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
+ RTE_PTYPE_L3_IPV6,
+ RTE_PTYPE_L3_IPV6_EXT,
+ RTE_PTYPE_L2_ETHER_ARP,
+ RTE_PTYPE_L4_TCP,
+ RTE_PTYPE_L4_UDP
+ };
+
+ return ptypes;
+}
+
+/**
+ * DPDK callback to get information about specific receive queue.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param rx_queue_id
+ * Receive queue index.
+ * @param qinfo
+ * Receive queue information structure.
+ */
+static void mrvl_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+ struct rte_eth_rxq_info *qinfo)
+{
+ struct mrvl_rxq *q = dev->data->rx_queues[rx_queue_id];
+ struct mrvl_priv *priv = dev->data->dev_private;
+ int inq = priv->rxq_map[rx_queue_id].inq;
+ int tc = priv->rxq_map[rx_queue_id].tc;
+ struct pp2_ppio_tc_params *tc_params =
+ &priv->ppio_params.inqs_params.tcs_params[tc];
+
+ qinfo->mp = q->mp;
+ qinfo->nb_desc = tc_params->inqs_params[inq].size;
+}
+
+/**
+ * DPDK callback to get information about specific transmit queue.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param tx_queue_id
+ * Transmit queue index.
+ * @param qinfo
+ * Transmit queue information structure.
+ */
+static void mrvl_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+ struct rte_eth_txq_info *qinfo)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+
+ qinfo->nb_desc =
+ priv->ppio_params.outqs_params.outqs_params[tx_queue_id].size;
+}
+
+/**
+ * DPDK callback to configure a VLAN filter.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param vlan_id
+ * VLAN ID to filter.
+ * @param on
+ * Toggle filter.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+
+ return on ? pp2_ppio_add_vlan(priv->ppio, vlan_id) :
+ pp2_ppio_remove_vlan(priv->ppio, vlan_id);
+}
+
+/**
+ * Release buffers to hardware bpool (buffer-pool)
+ *
+ * @param rxq
+ * Receive queue pointer.
+ * @param num
+ * Number of buffers to release to bpool.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_fill_bpool(struct mrvl_rxq *rxq, int num)
+{
+ struct buff_release_entry entries[MRVL_PP2_TXD_MAX];
+ struct rte_mbuf *mbufs[MRVL_PP2_TXD_MAX];
+ int i, ret;
+ unsigned int core_id = rte_lcore_id();
+ struct pp2_hif *hif = hifs[core_id];
+ struct pp2_bpool *bpool = rxq->priv->bpool;
+
+ ret = rte_pktmbuf_alloc_bulk(rxq->mp, mbufs, num);
+ if (ret)
+ return ret;
+
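+ /*
+ * All mbuf virtual addresses are expected to share the same high
+ * bits: those bits are latched once in cookie_addr_high, the
+ * per-buffer bpool cookie carries the rest, and the full address is
+ * reconstructed later by OR-ing the two.
+ */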
+ if (cookie_addr_high == MRVL_COOKIE_ADDR_INVALID)
+ cookie_addr_high =
+ (uint64_t)mbufs[0] & MRVL_COOKIE_HIGH_ADDR_MASK;
+
+ for (i = 0; i < num; i++) {
+ if (((uint64_t)mbufs[i] & MRVL_COOKIE_HIGH_ADDR_MASK)
+ != cookie_addr_high) {
+ RTE_LOG(ERR, PMD,
+ "mbuf virtual addr high 0x%lx out of range\n",
+ (uint64_t)mbufs[i] >> 32);
+ goto out;
+ }
+
+ entries[i].buff.addr =
+ rte_mbuf_data_iova_default(mbufs[i]);
+ entries[i].buff.cookie = (pp2_cookie_t)(uint64_t)mbufs[i];
+ entries[i].bpool = bpool;
+ }
+
+ pp2_bpool_put_buffs(hif, entries, (uint16_t *)&i);
+ mrvl_port_bpool_size[bpool->pp2_id][bpool->id][core_id] += i;
+
+ if (i != num)
+ goto out;
+
+ return 0;
+out:
+ for (; i < num; i++)
+ rte_pktmbuf_free(mbufs[i]);
+
+ return -1;
+}
+
+/**
+ * DPDK callback to configure the receive queue.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param idx
+ * RX queue index.
+ * @param desc
+ * Number of descriptors to configure in queue.
+ * @param socket
+ * NUMA socket on which memory must be allocated.
+ * @param conf
+ * Thresholds parameters (unused).
+ * @param mp
+ * Memory pool for buffer allocations.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+ unsigned int socket,
+ const struct rte_eth_rxconf *conf __rte_unused,
+ struct rte_mempool *mp)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ struct mrvl_rxq *rxq;
+ uint32_t min_size,
+ max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
+ int ret, tc, inq;
+
+ if (priv->rxq_map[idx].tc == MRVL_UNKNOWN_TC) {
+ /*
+ * Unknown TC mapping - this queue has not been assigned to any
+ * (TC, inq) pair in the QoS configuration.
+ */
+ RTE_LOG(ERR, PMD, "Unknown TC mapping for queue %hu eth%hhu\n",
+ idx, priv->ppio_id);
+ return -EFAULT;
+ }
+
+ min_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM -
+ MRVL_PKT_EFFEC_OFFS;
+ if (min_size < max_rx_pkt_len) {
+ RTE_LOG(ERR, PMD,
+ "Mbuf size must be increased to %u bytes to hold up to %u bytes of data.\n",
+ max_rx_pkt_len + RTE_PKTMBUF_HEADROOM +
+ MRVL_PKT_EFFEC_OFFS,
+ max_rx_pkt_len);
+ return -EINVAL;
+ }
+
+ if (dev->data->rx_queues[idx]) {
+ rte_free(dev->data->rx_queues[idx]);
+ dev->data->rx_queues[idx] = NULL;
+ }
+
+ rxq = rte_zmalloc_socket("rxq", sizeof(*rxq), 0, socket);
+ if (!rxq)
+ return -ENOMEM;
+
+ rxq->priv = priv;
+ rxq->mp = mp;
+ rxq->cksum_enabled = dev->data->dev_conf.rxmode.hw_ip_checksum;
+ rxq->queue_id = idx;
+ rxq->port_id = dev->data->port_id;
+ mrvl_port_to_bpool_lookup[rxq->port_id] = priv->bpool;
+
+ tc = priv->rxq_map[rxq->queue_id].tc,
+ inq = priv->rxq_map[rxq->queue_id].inq;
+ priv->ppio_params.inqs_params.tcs_params[tc].inqs_params[inq].size =
+ desc;
+
+ ret = mrvl_fill_bpool(rxq, desc);
+ if (ret) {
+ rte_free(rxq);
+ return ret;
+ }
+
+ priv->bpool_init_size += desc;
+
+ dev->data->rx_queues[idx] = rxq;
+
+ return 0;
+}
+
+/**
+ * DPDK callback to release the receive queue.
+ *
+ * @param rxq
+ * Generic receive queue pointer.
+ */
+static void
+mrvl_rx_queue_release(void *rxq)
+{
+ struct mrvl_rxq *q = rxq;
+ struct pp2_ppio_tc_params *tc_params;
+ int i, num, tc, inq;
+
+ if (!q)
+ return;
+
+ tc = q->priv->rxq_map[q->queue_id].tc;
+ inq = q->priv->rxq_map[q->queue_id].inq;
+ tc_params = &q->priv->ppio_params.inqs_params.tcs_params[tc];
+ num = tc_params->inqs_params[inq].size;
+ for (i = 0; i < num; i++) {
+ struct pp2_buff_inf inf;
+ uint64_t addr;
+
+ pp2_bpool_get_buff(hifs[rte_lcore_id()], q->priv->bpool, &inf);
+ addr = cookie_addr_high | inf.cookie;
+ rte_pktmbuf_free((struct rte_mbuf *)addr);
+ }
+
+ rte_free(q);
+}
+
+/**
+ * DPDK callback to configure the transmit queue.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param idx
+ * Transmit queue index.
+ * @param desc
+ * Number of descriptors to configure in the queue.
+ * @param socket
+ * NUMA socket on which memory must be allocated.
+ * @param conf
+ * Thresholds parameters (unused).
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+ unsigned int socket,
+ const struct rte_eth_txconf *conf __rte_unused)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ struct mrvl_txq *txq;
+
+ if (dev->data->tx_queues[idx]) {
+ rte_free(dev->data->tx_queues[idx]);
+ dev->data->tx_queues[idx] = NULL;
+ }
+
+ txq = rte_zmalloc_socket("txq", sizeof(*txq), 0, socket);
+ if (!txq)
+ return -ENOMEM;
+
+ txq->priv = priv;
+ txq->queue_id = idx;
+ txq->port_id = dev->data->port_id;
+ dev->data->tx_queues[idx] = txq;
+
+ priv->ppio_params.outqs_params.outqs_params[idx].size = desc;
+ priv->ppio_params.outqs_params.outqs_params[idx].weight = 1;
+
+ return 0;
+}
+
+/**
+ * DPDK callback to release the transmit queue.
+ *
+ * @param txq
+ * Generic transmit queue pointer.
+ */
+static void
+mrvl_tx_queue_release(void *txq)
+{
+ struct mrvl_txq *q = txq;
+
+ if (!q)
+ return;
+
+ rte_free(q);
+}
+
+/**
+ * Update RSS hash configuration
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param rss_conf
+ * Pointer to RSS configuration.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_rss_hash_update(struct rte_eth_dev *dev,
+ struct rte_eth_rss_conf *rss_conf)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+
+ return mrvl_configure_rss(priv, rss_conf);
+}
+
+/**
+ * DPDK callback to get RSS hash configuration.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param rss_conf
+ * Pointer to RSS configuration.
+ *
+ * @return
+ * Always 0.
+ */
+static int
+mrvl_rss_hash_conf_get(struct rte_eth_dev *dev,
+ struct rte_eth_rss_conf *rss_conf)
+{
+ struct mrvl_priv *priv = dev->data->dev_private;
+ enum pp2_ppio_hash_type hash_type =
+ priv->ppio_params.inqs_params.hash_type;
+
+ rss_conf->rss_key = NULL;
+
+ if (hash_type == PP2_PPIO_HASH_T_NONE)
+ rss_conf->rss_hf = 0;
+ else if (hash_type == PP2_PPIO_HASH_T_2_TUPLE)
+ rss_conf->rss_hf = ETH_RSS_IPV4;
+ else if (hash_type == PP2_PPIO_HASH_T_5_TUPLE && priv->rss_hf_tcp)
+ rss_conf->rss_hf = ETH_RSS_NONFRAG_IPV4_TCP;
+ else if (hash_type == PP2_PPIO_HASH_T_5_TUPLE && !priv->rss_hf_tcp)
+ rss_conf->rss_hf = ETH_RSS_NONFRAG_IPV4_UDP;
+
+ return 0;
+}
+
+static const struct eth_dev_ops mrvl_ops = {
+ .dev_configure = mrvl_dev_configure,
+ .dev_start = mrvl_dev_start,
+ .dev_stop = mrvl_dev_stop,
+ .dev_set_link_up = mrvl_dev_set_link_up,
+ .dev_set_link_down = mrvl_dev_set_link_down,
+ .dev_close = mrvl_dev_close,
+ .link_update = mrvl_link_update,
+ .promiscuous_enable = mrvl_promiscuous_enable,
+ .allmulticast_enable = mrvl_allmulticast_enable,
+ .promiscuous_disable = mrvl_promiscuous_disable,
+ .allmulticast_disable = mrvl_allmulticast_disable,
+ .mac_addr_remove = mrvl_mac_addr_remove,
+ .mac_addr_add = mrvl_mac_addr_add,
+ .mac_addr_set = mrvl_mac_addr_set,
+ .mtu_set = mrvl_mtu_set,
+ .stats_get = mrvl_stats_get,
+ .stats_reset = mrvl_stats_reset,
+ .dev_infos_get = mrvl_dev_infos_get,
+ .dev_supported_ptypes_get = mrvl_dev_supported_ptypes_get,
+ .rxq_info_get = mrvl_rxq_info_get,
+ .txq_info_get = mrvl_txq_info_get,
+ .vlan_filter_set = mrvl_vlan_filter_set,
+ .rx_queue_setup = mrvl_rx_queue_setup,
+ .rx_queue_release = mrvl_rx_queue_release,
+ .tx_queue_setup = mrvl_tx_queue_setup,
+ .tx_queue_release = mrvl_tx_queue_release,
+ .rss_hash_update = mrvl_rss_hash_update,
+ .rss_hash_conf_get = mrvl_rss_hash_conf_get,
+};
+
+/**
+ * Return packet type information and l3/l4 offsets.
+ *
+ * @param desc
+ * Pointer to the received packet descriptor.
+ * @param l3_offset
+ * l3 packet offset.
+ * @param l4_offset
+ * l4 packet offset.
+ *
+ * @return
+ * Packet type information.
+ */
+static inline uint64_t
+mrvl_desc_to_packet_type_and_offset(struct pp2_ppio_desc *desc,
+ uint8_t *l3_offset, uint8_t *l4_offset)
+{
+ enum pp2_inq_l3_type l3_type;
+ enum pp2_inq_l4_type l4_type;
+ uint64_t packet_type;
+
+ pp2_ppio_inq_desc_get_l3_info(desc, &l3_type, l3_offset);
+ pp2_ppio_inq_desc_get_l4_info(desc, &l4_type, l4_offset);
+
+ packet_type = RTE_PTYPE_L2_ETHER;
+
+ switch (l3_type) {
+ case PP2_INQ_L3_TYPE_IPV4_NO_OPTS:
+ packet_type |= RTE_PTYPE_L3_IPV4;
+ break;
+ case PP2_INQ_L3_TYPE_IPV4_OK:
+ packet_type |= RTE_PTYPE_L3_IPV4_EXT;
+ break;
+ case PP2_INQ_L3_TYPE_IPV4_TTL_ZERO:
+ packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+ break;
+ case PP2_INQ_L3_TYPE_IPV6_NO_EXT:
+ packet_type |= RTE_PTYPE_L3_IPV6;
+ break;
+ case PP2_INQ_L3_TYPE_IPV6_EXT:
+ packet_type |= RTE_PTYPE_L3_IPV6_EXT;
+ break;
+ case PP2_INQ_L3_TYPE_ARP:
+ packet_type |= RTE_PTYPE_L2_ETHER_ARP;
+ /*
+ * In case of ARP the l4_offset is set to a wrong value.
+ * Set it to a proper one so that mbuf->l3_len can later be
+ * calculated by subtracting l3_offset from l4_offset.
+ */
+ *l4_offset = *l3_offset + MRVL_ARP_LENGTH;
+ break;
+ default:
+ RTE_LOG(DEBUG, PMD, "Failed to recognise l3 packet type\n");
+ break;
+ }
+
+ switch (l4_type) {
+ case PP2_INQ_L4_TYPE_TCP:
+ packet_type |= RTE_PTYPE_L4_TCP;
+ break;
+ case PP2_INQ_L4_TYPE_UDP:
+ packet_type |= RTE_PTYPE_L4_UDP;
+ break;
+ default:
+ RTE_LOG(DEBUG, PMD, "Failed to recognise l4 packet type\n");
+ break;
+ }
+
+ return packet_type;
+}
+
+/**
+ * Get offload information from the received packet descriptor.
+ *
+ * @param desc
+ * Pointer to the received packet descriptor.
+ *
+ * @return
+ * Mbuf offload flags.
+ */
+static inline uint64_t
+mrvl_desc_to_ol_flags(struct pp2_ppio_desc *desc)
+{
+ uint64_t flags;
+ enum pp2_inq_desc_status status;
+
+ status = pp2_ppio_inq_desc_get_l3_pkt_error(desc);
+ if (unlikely(status != PP2_DESC_ERR_OK))
+ flags = PKT_RX_IP_CKSUM_BAD;
+ else
+ flags = PKT_RX_IP_CKSUM_GOOD;
+
+ status = pp2_ppio_inq_desc_get_l4_pkt_error(desc);
+ if (unlikely(status != PP2_DESC_ERR_OK))
+ flags |= PKT_RX_L4_CKSUM_BAD;
+ else
+ flags |= PKT_RX_L4_CKSUM_GOOD;
+
+ return flags;
+}
+
+/**
+ * DPDK callback for receive.
+ *
+ * @param rxq
+ * Generic pointer to the receive queue.
+ * @param rx_pkts
+ * Array to store received packets.
+ * @param nb_pkts
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets successfully received.
+ */
+static uint16_t
+mrvl_rx_pkt_burst(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct mrvl_rxq *q = rxq;
+ struct pp2_ppio_desc descs[nb_pkts];
+ struct pp2_bpool *bpool;
+ int i, ret, rx_done = 0;
+ int num;
+ unsigned int core_id = rte_lcore_id();
+
+ if (unlikely(!q->priv->ppio))
+ return 0;
+
+ bpool = q->priv->bpool;
+
+ ret = pp2_ppio_recv(q->priv->ppio, q->priv->rxq_map[q->queue_id].tc,
+ q->priv->rxq_map[q->queue_id].inq, descs, &nb_pkts);
+ if (unlikely(ret < 0)) {
+ RTE_LOG(ERR, PMD, "Failed to receive packets\n");
+ return 0;
+ }
+ mrvl_port_bpool_size[bpool->pp2_id][bpool->id][core_id] -= nb_pkts;
+
+ for (i = 0; i < nb_pkts; i++) {
+ struct rte_mbuf *mbuf;
+ uint8_t l3_offset, l4_offset;
+ enum pp2_inq_desc_status status;
+ uint64_t addr;
+
+ if (likely(nb_pkts - i > MRVL_MUSDK_PREFETCH_SHIFT)) {
+ struct pp2_ppio_desc *pref_desc;
+ u64 pref_addr;
+
+ pref_desc = &descs[i + MRVL_MUSDK_PREFETCH_SHIFT];
+ pref_addr = cookie_addr_high |
+ pp2_ppio_inq_desc_get_cookie(pref_desc);
+ rte_mbuf_prefetch_part1((struct rte_mbuf *)(pref_addr));
+ rte_mbuf_prefetch_part2((struct rte_mbuf *)(pref_addr));
+ }
+
+ addr = cookie_addr_high |
+ pp2_ppio_inq_desc_get_cookie(&descs[i]);
+ mbuf = (struct rte_mbuf *)addr;
+ rte_pktmbuf_reset(mbuf);
+
+ /* drop packet in case of mac, overrun or resource error */
+ status = pp2_ppio_inq_desc_get_l2_pkt_error(&descs[i]);
+ if (unlikely(status != PP2_DESC_ERR_OK)) {
+ struct pp2_buff_inf binf = {
+ .addr = rte_mbuf_data_iova_default(mbuf),
+ .cookie = (pp2_cookie_t)(uint64_t)mbuf,
+ };
+
+ pp2_bpool_put_buff(hifs[core_id], bpool, &binf);
+ mrvl_port_bpool_size
+ [bpool->pp2_id][bpool->id][core_id]++;
+ q->drop_mac++;
+ continue;
+ }
+
+ mbuf->data_off += MRVL_PKT_EFFEC_OFFS;
+ mbuf->pkt_len = pp2_ppio_inq_desc_get_pkt_len(&descs[i]);
+ mbuf->data_len = mbuf->pkt_len;
+ mbuf->port = q->port_id;
+ mbuf->packet_type =
+ mrvl_desc_to_packet_type_and_offset(&descs[i],
+ &l3_offset,
+ &l4_offset);
+ mbuf->l2_len = l3_offset;
+ mbuf->l3_len = l4_offset - l3_offset;
+
+ if (likely(q->cksum_enabled))
+ mbuf->ol_flags = mrvl_desc_to_ol_flags(&descs[i]);
+
+ rx_pkts[rx_done++] = mbuf;
+ q->bytes_recv += mbuf->pkt_len;
+ }
+
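+ /*
+ * Opportunistic bpool maintenance: refill the pool with a burst of
+ * fresh mbufs when it runs low (or is below its configured size while
+ * nothing was received) and drain the excess back to the mempool when
+ * it grows above bpool_max_size. The trylock simply skips the
+ * housekeeping if another lcore is already doing it.
+ */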
+ if (rte_spinlock_trylock(&q->priv->lock) == 1) {
+ num = mrvl_get_bpool_size(bpool->pp2_id, bpool->id);
+
+ if (unlikely(num <= q->priv->bpool_min_size ||
+ (!rx_done && num < q->priv->bpool_init_size))) {
+ ret = mrvl_fill_bpool(q, MRVL_BURST_SIZE);
+ if (ret)
+ RTE_LOG(ERR, PMD, "Failed to fill bpool\n");
+ } else if (unlikely(num > q->priv->bpool_max_size)) {
+ int i;
+ int pkt_to_remove = num - q->priv->bpool_init_size;
+ struct rte_mbuf *mbuf;
+ struct pp2_buff_inf buff;
+
+ RTE_LOG(DEBUG, PMD,
+ "\nport-%d:%d: bpool %d oversize - remove %d buffers (pool size: %d -> %d)\n",
+ bpool->pp2_id, q->priv->ppio->port_id,
+ bpool->id, pkt_to_remove, num,
+ q->priv->bpool_init_size);
+
+ for (i = 0; i < pkt_to_remove; i++) {
+ pp2_bpool_get_buff(hifs[core_id], bpool, &buff);
+ mbuf = (struct rte_mbuf *)
+ (cookie_addr_high | buff.cookie);
+ rte_pktmbuf_free(mbuf);
+ }
+ mrvl_port_bpool_size
+ [bpool->pp2_id][bpool->id][core_id] -=
+ pkt_to_remove;
+ }
+ rte_spinlock_unlock(&q->priv->lock);
+ }
+
+ return rx_done;
+}
+
+/**
+ * Prepare offload information.
+ *
+ * @param ol_flags
+ * Offload flags.
+ * @param packet_type
+ * Packet type bitfield.
+ * @param l3_type
+ * Pointer to the pp2_outq_l3_type value.
+ * @param l4_type
+ * Pointer to the pp2_outq_l4_type value.
+ * @param gen_l3_cksum
+ * Will be set to 1 in case l3 checksum is computed.
+ * @param gen_l4_cksum
+ * Will be set to 1 in case l4 checksum is computed.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static inline int
+mrvl_prepare_proto_info(uint64_t ol_flags, uint32_t packet_type,
+ enum pp2_outq_l3_type *l3_type,
+ enum pp2_outq_l4_type *l4_type,
+ int *gen_l3_cksum,
+ int *gen_l4_cksum)
+{
+ /*
+ * Based on ol_flags prepare the information
+ * for pp2_ppio_outq_desc_set_proto_info(), which sets up the
+ * descriptor for checksum offloading.
+ */
+ if (ol_flags & PKT_TX_IPV4) {
+ *l3_type = PP2_OUTQ_L3_TYPE_IPV4;
+ *gen_l3_cksum = ol_flags & PKT_TX_IP_CKSUM ? 1 : 0;
+ } else if (ol_flags & PKT_TX_IPV6) {
+ *l3_type = PP2_OUTQ_L3_TYPE_IPV6;
+ /* no checksum for ipv6 header */
+ *gen_l3_cksum = 0;
+ } else {
+ /* if the flags specify anything else, stop processing */
+ return -1;
+ }
+
+ ol_flags &= PKT_TX_L4_MASK;
+ if ((packet_type & RTE_PTYPE_L4_TCP) &&
+ ol_flags == PKT_TX_TCP_CKSUM) {
+ *l4_type = PP2_OUTQ_L4_TYPE_TCP;
+ *gen_l4_cksum = 1;
+ } else if ((packet_type & RTE_PTYPE_L4_UDP) &&
+ ol_flags == PKT_TX_UDP_CKSUM) {
+ *l4_type = PP2_OUTQ_L4_TYPE_UDP;
+ *gen_l4_cksum = 1;
+ } else {
+ *l4_type = PP2_OUTQ_L4_TYPE_OTHER;
+ /* no checksum for other type */
+ *gen_l4_cksum = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * Release already sent buffers to bpool (buffer-pool).
+ *
+ * @param ppio
+ * Pointer to the port structure.
+ * @param hif
+ * Pointer to the MUSDK hardware interface.
+ * @param sq
+ * Pointer to the shadow queue.
+ * @param qid
+ * Queue id number.
+ * @param force
+ * Force releasing packets.
+ */
+static inline void
+mrvl_free_sent_buffers(struct pp2_ppio *ppio, struct pp2_hif *hif,
+ struct mrvl_shadow_txq *sq, int qid, int force)
+{
+ struct buff_release_entry *entry;
+ uint16_t nb_done = 0, num = 0, skip_bufs = 0;
+ int i, core_id = rte_lcore_id();
+
+ pp2_ppio_get_num_outq_done(ppio, hif, qid, &nb_done);
+
+ sq->num_to_release += nb_done;
+
+ if (likely(!force &&
+ sq->num_to_release < MRVL_PP2_BUF_RELEASE_BURST_SIZE))
+ return;
+
+ nb_done = sq->num_to_release;
+ sq->num_to_release = 0;
+
+ for (i = 0; i < nb_done; i++) {
+ entry = &sq->ent[sq->tail + num];
+ if (unlikely(!entry->buff.addr)) {
+ RTE_LOG(ERR, PMD,
+ "Shadow memory @%d: cookie(%lx), pa(%lx)!\n",
+ sq->tail, (u64)entry->buff.cookie,
+ (u64)entry->buff.addr);
+ skip_bufs = 1;
+ goto skip;
+ }
+
+ if (unlikely(!entry->bpool)) {
+ struct rte_mbuf *mbuf;
+
+ mbuf = (struct rte_mbuf *)
+ (cookie_addr_high | entry->buff.cookie);
+ rte_pktmbuf_free(mbuf);
+ skip_bufs = 1;
+ goto skip;
+ }
+
+ mrvl_port_bpool_size
+ [entry->bpool->pp2_id][entry->bpool->id][core_id]++;
+ num++;
+ if (unlikely(sq->tail + num == MRVL_PP2_TX_SHADOWQ_SIZE))
+ goto skip;
+ continue;
+skip:
+ if (likely(num))
+ pp2_bpool_put_buffs(hif, &sq->ent[sq->tail], &num);
+ num += skip_bufs;
+ sq->tail = (sq->tail + num) & MRVL_PP2_TX_SHADOWQ_MASK;
+ sq->size -= num;
+ num = 0;
+ }
+
+ if (likely(num)) {
+ pp2_bpool_put_buffs(hif, &sq->ent[sq->tail], &num);
+ sq->tail = (sq->tail + num) & MRVL_PP2_TX_SHADOWQ_MASK;
+ sq->size -= num;
+ }
+}
+
+/**
+ * DPDK callback for transmit.
+ *
+ * @param txq
+ * Generic pointer to the transmit queue.
+ * @param tx_pkts
+ * Packets to transmit.
+ * @param nb_pkts
+ * Number of packets in array.
+ *
+ * @return
+ * Number of packets successfully transmitted.
+ */
+static uint16_t
+mrvl_tx_pkt_burst(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct mrvl_txq *q = txq;
+ struct mrvl_shadow_txq *sq = &shadow_txqs[q->port_id][rte_lcore_id()];
+ struct pp2_hif *hif = hifs[rte_lcore_id()];
+ struct pp2_ppio_desc descs[nb_pkts];
+ int i, ret, bytes_sent = 0;
+ uint16_t num, sq_free_size;
+ uint64_t addr;
+
+ if (unlikely(!q->priv->ppio))
+ return 0;
+
+ if (sq->size)
+ mrvl_free_sent_buffers(q->priv->ppio, hif, sq, q->queue_id, 0);
+
+ sq_free_size = MRVL_PP2_TX_SHADOWQ_SIZE - sq->size - 1;
+ if (unlikely(nb_pkts > sq_free_size)) {
+ RTE_LOG(DEBUG, PMD,
+ "No room in shadow queue for %d packets! %d packets will be sent.\n",
+ nb_pkts, sq_free_size);
+ nb_pkts = sq_free_size;
+ }
+
+ for (i = 0; i < nb_pkts; i++) {
+ struct rte_mbuf *mbuf = tx_pkts[i];
+ int gen_l3_cksum, gen_l4_cksum;
+ enum pp2_outq_l3_type l3_type;
+ enum pp2_outq_l4_type l4_type;
+
+ if (likely(nb_pkts - i > MRVL_MUSDK_PREFETCH_SHIFT)) {
+ struct rte_mbuf *pref_pkt_hdr;
+
+ pref_pkt_hdr = tx_pkts[i + MRVL_MUSDK_PREFETCH_SHIFT];
+ rte_mbuf_prefetch_part1(pref_pkt_hdr);
+ rte_mbuf_prefetch_part2(pref_pkt_hdr);
+ }
+
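+ /*
+ * Remember the mbuf in the per-lcore shadow queue; it is released
+ * back to its bpool (or freed, if it cannot be recycled, e.g.
+ * unknown port or refcnt > 1) only once the hardware reports the
+ * descriptor as transmitted - see mrvl_free_sent_buffers().
+ */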
+ sq->ent[sq->head].buff.cookie = (pp2_cookie_t)(uint64_t)mbuf;
+ sq->ent[sq->head].buff.addr =
+ rte_mbuf_data_iova_default(mbuf);
+ sq->ent[sq->head].bpool =
+ (unlikely(mbuf->port == 0xff || mbuf->refcnt > 1)) ?
+ NULL : mrvl_port_to_bpool_lookup[mbuf->port];
+ sq->head = (sq->head + 1) & MRVL_PP2_TX_SHADOWQ_MASK;
+ sq->size++;
+
+ pp2_ppio_outq_desc_reset(&descs[i]);
+ pp2_ppio_outq_desc_set_phys_addr(&descs[i],
+ rte_pktmbuf_iova(mbuf));
+ pp2_ppio_outq_desc_set_pkt_offset(&descs[i], 0);
+ pp2_ppio_outq_desc_set_pkt_len(&descs[i],
+ rte_pktmbuf_pkt_len(mbuf));
+
+ bytes_sent += rte_pktmbuf_pkt_len(mbuf);
+ /*
+ * in case unsupported ol_flags were passed
+ * do not update descriptor offload information
+ */
+ ret = mrvl_prepare_proto_info(mbuf->ol_flags, mbuf->packet_type,
+ &l3_type, &l4_type, &gen_l3_cksum,
+ &gen_l4_cksum);
+ if (unlikely(ret))
+ continue;
+
+ pp2_ppio_outq_desc_set_proto_info(&descs[i], l3_type, l4_type,
+ mbuf->l2_len,
+ mbuf->l2_len + mbuf->l3_len,
+ gen_l3_cksum, gen_l4_cksum);
+ }
+
+ num = nb_pkts;
+ pp2_ppio_send(q->priv->ppio, hif, q->queue_id, descs, &nb_pkts);
+ /* roll back shadow queue entries for packets that were not sent */
+ if (unlikely(num > nb_pkts)) {
+ for (i = nb_pkts; i < num; i++) {
+ sq->head = (MRVL_PP2_TX_SHADOWQ_SIZE + sq->head - 1) &
+ MRVL_PP2_TX_SHADOWQ_MASK;
+ addr = cookie_addr_high | sq->ent[sq->head].buff.cookie;
+ bytes_sent -=
+ rte_pktmbuf_pkt_len((struct rte_mbuf *)addr);
+ }
+ sq->size -= num - nb_pkts;
+ }
+
+ q->bytes_sent += bytes_sent;
+
+ return nb_pkts;
+}
+
+/**
+ * Initialize packet processor.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_init_pp2(void)
+{
+ struct pp2_init_params init_params;
+
+ memset(&init_params, 0, sizeof(init_params));
+ init_params.hif_reserved_map = MRVL_MUSDK_HIFS_RESERVED;
+ init_params.bm_pool_reserved_map = MRVL_MUSDK_BPOOLS_RESERVED;
+ init_params.rss_tbl_reserved_map = MRVL_MUSDK_RSS_RESERVED;
+
+ return pp2_init(&init_params);
+}
+
+/**
+ * Deinitialize packet processor.
+ */
+static void
+mrvl_deinit_pp2(void)
+{
+ pp2_deinit();
+}
+
+/**
+ * Create private device structure.
+ *
+ * @param dev_name
+ * Pointer to the port name passed in the initialization parameters.
+ *
+ * @return
+ * Pointer to the newly allocated private device structure.
+ */
+static struct mrvl_priv *
+mrvl_priv_create(const char *dev_name)
+{
+ struct pp2_bpool_params bpool_params;
+ char match[MRVL_MATCH_LEN];
+ struct mrvl_priv *priv;
+ int ret, bpool_bit;
+
+ priv = rte_zmalloc_socket(dev_name, sizeof(*priv), 0, rte_socket_id());
+ if (!priv)
+ return NULL;
+
+ ret = pp2_netdev_get_ppio_info((char *)(uintptr_t)dev_name,
+ &priv->pp_id, &priv->ppio_id);
+ if (ret)
+ goto out_free_priv;
+
+ bpool_bit = mrvl_reserve_bit(&used_bpools[priv->pp_id],
+ PP2_BPOOL_NUM_POOLS);
+ if (bpool_bit < 0)
+ goto out_free_priv;
+ priv->bpool_bit = bpool_bit;
+
+ snprintf(match, sizeof(match), "pool-%d:%d", priv->pp_id,
+ priv->bpool_bit);
+ memset(&bpool_params, 0, sizeof(bpool_params));
+ bpool_params.match = match;
+ bpool_params.buff_len = MRVL_PKT_SIZE_MAX + MRVL_PKT_EFFEC_OFFS;
+ ret = pp2_bpool_init(&bpool_params, &priv->bpool);
+ if (ret)
+ goto out_clear_bpool_bit;
+
+ priv->ppio_params.type = PP2_PPIO_T_NIC;
+ rte_spinlock_init(&priv->lock);
+
+ return priv;
+out_clear_bpool_bit:
+ used_bpools[priv->pp_id] &= ~(1 << priv->bpool_bit);
+out_free_priv:
+ rte_free(priv);
+ return NULL;
+}
+
+/**
+ * Create device representing Ethernet port.
+ *
+ * @param vdev
+ * Pointer to the backing virtual device.
+ * @param name
+ * Pointer to the port's name.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_eth_dev_create(struct rte_vdev_device *vdev, const char *name)
+{
+ int ret, fd = socket(AF_INET, SOCK_DGRAM, 0);
+ struct rte_eth_dev *eth_dev;
+ struct mrvl_priv *priv;
+ struct ifreq req;
+
+ eth_dev = rte_eth_dev_allocate(name);
+ if (!eth_dev)
+ return -ENOMEM;
+
+ priv = mrvl_priv_create(name);
+ if (!priv) {
+ ret = -ENOMEM;
+ goto out_free_dev;
+ }
+
+ eth_dev->data->mac_addrs =
+ rte_zmalloc("mac_addrs",
+ ETHER_ADDR_LEN * MRVL_MAC_ADDRS_MAX, 0);
+ if (!eth_dev->data->mac_addrs) {
+ RTE_LOG(ERR, PMD, "Failed to allocate space for eth addrs\n");
+ ret = -ENOMEM;
+ goto out_free_priv;
+ }
+
+ memset(&req, 0, sizeof(req));
+ strcpy(req.ifr_name, name);
+ ret = ioctl(fd, SIOCGIFHWADDR, &req);
+ if (ret)
+ goto out_free_mac;
+
+ memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
+ req.ifr_addr.sa_data, ETHER_ADDR_LEN);
+
+ eth_dev->rx_pkt_burst = mrvl_rx_pkt_burst;
+ eth_dev->tx_pkt_burst = mrvl_tx_pkt_burst;
+ eth_dev->data->dev_private = priv;
+ eth_dev->device = &vdev->device;
+ eth_dev->dev_ops = &mrvl_ops;
+
+ return 0;
+out_free_mac:
+ rte_free(eth_dev->data->mac_addrs);
+out_free_dev:
+ rte_eth_dev_release_port(eth_dev);
+out_free_priv:
+ rte_free(priv);
+
+ return ret;
+}
+
+/**
+ * Cleanup previously created device representing Ethernet port.
+ *
+ * @param name
+ * Pointer to the port name.
+ */
+static void
+mrvl_eth_dev_destroy(const char *name)
+{
+ struct rte_eth_dev *eth_dev;
+ struct mrvl_priv *priv;
+
+ eth_dev = rte_eth_dev_allocated(name);
+ if (!eth_dev)
+ return;
+
+ priv = eth_dev->data->dev_private;
+ pp2_bpool_deinit(priv->bpool);
+ rte_free(priv);
+ rte_free(eth_dev->data->mac_addrs);
+ rte_eth_dev_release_port(eth_dev);
+}
+
+/**
+ * Callback used by rte_kvargs_process() during argument parsing.
+ *
+ * @param key
+ * Pointer to the parsed key (unused).
+ * @param value
+ * Pointer to the parsed value.
+ * @param extra_args
+ * Pointer to the extra arguments which contains address of the
+ * table of pointers to parsed interface names.
+ *
+ * @return
+ * Always 0.
+ */
+static int
+mrvl_get_ifnames(const char *key __rte_unused, const char *value,
+ void *extra_args)
+{
+ const char **ifnames = extra_args;
+
+ ifnames[mrvl_ports_nb++] = value;
+
+ return 0;
+}
+
+/**
+ * Initialize per-lcore MUSDK hardware interfaces (hifs).
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+mrvl_init_hifs(void)
+{
+ struct pp2_hif_params params;
+ char match[MRVL_MATCH_LEN];
+ int i, ret;
+
+ RTE_LCORE_FOREACH(i) {
+ ret = mrvl_reserve_bit(&used_hifs, MRVL_MUSDK_HIFS_MAX);
+ if (ret < 0)
+ return ret;
+
+ snprintf(match, sizeof(match), "hif-%d", ret);
+ memset(&params, 0, sizeof(params));
+ params.match = match;
+ params.out_size = MRVL_PP2_AGGR_TXQD_MAX;
+ ret = pp2_hif_init(&params, &hifs[i]);
+ if (ret) {
+ RTE_LOG(ERR, PMD, "Failed to initialize hif %d\n", i);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Deinitialize per-lcore MUSDK hardware interfaces (hifs).
+ */
+static void
+mrvl_deinit_hifs(void)
+{
+ int i;
+
+ RTE_LCORE_FOREACH(i) {
+ if (hifs[i])
+ pp2_hif_deinit(hifs[i]);
+ }
+}
+
+static void mrvl_set_first_last_cores(int core_id)
+{
+ if (core_id < mrvl_lcore_first)
+ mrvl_lcore_first = core_id;
+
+ if (core_id > mrvl_lcore_last)
+ mrvl_lcore_last = core_id;
+}
+
+/**
+ * DPDK callback to register the virtual device.
+ *
+ * @param vdev
+ * Pointer to the virtual device.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+rte_pmd_mrvl_probe(struct rte_vdev_device *vdev)
+{
+ struct rte_kvargs *kvlist;
+ const char *ifnames[PP2_NUM_ETH_PPIO * PP2_NUM_PKT_PROC];
+ int ret = -EINVAL;
+ uint32_t i, ifnum, cfgnum, core_id;
+ const char *params;
+
+ params = rte_vdev_device_args(vdev);
+ if (!params)
+ return -EINVAL;
+
+ kvlist = rte_kvargs_parse(params, valid_args);
+ if (!kvlist)
+ return -EINVAL;
+
+ ifnum = rte_kvargs_count(kvlist, MRVL_IFACE_NAME_ARG);
+ if (ifnum > RTE_DIM(ifnames))
+ goto out_free_kvlist;
+
+ rte_kvargs_process(kvlist, MRVL_IFACE_NAME_ARG,
+ mrvl_get_ifnames, &ifnames);
+
+ cfgnum = rte_kvargs_count(kvlist, MRVL_CFG_ARG);
+ if (cfgnum > 1) {
+ RTE_LOG(ERR, PMD, "Cannot handle more than one config file!\n");
+ goto out_free_kvlist;
+ } else if (cfgnum == 1) {
+ rte_kvargs_process(kvlist, MRVL_CFG_ARG,
+ mrvl_get_qoscfg, &mrvl_qos_cfg);
+ }
+
+ /*
+ * ret == -EEXIST is correct - it means the DMA
+ * memory has already been initialized (by another PMD).
+ */
+ ret = mv_sys_dma_mem_init(MRVL_MUSDK_DMA_MEMSIZE);
+ if (ret < 0) {
+ if (ret != -EEXIST)
+ goto out_free_kvlist;
+ else
+ RTE_LOG(INFO, PMD,
+ "DMA memory has been already initialized by a different driver.\n");
+ }
+
+ ret = mrvl_init_pp2();
+ if (ret) {
+ RTE_LOG(ERR, PMD, "Failed to init PP!\n");
+ goto out_deinit_dma;
+ }
+
+ ret = mrvl_init_hifs();
+ if (ret)
+ goto out_deinit_hifs;
+
+ for (i = 0; i < ifnum; i++) {
+ RTE_LOG(INFO, PMD, "Creating %s\n", ifnames[i]);
+ ret = mrvl_eth_dev_create(vdev, ifnames[i]);
+ if (ret)
+ goto out_cleanup;
+ }
+
+ rte_kvargs_free(kvlist);
+
+ memset(mrvl_port_bpool_size, 0, sizeof(mrvl_port_bpool_size));
+
+ mrvl_lcore_first = RTE_MAX_LCORE;
+ mrvl_lcore_last = 0;
+
+ RTE_LCORE_FOREACH(core_id) {
+ mrvl_set_first_last_cores(core_id);
+ }
+
+ return 0;
+out_cleanup:
+ for (; i > 0; i--)
+ mrvl_eth_dev_destroy(ifnames[i]);
+out_deinit_hifs:
+ mrvl_deinit_hifs();
+ mrvl_deinit_pp2();
+out_deinit_dma:
+ mv_sys_dma_mem_destroy();
+out_free_kvlist:
+ rte_kvargs_free(kvlist);
+
+ return ret;
+}
+
+/**
+ * DPDK callback to remove virtual device.
+ *
+ * @param vdev
+ * Pointer to the removed virtual device.
+ *
+ * @return
+ * 0 on success, negative error value otherwise.
+ */
+static int
+rte_pmd_mrvl_remove(struct rte_vdev_device *vdev)
+{
+ int i;
+ const char *name;
+
+ name = rte_vdev_device_name(vdev);
+ if (!name)
+ return -EINVAL;
+
+ RTE_LOG(INFO, PMD, "Removing %s\n", name);
+
+ for (i = 0; i < rte_eth_dev_count(); i++) {
+ char ifname[RTE_ETH_NAME_MAX_LEN];
+
+ rte_eth_dev_get_name_by_port(i, ifname);
+ mrvl_eth_dev_destroy(ifname);
+ }
+
+ mrvl_deinit_hifs();
+ mrvl_deinit_pp2();
+ mv_sys_dma_mem_destroy();
+
+ return 0;
+}
+
+static struct rte_vdev_driver pmd_mrvl_drv = {
+ .probe = rte_pmd_mrvl_probe,
+ .remove = rte_pmd_mrvl_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_mrvl, pmd_mrvl_drv);
+RTE_PMD_REGISTER_ALIAS(net_mrvl, eth_mrvl);
diff --git a/drivers/net/mrvl/mrvl_ethdev.h b/drivers/net/mrvl/mrvl_ethdev.h
new file mode 100644
index 00000000..2a4ab5ab
--- /dev/null
+++ b/drivers/net/mrvl/mrvl_ethdev.h
@@ -0,0 +1,116 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Marvell International Ltd.
+ * Copyright(c) 2017 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MRVL_ETHDEV_H_
+#define _MRVL_ETHDEV_H_
+
+#include <rte_spinlock.h>
+#include <drivers/mv_pp2_cls.h>
+#include <drivers/mv_pp2_ppio.h>
+
+/** Maximum number of rx queues per port */
+#define MRVL_PP2_RXQ_MAX 32
+
+/** Maximum number of tx queues per port */
+#define MRVL_PP2_TXQ_MAX 8
+
+/** Minimum number of descriptors in tx queue */
+#define MRVL_PP2_TXD_MIN 16
+
+/** Maximum number of descriptors in tx queue */
+#define MRVL_PP2_TXD_MAX 2048
+
+/** Tx queue descriptors alignment */
+#define MRVL_PP2_TXD_ALIGN 16
+
+/** Minimum number of descriptors in rx queue */
+#define MRVL_PP2_RXD_MIN 16
+
+/** Maximum number of descriptors in rx queue */
+#define MRVL_PP2_RXD_MAX 2048
+
+/** Rx queue descriptors alignment */
+#define MRVL_PP2_RXD_ALIGN 16
+
+/** Maximum number of descriptors in tx aggregated queue */
+#define MRVL_PP2_AGGR_TXQD_MAX 2048
+
+/** Maximum number of Traffic Classes. */
+#define MRVL_PP2_TC_MAX 8
+
+/** Packet offset inside RX buffer. */
+#define MRVL_PKT_OFFS 64
+
+/** Maximum number of descriptors in shadow queue. Must be power of 2 */
+#define MRVL_PP2_TX_SHADOWQ_SIZE MRVL_PP2_TXD_MAX
+
+/** Shadow queue size mask (since shadow queue size is power of 2) */
+#define MRVL_PP2_TX_SHADOWQ_MASK (MRVL_PP2_TX_SHADOWQ_SIZE - 1)
+
+/** Minimum number of sent buffers to release from shadow queue to BM */
+#define MRVL_PP2_BUF_RELEASE_BURST_SIZE 64
+
+struct mrvl_priv {
+ /* Hot fields, used in fast path. */
+ struct pp2_bpool *bpool; /**< BPool pointer */
+ struct pp2_ppio *ppio; /**< Port handler pointer */
+ rte_spinlock_t lock; /**< Spinlock for checking bpool status */
+ uint16_t bpool_max_size; /**< BPool maximum size */
+ uint16_t bpool_min_size; /**< BPool minimum size */
+ uint16_t bpool_init_size; /**< Configured BPool size */
+
+ /** Mapping for DPDK rx queue->(TC, MRVL relative inq) */
+ struct {
+ uint8_t tc; /**< Traffic Class */
+ uint8_t inq; /**< Relative in-queue number */
+ } rxq_map[MRVL_PP2_RXQ_MAX] __rte_cache_aligned;
+
+ /* Configuration data, used sporadically. */
+ uint8_t pp_id;
+ uint8_t ppio_id;
+ uint8_t bpool_bit;
+ uint8_t rss_hf_tcp;
+ uint8_t uc_mc_flushed;
+ uint8_t vlan_flushed;
+
+ struct pp2_ppio_params ppio_params;
+ struct pp2_cls_qos_tbl_params qos_tbl_params;
+ struct pp2_cls_tbl *qos_tbl;
+ uint16_t nb_rx_queues;
+};
+
+/** Number of ports configured. */
+extern int mrvl_ports_nb;
+
+#endif /* _MRVL_ETHDEV_H_ */
diff --git a/drivers/net/mrvl/mrvl_qos.c b/drivers/net/mrvl/mrvl_qos.c
new file mode 100644
index 00000000..7c9943aa
--- /dev/null
+++ b/drivers/net/mrvl/mrvl_qos.c
@@ -0,0 +1,636 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Marvell International Ltd.
+ * Copyright(c) 2017 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_cfgfile.h>
+#include <rte_log.h>
+#include <rte_lcore.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+
+/* Unfortunately, container_of is defined by both DPDK and MUSDK,
+ * so keep only a single definition here.
+ *
+ * Note that it is not used in this PMD anyway.
+ */
+#ifdef container_of
+#undef container_of
+#endif
+
+#include "mrvl_qos.h"
+
+/* Parsing tokens. Defined conveniently, so that any correction is easy. */
+#define MRVL_TOK_DEFAULT "default"
+#define MRVL_TOK_DEFAULT_TC "default_tc"
+#define MRVL_TOK_DSCP "dscp"
+#define MRVL_TOK_MAPPING_PRIORITY "mapping_priority"
+#define MRVL_TOK_IP "ip"
+#define MRVL_TOK_IP_VLAN "ip/vlan"
+#define MRVL_TOK_PCP "pcp"
+#define MRVL_TOK_PORT "port"
+#define MRVL_TOK_RXQ "rxq"
+#define MRVL_TOK_SP "SP"
+#define MRVL_TOK_TC "tc"
+#define MRVL_TOK_TXQ "txq"
+#define MRVL_TOK_VLAN "vlan"
+#define MRVL_TOK_VLAN_IP "vlan/ip"
+#define MRVL_TOK_WEIGHT "weight"
+
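+/*
+ * A hypothetical configuration fragment built from the tokens above (the
+ * section and entry layout follows the parsing code further down in this
+ * file):
+ *
+ *   [port 0 default]
+ *   default_tc = 0
+ *   mapping_priority = vlan/ip
+ *
+ *   [port 0 tc 0]
+ *   rxq = 0 1
+ *   pcp = 0 1 2-3
+ *   dscp = 0-15
+ *
+ *   [port 0 txq 0]
+ *   weight = 100
+ */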
+/** Number of tokens in range a-b = 2. */
+#define MAX_RNG_TOKENS 2
+
+/** Maximum possible value of PCP. */
+#define MAX_PCP 7
+
+/** Maximum possible value of DSCP. */
+#define MAX_DSCP 63
+
+/** Global QoS configuration. */
+struct mrvl_qos_cfg *mrvl_qos_cfg;
+
+/**
+ * Convert string to uint32_t with extra checks for result correctness.
+ *
+ * @param string String to convert.
+ * @param val Conversion result.
+ * @returns 0 in case of success, negative value otherwise.
+ */
+static int
+get_val_securely(const char *string, uint32_t *val)
+{
+ char *endptr;
+ size_t len = strlen(string);
+
+ if (len == 0)
+ return -1;
+
+ errno = 0;
+ *val = strtoul(string, &endptr, 0);
+ if (errno != 0 || RTE_PTR_DIFF(endptr, string) != len)
+ return -2;
+
+ return 0;
+}
+
+/**
+ * Read out-queue configuration from file.
+ *
+ * @param file Path to the configuration file.
+ * @param port Port number.
+ * @param outq Out queue number.
+ * @param cfg Pointer to the Marvell QoS configuration structure.
+ * @returns 0 in case of success, negative value otherwise.
+ */
+static int
+get_outq_cfg(struct rte_cfgfile *file, int port, int outq,
+ struct mrvl_qos_cfg *cfg)
+{
+ char sec_name[32];
+ const char *entry;
+ uint32_t val;
+
+ snprintf(sec_name, sizeof(sec_name), "%s %d %s %d",
+ MRVL_TOK_PORT, port, MRVL_TOK_TXQ, outq);
+
+ /* Skip non-existing */
+ if (rte_cfgfile_num_sections(file, sec_name, strlen(sec_name)) <= 0)
+ return 0;
+
+ entry = rte_cfgfile_get_entry(file, sec_name,
+ MRVL_TOK_WEIGHT);
+ if (entry) {
+ if (get_val_securely(entry, &val) < 0)
+ return -1;
+ cfg->port[port].outq[outq].weight = (uint8_t)val;
+ }
+
+ return 0;
+}
+
+/**
+ * Gets multiple-entry values and places them in table.
+ *
+ * Entry can be anything, e.g. "1 2-3 5 6 7-9". This needs to be converted to
+ * table entries, respectively: {1, 2, 3, 5, 6, 7, 8, 9}.
+ * As all elements of the result table are always 1 byte long, the
+ * function is kept simple. The API stays generic, however: the element
+ * size is verified and support for other sizes is easy to add.
+ *
+ * This is a pure utility function: it does not print any errors, it only
+ * returns distinct error codes.
+ *
+ * @param entry[in] Values string to parse.
+ * @param tab[out] Results table.
+ * @param elem_sz[in] Element size (in bytes).
+ * @param max_elems[in] Number of results table elements available.
+ * @param max_val[in] Maximum value allowed.
+ * @returns Number of correctly parsed elements in case of success.
+ * @retval -1 Wrong element size.
+ * @retval -2 More tokens than result table allows.
+ * @retval -3 Wrong range syntax.
+ * @retval -4 Wrong range values.
+ * @retval -5 Maximum value exceeded.
+ */
+static int
+get_entry_values(const char *entry, uint8_t *tab,
+ size_t elem_sz, uint8_t max_elems, uint8_t max_val)
+{
+ /* There should not be more tokens than max elements.
+ * Add 1 for error trap.
+ */
+ char *tokens[max_elems + 1];
+
+ /* Begin, End + error trap = 3. */
+ char *rng_tokens[MAX_RNG_TOKENS + 1];
+ long beg, end;
+ uint32_t token_val;
+ int nb_tokens, nb_rng_tokens;
+ int i;
+ int values = 0;
+ char val;
+ char entry_cpy[CFG_VALUE_LEN];
+
+ if (elem_sz != 1)
+ return -1;
+
+ /* Copy the entry to safely use rte_strsplit(). */
+ snprintf(entry_cpy, RTE_DIM(entry_cpy), "%s", entry);
+
+ /*
+ * If there are more tokens than array size, rte_strsplit will
+ * not return error, just array size.
+ */
+ nb_tokens = rte_strsplit(entry_cpy, strlen(entry_cpy),
+ tokens, max_elems + 1, ' ');
+
+ /* Quick check, will be refined later. */
+ if (nb_tokens > max_elems)
+ return -2;
+
+ for (i = 0; i < nb_tokens; ++i) {
+ if (strchr(tokens[i], '-') != NULL) {
+ /*
+ * Split to begin and end tokens.
+ * We want to catch error cases too, thus we leave
+ * option for number of tokens to be more than 2.
+ */
+ nb_rng_tokens = rte_strsplit(tokens[i],
+ strlen(tokens[i]), rng_tokens,
+ RTE_DIM(rng_tokens), '-');
+ if (nb_rng_tokens != 2)
+ return -3;
+
+ /* Range and sanity checks. */
+ if (get_val_securely(rng_tokens[0], &token_val) < 0)
+ return -4;
+ beg = (char)token_val;
+ if (get_val_securely(rng_tokens[1], &token_val) < 0)
+ return -4;
+ end = (char)token_val;
+ if (beg < 0 || beg > UCHAR_MAX ||
+ end < 0 || end > UCHAR_MAX || end < beg)
+ return -4;
+
+ for (val = beg; val <= end; ++val) {
+ if (val > max_val)
+ return -5;
+
+ *tab = val;
+ tab = RTE_PTR_ADD(tab, elem_sz);
+ ++values;
+ if (values >= max_elems)
+ return -2;
+ }
+ } else {
+ /* Single values. */
+ if (get_val_securely(tokens[i], &token_val) < 0)
+ return -5;
+ val = (char)token_val;
+ if (val > max_val)
+ return -5;
+
+ *tab = val;
+ tab = RTE_PTR_ADD(tab, elem_sz);
+ ++values;
+ if (values >= max_elems)
+ return -2;
+ }
+ }
+
+ return values;
+}
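+/*
+ * Usage sketch (hypothetical caller): parsing "0 2-3" into a four-element
+ * table of queue indices,
+ *
+ *   uint8_t inq[4];
+ *   int n = get_entry_values("0 2-3", inq, sizeof(inq[0]),
+ *                            RTE_DIM(inq), MRVL_PP2_RXQ_MAX);
+ *
+ * yields n == 3 and inq[0..2] == {0, 2, 3}.
+ */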
+
+/**
+ * Parse a Traffic Class's mapping configuration.
+ *
+ * @param file Config file handle.
+ * @param port Which port to look for.
+ * @param tc Which Traffic Class to look for.
+ * @param cfg[out] Parsing results.
+ * @returns 0 in case of success, negative value otherwise.
+ */
+static int
+parse_tc_cfg(struct rte_cfgfile *file, int port, int tc,
+ struct mrvl_qos_cfg *cfg)
+{
+ char sec_name[32];
+ const char *entry;
+ int n;
+
+ snprintf(sec_name, sizeof(sec_name), "%s %d %s %d",
+ MRVL_TOK_PORT, port, MRVL_TOK_TC, tc);
+
+ /* Skip non-existing */
+ if (rte_cfgfile_num_sections(file, sec_name, strlen(sec_name)) <= 0)
+ return 0;
+
+ entry = rte_cfgfile_get_entry(file, sec_name, MRVL_TOK_RXQ);
+ if (entry) {
+ n = get_entry_values(entry,
+ cfg->port[port].tc[tc].inq,
+ sizeof(cfg->port[port].tc[tc].inq[0]),
+ RTE_DIM(cfg->port[port].tc[tc].inq),
+ MRVL_PP2_RXQ_MAX);
+ if (n < 0) {
+ RTE_LOG(ERR, PMD, "Error %d while parsing: %s\n",
+ n, entry);
+ return n;
+ }
+ cfg->port[port].tc[tc].inqs = n;
+ }
+
+ entry = rte_cfgfile_get_entry(file, sec_name, MRVL_TOK_PCP);
+ if (entry) {
+ n = get_entry_values(entry,
+ cfg->port[port].tc[tc].pcp,
+ sizeof(cfg->port[port].tc[tc].pcp[0]),
+ RTE_DIM(cfg->port[port].tc[tc].pcp),
+ MAX_PCP);
+ if (n < 0) {
+ RTE_LOG(ERR, PMD, "Error %d while parsing: %s\n",
+ n, entry);
+ return n;
+ }
+ cfg->port[port].tc[tc].pcps = n;
+ }
+
+ entry = rte_cfgfile_get_entry(file, sec_name, MRVL_TOK_DSCP);
+ if (entry) {
+ n = get_entry_values(entry,
+ cfg->port[port].tc[tc].dscp,
+ sizeof(cfg->port[port].tc[tc].dscp[0]),
+ RTE_DIM(cfg->port[port].tc[tc].dscp),
+ MAX_DSCP);
+ if (n < 0) {
+ RTE_LOG(ERR, PMD, "Error %d while parsing: %s\n",
+ n, entry);
+ return n;
+ }
+ cfg->port[port].tc[tc].dscps = n;
+ }
+ return 0;
+}
+
+/**
+ * Parse QoS configuration - rte_kvargs_process handler.
+ *
+ * Opens configuration file and parses its content.
+ *
+ * @param key Unused.
+ * @param path Path to config file.
+ * @param extra_args Pointer to configuration structure.
+ * @returns 0 in case of success, exits otherwise.
+ */
+int
+mrvl_get_qoscfg(const char *key __rte_unused, const char *path,
+ void *extra_args)
+{
+ struct mrvl_qos_cfg **cfg = extra_args;
+ struct rte_cfgfile *file = rte_cfgfile_load(path, 0);
+ uint32_t val;
+ int n, i, ret;
+ const char *entry;
+ char sec_name[32];
+
+ if (file == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot load configuration %s\n", path);
+
+ /* Create configuration. This is never accessed on the fast path,
+ * so we can ignore socket.
+ */
+ *cfg = rte_zmalloc("mrvl_qos_cfg", sizeof(struct mrvl_qos_cfg), 0);
+ if (*cfg == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot allocate configuration %s\n",
+ path);
+
+ n = rte_cfgfile_num_sections(file, MRVL_TOK_PORT,
+ sizeof(MRVL_TOK_PORT) - 1);
+
+ if (n == 0) {
+ /* This is weird, but not bad. */
+ RTE_LOG(WARNING, PMD, "Empty configuration file?\n");
+ return 0;
+ }
+
+ /* Use the number of ports given as vdev parameters. */
+ for (n = 0; n < mrvl_ports_nb; ++n) {
+ snprintf(sec_name, sizeof(sec_name), "%s %d %s",
+ MRVL_TOK_PORT, n, MRVL_TOK_DEFAULT);
+
+ /* Skip ports non-existing in configuration. */
+ if (rte_cfgfile_num_sections(file, sec_name,
+ strlen(sec_name)) <= 0) {
+ (*cfg)->port[n].use_global_defaults = 1;
+ (*cfg)->port[n].mapping_priority =
+ PP2_CLS_QOS_TBL_VLAN_IP_PRI;
+ continue;
+ }
+
+ entry = rte_cfgfile_get_entry(file, sec_name,
+ MRVL_TOK_DEFAULT_TC);
+ if (entry) {
+ if (get_val_securely(entry, &val) < 0 ||
+ val > USHRT_MAX)
+ return -1;
+ (*cfg)->port[n].default_tc = (uint8_t)val;
+ } else {
+ RTE_LOG(ERR, PMD,
+ "Default Traffic Class required in custom configuration!\n");
+ return -1;
+ }
+
+ entry = rte_cfgfile_get_entry(file, sec_name,
+ MRVL_TOK_MAPPING_PRIORITY);
+ if (entry) {
+ if (!strncmp(entry, MRVL_TOK_VLAN_IP,
+ sizeof(MRVL_TOK_VLAN_IP)))
+ (*cfg)->port[n].mapping_priority =
+ PP2_CLS_QOS_TBL_VLAN_IP_PRI;
+ else if (!strncmp(entry, MRVL_TOK_IP_VLAN,
+ sizeof(MRVL_TOK_IP_VLAN)))
+ (*cfg)->port[n].mapping_priority =
+ PP2_CLS_QOS_TBL_IP_VLAN_PRI;
+ else if (!strncmp(entry, MRVL_TOK_IP,
+ sizeof(MRVL_TOK_IP)))
+ (*cfg)->port[n].mapping_priority =
+ PP2_CLS_QOS_TBL_IP_PRI;
+ else if (!strncmp(entry, MRVL_TOK_VLAN,
+ sizeof(MRVL_TOK_VLAN)))
+ (*cfg)->port[n].mapping_priority =
+ PP2_CLS_QOS_TBL_VLAN_PRI;
+ else
+ rte_exit(EXIT_FAILURE,
+ "Error in parsing %s value (%s)!\n",
+ MRVL_TOK_MAPPING_PRIORITY, entry);
+ } else {
+ (*cfg)->port[n].mapping_priority =
+ PP2_CLS_QOS_TBL_VLAN_IP_PRI;
+ }
+
+ for (i = 0; i < MRVL_PP2_RXQ_MAX; ++i) {
+ ret = get_outq_cfg(file, n, i, *cfg);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "Error %d parsing port %d outq %d!\n",
+ ret, n, i);
+ }
+
+ for (i = 0; i < MRVL_PP2_TC_MAX; ++i) {
+ ret = parse_tc_cfg(file, n, i, *cfg);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "Error %d parsing port %d tc %d!\n",
+ ret, n, i);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Setup Traffic Class.
+ *
+ * Fill in TC parameters in a single MUSDK TC config entry.
+ * @param param TC parameters entry.
+ * @param inqs Number of MUSDK in-queues in this TC.
+ * @param bpool Bpool for this TC.
+ * @returns 0 in case of success, negative value otherwise.
+ */
+static int
+setup_tc(struct pp2_ppio_tc_params *param, uint8_t inqs,
+ struct pp2_bpool *bpool)
+{
+ struct pp2_ppio_inq_params *inq_params;
+
+ param->pkt_offset = MRVL_PKT_OFFS;
+ param->pools[0] = bpool;
+
+ inq_params = rte_zmalloc_socket("inq_params",
+ inqs * sizeof(*inq_params),
+ 0, rte_socket_id());
+ if (!inq_params)
+ return -ENOMEM;
+
+ param->num_in_qs = inqs;
+
+ /* Release old config if necessary. */
+ if (param->inqs_params)
+ rte_free(param->inqs_params);
+
+ param->inqs_params = inq_params;
+
+ return 0;
+}
+
+/**
+ * Configure RX Queues in a given port.
+ *
+ * Sets up RX queues, their Traffic Classes and DPDK rxq->(TC,inq) mapping.
+ *
+ * @param priv Port's private data
+ * @param portid DPDK port ID
+ * @param max_queues Maximum number of queues to configure.
+ * @returns 0 in case of success, negative value otherwise.
+ */
+int
+mrvl_configure_rxqs(struct mrvl_priv *priv, uint16_t portid,
+ uint16_t max_queues)
+{
+ size_t i, tc;
+
+ if (mrvl_qos_cfg == NULL ||
+ mrvl_qos_cfg->port[portid].use_global_defaults) {
+ /* No port configuration, use default: 1 TC, no QoS. */
+ priv->ppio_params.inqs_params.num_tcs = 1;
+ setup_tc(&priv->ppio_params.inqs_params.tcs_params[0],
+ max_queues, priv->bpool);
+
+ /* Direct mapping of queues i.e. 0->0, 1->1 etc. */
+ for (i = 0; i < max_queues; ++i) {
+ priv->rxq_map[i].tc = 0;
+ priv->rxq_map[i].inq = i;
+ }
+ return 0;
+ }
+
+ /* We need only a subset of configuration. */
+ struct port_cfg *port_cfg = &mrvl_qos_cfg->port[portid];
+
+ priv->qos_tbl_params.type = port_cfg->mapping_priority;
+
+ /*
+ * We need to reverse the mapping: from tc->pcp (more natural from a
+ * usability point of view) to pcp->tc (the form MUSDK expects).
+ * First, set all map elements to "default".
+ */
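+ /*
+ * For example, a port whose TC 1 is configured with pcp = "2 3" ends
+ * up with pcp_cos_map[2].tc == pcp_cos_map[3].tc == 1 and every other
+ * PCP mapped to default_tc.
+ */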
+ for (i = 0; i < RTE_DIM(priv->qos_tbl_params.pcp_cos_map); ++i)
+ priv->qos_tbl_params.pcp_cos_map[i].tc = port_cfg->default_tc;
+
+ /* Then, fill in all known values. */
+ for (tc = 0; tc < RTE_DIM(port_cfg->tc); ++tc) {
+ if (port_cfg->tc[tc].pcps > RTE_DIM(port_cfg->tc[0].pcp)) {
+ /* Better safe than sorry. */
+ RTE_LOG(ERR, PMD,
+ "Too many PCPs configured in TC %zu!\n", tc);
+ return -1;
+ }
+ for (i = 0; i < port_cfg->tc[tc].pcps; ++i) {
+ priv->qos_tbl_params.pcp_cos_map[
+ port_cfg->tc[tc].pcp[i]].tc = tc;
+ }
+ }
+
+ /*
+ * The same logic goes with DSCP.
+ * First, set all map elements to "default".
+ */
+ for (i = 0; i < RTE_DIM(priv->qos_tbl_params.dscp_cos_map); ++i)
+ priv->qos_tbl_params.dscp_cos_map[i].tc =
+ port_cfg->default_tc;
+
+ /* Fill in all known values. */
+ for (tc = 0; tc < RTE_DIM(port_cfg->tc); ++tc) {
+ if (port_cfg->tc[tc].dscps > RTE_DIM(port_cfg->tc[0].dscp)) {
+ /* Better safe than sorry. */
+ RTE_LOG(ERR, PMD,
+ "Too many DSCPs configured in TC %zu!\n", tc);
+ return -1;
+ }
+ for (i = 0; i < port_cfg->tc[tc].dscps; ++i) {
+ priv->qos_tbl_params.dscp_cos_map[
+ port_cfg->tc[tc].dscp[i]].tc = tc;
+ }
+ }
+
+ /*
+ * A similar approach applies to the queue mapping: we only need to
+ * store the qid->TC mapping so the TC is known when a given queue is
+ * read.
+ */
+ for (i = 0; i < RTE_DIM(priv->rxq_map); ++i)
+ priv->rxq_map[i].tc = MRVL_UNKNOWN_TC;
+
+ /* Set up DPDKq->(TC,inq) mapping. */
+ for (tc = 0; tc < RTE_DIM(port_cfg->tc); ++tc) {
+ if (port_cfg->tc[tc].inqs > RTE_DIM(port_cfg->tc[0].inq)) {
+ /* Overflow. */
+ RTE_LOG(ERR, PMD,
+ "Too many RX queues configured per TC %zu!\n",
+ tc);
+ return -1;
+ }
+ for (i = 0; i < port_cfg->tc[tc].inqs; ++i) {
+ uint8_t idx = port_cfg->tc[tc].inq[i];
+
+ if (idx > RTE_DIM(priv->rxq_map)) {
+ RTE_LOG(ERR, PMD, "Bad queue index %d!\n", idx);
+ return -1;
+ }
+
+ priv->rxq_map[idx].tc = tc;
+ priv->rxq_map[idx].inq = i;
+ }
+ }
+
+ /*
+ * Set up TC configuration. TCs need to be sequenced: 0, 1, 2
+ * with no gaps. Empty TC means end of processing.
+ */
+ for (i = 0; i < MRVL_PP2_TC_MAX; ++i) {
+ if (port_cfg->tc[i].inqs == 0)
+ break;
+ setup_tc(&priv->ppio_params.inqs_params.tcs_params[i],
+ port_cfg->tc[i].inqs,
+ priv->bpool);
+ }
+
+ priv->ppio_params.inqs_params.num_tcs = i;
+
+ return 0;
+}
+
+/**
+ * Start QoS mapping.
+ *
+ * Finalize the QoS table configuration and initialize it in the SDK. This
+ * can be done only after the port is started, so that we have a valid ppio
+ * reference.
+ *
+ * @param priv Port's private (configuration) data.
+ * @returns 0 in case of success, negative value otherwise.
+ */
+int
+mrvl_start_qos_mapping(struct mrvl_priv *priv)
+{
+ size_t i;
+
+ if (priv->ppio == NULL) {
+ RTE_LOG(ERR, PMD, "ppio must not be NULL here!\n");
+ return -1;
+ }
+
+ for (i = 0; i < RTE_DIM(priv->qos_tbl_params.pcp_cos_map); ++i)
+ priv->qos_tbl_params.pcp_cos_map[i].ppio = priv->ppio;
+
+ for (i = 0; i < RTE_DIM(priv->qos_tbl_params.dscp_cos_map); ++i)
+ priv->qos_tbl_params.dscp_cos_map[i].ppio = priv->ppio;
+
+ /* Initialize Classifier QoS table. */
+
+ return pp2_cls_qos_tbl_init(&priv->qos_tbl_params, &priv->qos_tbl);
+}
diff --git a/drivers/net/mrvl/mrvl_qos.h b/drivers/net/mrvl/mrvl_qos.h
new file mode 100644
index 00000000..ae7508c9
--- /dev/null
+++ b/drivers/net/mrvl/mrvl_qos.h
@@ -0,0 +1,113 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Marvell International Ltd.
+ * Copyright(c) 2017 Semihalf.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MRVL_QOS_H_
+#define _MRVL_QOS_H_
+
+#include <rte_common.h>
+
+#include "mrvl_ethdev.h"
+
+/** Code Points per Traffic Class. Equals max(DSCP, PCP). */
+#define MRVL_CP_PER_TC (64)
+
+/** Value used as "unknown". */
+#define MRVL_UNKNOWN_TC (0xFF)
+
+/* QoS config. */
+struct mrvl_qos_cfg {
+ struct port_cfg {
+ struct {
+ uint8_t inq[MRVL_PP2_RXQ_MAX];
+ uint8_t dscp[MRVL_CP_PER_TC];
+ uint8_t pcp[MRVL_CP_PER_TC];
+ uint8_t inqs;
+ uint8_t dscps;
+ uint8_t pcps;
+ } tc[MRVL_PP2_TC_MAX];
+ struct {
+ uint8_t weight;
+ } outq[MRVL_PP2_RXQ_MAX];
+ enum pp2_cls_qos_tbl_type mapping_priority;
+ uint16_t inqs;
+ uint16_t outqs;
+ uint8_t default_tc;
+ uint8_t use_global_defaults;
+ } port[RTE_MAX_ETHPORTS];
+};
+
+/** Global QoS configuration. */
+extern struct mrvl_qos_cfg *mrvl_qos_cfg;
+
+/**
+ * Parse QoS configuration - rte_kvargs_process handler.
+ *
+ * Opens configuration file and parses its content.
+ *
+ * @param key Unused.
+ * @param path Path to config file.
+ * @param extra_args Pointer to configuration structure.
+ * @returns 0 in case of success, exits otherwise.
+ */
+int
+mrvl_get_qoscfg(const char *key __rte_unused, const char *path,
+ void *extra_args);
+
+/**
+ * Configure RX Queues in a given port.
+ *
+ * Sets up RX queues, their Traffic Classes and DPDK rxq->(TC,inq) mapping.
+ *
+ * @param priv Port's private data
+ * @param portid DPDK port ID
+ * @param max_queues Maximum number of queues to configure.
+ * @returns 0 in case of success, negative value otherwise.
+ */
+int
+mrvl_configure_rxqs(struct mrvl_priv *priv, uint16_t portid,
+ uint16_t max_queues);
+
+/**
+ * Start QoS mapping.
+ *
+ * Finalize the QoS table configuration and initialize it in the SDK. This
+ * can be done only after the port is started, so that we have a valid ppio
+ * reference.
+ *
+ * @param priv Port's private (configuration) data.
+ * @returns 0 in case of success, negative value otherwise.
+ */
+int
+mrvl_start_qos_mapping(struct mrvl_priv *priv);
+
+#endif /* _MRVL_QOS_H_ */
diff --git a/drivers/net/mrvl/rte_pmd_mrvl_version.map b/drivers/net/mrvl/rte_pmd_mrvl_version.map
new file mode 100644
index 00000000..a7530317
--- /dev/null
+++ b/drivers/net/mrvl/rte_pmd_mrvl_version.map
@@ -0,0 +1,3 @@
+DPDK_17.11 {
+ local: *;
+};
diff --git a/drivers/net/nfp/Makefile b/drivers/net/nfp/Makefile
index 4ee2c2dc..4ba066ac 100644
--- a/drivers/net/nfp/Makefile
+++ b/drivers/net/nfp/Makefile
@@ -40,6 +40,9 @@ CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lm
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
EXPORT_MAP := rte_pmd_nfp_version.map
@@ -49,5 +52,7 @@ LIBABIVER := 1
# all source are stored in SRCS-y
#
SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_net.c
+SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_nfpu.c
+SRCS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp_nspu.c
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index 92b03c4c..83dec061 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -55,6 +55,7 @@
#include <rte_alarm.h>
#include <rte_spinlock.h>
+#include "nfp_nfpu.h"
#include "nfp_net_pmd.h"
#include "nfp_net_logs.h"
#include "nfp_net_ctrl.h"
@@ -87,7 +88,7 @@ static int nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
uint16_t nb_desc, unsigned int socket_id,
const struct rte_eth_txconf *tx_conf);
static int nfp_net_start(struct rte_eth_dev *dev);
-static void nfp_net_stats_get(struct rte_eth_dev *dev,
+static int nfp_net_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static void nfp_net_stats_reset(struct rte_eth_dev *dev);
static void nfp_net_stop(struct rte_eth_dev *dev);
@@ -99,13 +100,13 @@ static uint16_t nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
* happen to be at the same offset on the NFP6000 and the NFP3200 so
* we use a single macro here.
*/
-#define NFP_PCIE_QUEUE(_q) (0x80000 + (0x800 * ((_q) & 0xff)))
+#define NFP_PCIE_QUEUE(_q) (0x800 * ((_q) & 0xff))
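
A small worked example of the new queue offset arithmetic, for orientation only: the fixed 0x80000 base is no longer folded into the macro, so the result is an offset relative to the queue BAR.

#include <stdio.h>

/* same arithmetic as the reworked macro above */
#define QUEUE_OFF(_q) (0x800 * ((_q) & 0xff))

int main(void)
{
        printf("queue 0   -> 0x%x\n", (unsigned)QUEUE_OFF(0));   /* 0x0 */
        printf("queue 3   -> 0x%x\n", (unsigned)QUEUE_OFF(3));   /* 0x1800 */
        printf("queue 255 -> 0x%x\n", (unsigned)QUEUE_OFF(255)); /* 0x7f800 */
        return 0;
}
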
/* Maximum value which can be added to a queue with one transaction */
#define NFP_QCP_MAX_ADD 0x7f
#define RTE_MBUF_DMA_ADDR_DEFAULT(mb) \
- (uint64_t)((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
+ (uint64_t)((mb)->buf_iova + RTE_PKTMBUF_HEADROOM)
/* nfp_qcp_ptr - Read or Write Pointer of a queue */
enum nfp_qcp_ptr {
@@ -487,10 +488,6 @@ nfp_net_configure(struct rte_eth_dev *dev)
return -EINVAL;
}
- /* Supporting VLAN insertion by default */
- if (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)
- new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
-
if (rxmode->jumbo_frame)
/* this is handled in rte_eth_dev_configure */
@@ -504,6 +501,32 @@ nfp_net_configure(struct rte_eth_dev *dev)
return -EINVAL;
}
+ /* If next capabilities are supported, configure them by default */
+
+ /* VLAN insertion */
+ if (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)
+ new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
+
+ /* L2 broadcast */
+ if (hw->cap & NFP_NET_CFG_CTRL_L2BC)
+ new_ctrl |= NFP_NET_CFG_CTRL_L2BC;
+
+ /* L2 multicast */
+ if (hw->cap & NFP_NET_CFG_CTRL_L2MC)
+ new_ctrl |= NFP_NET_CFG_CTRL_L2MC;
+
+ /* TX checksum offload */
+ if (hw->cap & NFP_NET_CFG_CTRL_TXCSUM)
+ new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
+
+ /* LSO offload */
+ if (hw->cap & NFP_NET_CFG_CTRL_LSO)
+ new_ctrl |= NFP_NET_CFG_CTRL_LSO;
+
+ /* RX gather */
+ if (hw->cap & NFP_NET_CFG_CTRL_GATHER)
+ new_ctrl |= NFP_NET_CFG_CTRL_GATHER;
+
if (!new_ctrl)
return 0;
@@ -592,7 +615,55 @@ nfp_net_cfg_queue_setup(struct nfp_net_hw *hw)
hw->qcp_cfg = hw->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
}
-static void nfp_net_read_mac(struct nfp_net_hw *hw)
+#define ETH_ADDR_LEN 6
+
+static void
+nfp_eth_copy_mac_reverse(uint8_t *dst, const uint8_t *src)
+{
+ int i;
+
+ for (i = 0; i < ETH_ADDR_LEN; i++)
+ dst[ETH_ADDR_LEN - i - 1] = src[i];
+}
+
+static int
+nfp_net_pf_read_mac(struct nfp_net_hw *hw, int port)
+{
+ union eth_table_entry *entry;
+ int idx, i;
+
+ idx = port;
+ entry = hw->eth_table;
+
+ /* Reading NFP ethernet table obtained before */
+ for (i = 0; i < NSP_ETH_MAX_COUNT; i++) {
+ if (!(entry->port & NSP_ETH_PORT_LANES_MASK)) {
+ /* port not in use */
+ entry++;
+ continue;
+ }
+ if (idx == 0)
+ break;
+ idx--;
+ entry++;
+ }
+
+ if (i == NSP_ETH_MAX_COUNT)
+ return -EINVAL;
+
+ /*
+ * hw points to port0 private data. We need hw to point to the
+ * right port now.
+ */
+ hw += port;
+ nfp_eth_copy_mac_reverse((uint8_t *)&hw->mac_addr,
+ (uint8_t *)&entry->mac_addr);
+
+ return 0;
+}
+
+static void
+nfp_net_vf_read_mac(struct nfp_net_hw *hw)
{
uint32_t tmp;
@@ -603,6 +674,20 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
memcpy(&hw->mac_addr[4], &tmp, 2);
}
+static void
+nfp_net_write_mac(struct nfp_net_hw *hw, uint8_t *mac)
+{
+ uint32_t mac0 = *(uint32_t *)mac;
+ uint16_t mac1;
+
+ nn_writel(rte_cpu_to_be_32(mac0), hw->ctrl_bar + NFP_NET_CFG_MACADDR);
+
+ mac += 4;
+ mac1 = *(uint16_t *)mac;
+ nn_writew(rte_cpu_to_be_16(mac1),
+ hw->ctrl_bar + NFP_NET_CFG_MACADDR + 6);
+}
+
static int
nfp_configure_rx_interrupt(struct rte_eth_dev *dev,
struct rte_intr_handle *intr_handle)
@@ -627,14 +712,19 @@ nfp_configure_rx_interrupt(struct rte_eth_dev *dev,
PMD_INIT_LOG(INFO, "VF: enabling RX interrupt with UIO");
/* UIO just supports one queue and no LSC*/
nn_cfg_writeb(hw, NFP_NET_CFG_RXR_VEC(0), 0);
+ intr_handle->intr_vec[0] = 0;
} else {
PMD_INIT_LOG(INFO, "VF: enabling RX interrupt with VFIO");
- for (i = 0; i < dev->data->nb_rx_queues; i++)
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
/*
* The first msix vector is reserved for non
* efd interrupts
*/
nn_cfg_writeb(hw, NFP_NET_CFG_RXR_VEC(i), i + 1);
+ intr_handle->intr_vec[i] = i + 1;
+ PMD_INIT_LOG(DEBUG, "intr_vec[%d]= %d\n", i,
+ intr_handle->intr_vec[i]);
+ }
}
/* Avoiding TX interrupts */
@@ -667,6 +757,11 @@ nfp_net_start(struct rte_eth_dev *dev)
/* check and configure queue intr-vector mapping */
if (dev->data->dev_conf.intr_conf.rxq != 0) {
+ if (hw->pf_multiport_enabled) {
+ PMD_INIT_LOG(ERR, "PMD rx interrupt is not supported "
+ "with NFP multiport PF");
+ return -EINVAL;
+ }
if (intr_handle->type == RTE_INTR_HANDLE_UIO) {
/*
* Better not to share LSC with RX interrupts.
@@ -684,20 +779,17 @@ nfp_net_start(struct rte_eth_dev *dev)
intr_vector = dev->data->nb_rx_queues;
if (rte_intr_efd_enable(intr_handle, intr_vector))
return -1;
- }
- if (rte_intr_dp_is_en(intr_handle))
nfp_configure_rx_interrupt(dev, intr_handle);
+ update = NFP_NET_CFG_UPDATE_MSIX;
+ }
rte_intr_enable(intr_handle);
/* Enable device */
new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_ENABLE;
- update = NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING;
- /* Just configuring queues interrupts when necessary */
- if (rte_intr_dp_is_en(intr_handle))
- update |= NFP_NET_CFG_UPDATE_MSIX;
+ update |= NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING;
if (hw->cap & NFP_NET_CFG_CTRL_RINGCFG)
new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
@@ -715,6 +807,10 @@ nfp_net_start(struct rte_eth_dev *dev)
goto error;
}
+ if (hw->is_pf)
+ /* Configure the physical port up */
+ nfp_nsp_eth_config(hw->nspu_desc, hw->pf_port_idx, 1);
+
hw->ctrl = new_ctrl;
return 0;
@@ -743,9 +839,12 @@ static void
nfp_net_stop(struct rte_eth_dev *dev)
{
int i;
+ struct nfp_net_hw *hw;
PMD_INIT_LOG(DEBUG, "Stop");
+ hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
nfp_net_disable_queues(dev);
/* Clear queues */
@@ -758,6 +857,10 @@ nfp_net_stop(struct rte_eth_dev *dev)
nfp_net_reset_rx_queue(
(struct nfp_net_rxq *)dev->data->rx_queues[i]);
}
+
+ if (hw->is_pf)
+ /* Configure the physical port down */
+ nfp_nsp_eth_config(hw->nspu_desc, hw->pf_port_idx, 0);
}
/* Reset and stop device. The device can not be restarted. */
@@ -766,6 +869,7 @@ nfp_net_close(struct rte_eth_dev *dev)
{
struct nfp_net_hw *hw;
struct rte_pci_device *pci_dev;
+ int i;
PMD_INIT_LOG(DEBUG, "Close");
@@ -777,7 +881,18 @@ nfp_net_close(struct rte_eth_dev *dev)
* threads/queues before calling the device close function.
*/
- nfp_net_stop(dev);
+ nfp_net_disable_queues(dev);
+
+ /* Clear queues */
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ nfp_net_reset_tx_queue(
+ (struct nfp_net_txq *)dev->data->tx_queues[i]);
+ }
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ nfp_net_reset_rx_queue(
+ (struct nfp_net_rxq *)dev->data->rx_queues[i]);
+ }
rte_intr_disable(&pci_dev->intr_handle);
nn_cfg_writeb(hw, NFP_NET_CFG_LSC, 0xff);
@@ -895,17 +1010,10 @@ nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
nn_link_status = (nn_link_status >> NFP_NET_CFG_STS_LINK_RATE_SHIFT) &
NFP_NET_CFG_STS_LINK_RATE_MASK;
- if ((NFD_CFG_MAJOR_VERSION_of(hw->ver) < 4) ||
- ((NFD_CFG_MINOR_VERSION_of(hw->ver) == 4) &&
- (NFD_CFG_MINOR_VERSION_of(hw->ver) == 0)))
- /* We really do not know the speed wil old firmware */
+ if (nn_link_status >= RTE_DIM(ls_to_ethtool))
link.link_speed = ETH_SPEED_NUM_NONE;
- else {
- if (nn_link_status >= RTE_DIM(ls_to_ethtool))
- link.link_speed = ETH_SPEED_NUM_NONE;
- else
- link.link_speed = ls_to_ethtool[nn_link_status];
- }
+ else
+ link.link_speed = ls_to_ethtool[nn_link_status];
if (old.link_status != link.link_status) {
nfp_net_dev_atomic_write_link_status(dev, &link);
@@ -919,7 +1027,7 @@ nfp_net_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
return -1;
}
-static void
+static int
nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
int i;
@@ -1005,8 +1113,11 @@ nfp_net_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
nfp_dev_stats.imissed -= hw->eth_stats_base.imissed;
- if (stats)
+ if (stats) {
memcpy(stats, &nfp_dev_stats, sizeof(*stats));
+ return 0;
+ }
+ return -EINVAL;
}
static void
@@ -1125,6 +1236,11 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
ETH_TXQ_FLAGS_NOOFFLOADS,
};
+ dev_info->flow_type_rss_offloads = ETH_RSS_NONFRAG_IPV4_TCP |
+ ETH_RSS_NONFRAG_IPV4_UDP |
+ ETH_RSS_NONFRAG_IPV6_TCP |
+ ETH_RSS_NONFRAG_IPV6_UDP;
+
dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ;
dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ;
@@ -1240,12 +1356,12 @@ nfp_net_dev_link_status_print(struct rte_eth_dev *dev)
nfp_net_dev_atomic_read_link_status(dev, &link);
if (link.link_status)
RTE_LOG(INFO, PMD, "Port %d: Link Up - speed %u Mbps - %s\n",
- (int)(dev->data->port_id), (unsigned)link.link_speed,
+ dev->data->port_id, link.link_speed,
link.link_duplex == ETH_LINK_FULL_DUPLEX
? "full-duplex" : "half-duplex");
else
RTE_LOG(INFO, PMD, " Port %d: Link Down\n",
- (int)(dev->data->port_id));
+ dev->data->port_id);
RTE_LOG(INFO, PMD, "PCI Address: %04d:%02d:%02d:%d\n",
pci_dev->addr.domain, pci_dev->addr.bus,
@@ -1446,7 +1562,7 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
}
/* Saving physical and virtual addresses for the RX ring */
- rxq->dma = (uint64_t)tz->phys_addr;
+ rxq->dma = (uint64_t)tz->iova;
rxq->rxds = (struct nfp_net_rx_desc *)tz->addr;
/* mbuf pointers array for referencing mbufs linked to RX descriptors */
@@ -1549,7 +1665,7 @@ nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
"tx_free_thresh must be less than the number of TX "
"descriptors. (tx_free_thresh=%u port=%d "
"queue=%d)\n", (unsigned int)tx_free_thresh,
- (int)dev->data->port_id, (int)queue_idx);
+ dev->data->port_id, (int)queue_idx);
return -(EINVAL);
}
@@ -1602,7 +1718,7 @@ nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
txq->txq_flags = tx_conf->txq_flags;
/* Saving physical and virtual addresses for the TX ring */
- txq->dma = (uint64_t)tz->phys_addr;
+ txq->dma = (uint64_t)tz->iova;
txq->txds = (struct nfp_net_tx_desc *)tz->addr;
/* mbuf pointers array for referencing mbufs linked to TX descriptors */
@@ -1720,6 +1836,8 @@ nfp_net_rx_cksum(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
#define NFP_HASH_OFFSET ((uint8_t *)mbuf->buf_addr + mbuf->data_off - 4)
#define NFP_HASH_TYPE_OFFSET ((uint8_t *)mbuf->buf_addr + mbuf->data_off - 8)
+#define NFP_DESC_META_LEN(d) (d->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK)
+
/*
* nfp_net_set_hash - Set mbuf hash data
*
@@ -1730,18 +1848,57 @@ static inline void
nfp_net_set_hash(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
struct rte_mbuf *mbuf)
{
- uint32_t hash;
- uint32_t hash_type;
struct nfp_net_hw *hw = rxq->hw;
+ uint8_t *meta_offset;
+ uint32_t meta_info;
+ uint32_t hash = 0;
+ uint32_t hash_type = 0;
if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
return;
- if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
- return;
+ if (NFD_CFG_MAJOR_VERSION_of(hw->ver) <= 3) {
+ if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
+ return;
+
+ hash = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_OFFSET);
+ hash_type = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_TYPE_OFFSET);
- hash = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_OFFSET);
- hash_type = rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_TYPE_OFFSET);
+ } else if (NFP_DESC_META_LEN(rxd)) {
+ /*
+ * new metadata api:
+ * <---- 32 bit ----->
+ * m field type word
+ * e data field #2
+ * t data field #1
+ * a data field #0
+ * ====================
+ * packet data
+ *
+ * Field type word contains up to 8 4bit field types
+ * A 4bit field type refers to a data field word
+ * A data field word can have several 4bit field types
+ */
+ meta_offset = rte_pktmbuf_mtod(mbuf, uint8_t *);
+ meta_offset -= NFP_DESC_META_LEN(rxd);
+ meta_info = rte_be_to_cpu_32(*(uint32_t *)meta_offset);
+ meta_offset += 4;
+ /* NFP PMD just supports metadata for hashing */
+ switch (meta_info & NFP_NET_META_FIELD_MASK) {
+ case NFP_NET_META_HASH:
+ /* next field type is about the hash type */
+ meta_info >>= NFP_NET_META_FIELD_SIZE;
+ /* hash value is in the data field */
+ hash = rte_be_to_cpu_32(*(uint32_t *)meta_offset);
+ hash_type = meta_info & NFP_NET_META_FIELD_MASK;
+ break;
+ default:
+ /* Unsupported metadata can be a performance issue */
+ return;
+ }
+ } else {
+ return;
+ }
mbuf->hash.rss = hash;
mbuf->ol_flags |= PKT_RX_RSS_HASH;
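
As an aside, a minimal sketch of how such a 4-bit field-type word can be walked in general; the PMD above only consumes the hash field, and all values below are hypothetical.

#include <stdio.h>
#include <stdint.h>

#define META_FIELD_SIZE 4
#define META_FIELD_MASK ((1 << META_FIELD_SIZE) - 1)
#define META_TYPE_HASH 1        /* mirrors NFP_NET_META_HASH */

int main(void)
{
        /* hypothetical field-type word: first (and only) field is HASH */
        uint32_t meta_info = 0x1;
        /* hypothetical data field words that would follow the type word */
        uint32_t data[8] = { 0xdeadbeef };
        int i;

        for (i = 0; meta_info != 0; i++) {
                uint32_t type = meta_info & META_FIELD_MASK;

                if (type == META_TYPE_HASH)
                        printf("field %d: hash, data word 0x%08x\n",
                               i, data[i]);
                else
                        printf("field %d: unhandled type %u\n", i, type);
                meta_info >>= META_FIELD_SIZE;
        }
        return 0;
}
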
@@ -1847,9 +2004,9 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
*/
new_mb = rte_pktmbuf_alloc(rxq->mem_pool);
if (unlikely(new_mb == NULL)) {
- RTE_LOG_DP(DEBUG, PMD, "RX mbuf alloc failed port_id=%u "
- "queue_id=%u\n", (unsigned)rxq->port_id,
- (unsigned)rxq->qidx);
+ RTE_LOG_DP(DEBUG, PMD,
+ "RX mbuf alloc failed port_id=%u queue_id=%u\n",
+ rxq->port_id, (unsigned int)rxq->qidx);
nfp_net_mbuf_alloc_failed(rxq);
break;
}
@@ -1910,7 +2067,7 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
if ((rxds->rxd.flags & PCIE_DESC_RX_VLAN) &&
(hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) {
mb->vlan_tci = rte_cpu_to_le_32(rxds->rxd.vlan);
- mb->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
+ mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
}
/* Adding the mbuff to the mbuff array passed by the app */
@@ -1933,7 +2090,7 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
return nb_hold;
PMD_RX_LOG(DEBUG, "RX port_id=%u queue_id=%u, %d packets received\n",
- (unsigned)rxq->port_id, (unsigned)rxq->qidx, nb_hold);
+ rxq->port_id, (unsigned int)rxq->qidx, nb_hold);
nb_hold += rxq->nb_rx_hold;
@@ -1944,7 +2101,7 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
rte_wmb();
if (nb_hold > rxq->rx_free_thresh) {
PMD_RX_LOG(DEBUG, "port=%u queue=%u nb_hold=%u avail=%u\n",
- (unsigned)rxq->port_id, (unsigned)rxq->qidx,
+ rxq->port_id, (unsigned int)rxq->qidx,
(unsigned)nb_hold, (unsigned)avail);
nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold);
nb_hold = 0;
@@ -2094,7 +2251,7 @@ nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
*/
pkt_size = pkt->pkt_len;
- while (pkt_size) {
+ while (pkt) {
/* Copying TSO, VLAN and cksum info */
*txds = txd;
@@ -2109,7 +2266,7 @@ nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
*lmbuf = pkt;
dma_size = pkt->data_len;
- dma_addr = rte_mbuf_data_dma_addr(pkt);
+ dma_addr = rte_mbuf_data_iova(pkt);
PMD_TX_LOG(DEBUG, "Working with mbuf at dma address:"
"%" PRIx64 "\n", dma_addr);
@@ -2126,13 +2283,13 @@ nfp_net_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
txq->wr_p = 0;
pkt_size -= dma_size;
- if (!pkt_size) {
+ if (!pkt_size)
/* End of packet */
txds->offset_eop |= PCIE_DESC_TX_EOP;
- } else {
+ else
txds->offset_eop &= PCIE_DESC_TX_OFFSET_MASK;
- pkt = pkt->next;
- }
+
+ pkt = pkt->next;
/* Referencing next free TX descriptor */
txds = &txq->txds[txq->wr_p];
lmbuf = &txq->txbufs[txq->wr_p].mbuf;
@@ -2149,11 +2306,12 @@ xmit_end:
return i;
}
-static void
+static int
nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
uint32_t new_ctrl, update;
struct nfp_net_hw *hw;
+ int ret;
hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);
new_ctrl = 0;
@@ -2174,14 +2332,15 @@ nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask)
new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_RXVLAN;
if (new_ctrl == 0)
- return;
+ return 0;
update = NFP_NET_CFG_UPDATE_GEN;
- if (nfp_net_reconfig(hw, new_ctrl, update) < 0)
- return;
+ ret = nfp_net_reconfig(hw, new_ctrl, update);
+ if (!ret)
+ hw->ctrl = new_ctrl;
- hw->ctrl = new_ctrl;
+ return ret;
}
/* Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device */
@@ -2233,7 +2392,8 @@ nfp_net_reta_update(struct rte_eth_dev *dev,
reta &= ~(0xFF << (8 * j));
reta |= reta_conf[idx].reta[shift + j] << (8 * j);
}
- nn_cfg_writel(hw, NFP_NET_CFG_RSS_ITBL + shift, reta);
+ nn_cfg_writel(hw, NFP_NET_CFG_RSS_ITBL + (idx * 64) + shift,
+ reta);
}
update = NFP_NET_CFG_UPDATE_RSS;
@@ -2280,7 +2440,8 @@ nfp_net_reta_query(struct rte_eth_dev *dev,
if (!mask)
continue;
- reta = nn_cfg_readl(hw, NFP_NET_CFG_RSS_ITBL + shift);
+ reta = nn_cfg_readl(hw, NFP_NET_CFG_RSS_ITBL + (idx * 64) +
+ shift);
for (j = 0; j < 4; j++) {
if (!(mask & (0x1 << j)))
continue;
@@ -2330,6 +2491,9 @@ nfp_net_rss_hash_update(struct rte_eth_dev *dev,
NFP_NET_CFG_RSS_IPV6_TCP |
NFP_NET_CFG_RSS_IPV6_UDP;
+ cfg_rss_ctrl |= NFP_NET_CFG_RSS_MASK;
+ cfg_rss_ctrl |= NFP_NET_CFG_RSS_TOEPLITZ;
+
/* configuring where to apply the RSS hash */
nn_cfg_writel(hw, NFP_NET_CFG_RSS_CTRL, cfg_rss_ctrl);
@@ -2426,19 +2590,76 @@ static const struct eth_dev_ops nfp_net_eth_dev_ops = {
.rx_queue_intr_disable = nfp_rx_queue_intr_disable,
};
+/*
+ * Every eth_dev created gets its own private data, but before nfp_net_init
+ * that private data references the private data for all the PF ports. This
+ * is because the vNIC BARs are mapped based on the first port, so all ports
+ * need info about the port 0 private data. Inside nfp_net_init the private
+ * data pointer is changed to the right address for each port once the BARs
+ * have been mapped.
+ *
+ * This function helps to find out which port, and therefore which offset
+ * inside the private data array, to use.
+ */
+static int
+get_pf_port_number(char *name)
+{
+ char *pf_str = name;
+ int size = 0;
+
+ while ((*pf_str != '_') && (*pf_str != '\0') && (size++ < 30))
+ pf_str++;
+
+ if (size == 30)
+ /*
+ * This should not happen at all and it would mean major
+ * implementation fault.
+ */
+ rte_panic("nfp_net: problem with pf device name\n");
+
+ /* Expecting _portX with X within [0,7] */
+ pf_str += 5;
+
+ return (int)strtol(pf_str, NULL, 10);
+}
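
For illustration, the "<device>_portX" convention assumed above in a tiny standalone form; the device names below are hypothetical.

#include <stdio.h>
#include <stdlib.h>

/* same idea: skip to '_', step past "_port", read the trailing digit */
static int pf_port_from_name(const char *name)
{
        const char *p = name;

        while (*p != '_' && *p != '\0')
                p++;
        if (*p == '\0')
                return -1;
        return (int)strtol(p + 5, NULL, 10);
}

int main(void)
{
        /* hypothetical multiport PF device names */
        printf("%d\n", pf_port_from_name("0000:04:00.0_port0")); /* 0 */
        printf("%d\n", pf_port_from_name("0000:04:00.0_port3")); /* 3 */
        return 0;
}
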
+
static int
nfp_net_init(struct rte_eth_dev *eth_dev)
{
struct rte_pci_device *pci_dev;
- struct nfp_net_hw *hw;
+ struct nfp_net_hw *hw, *hwport0;
- uint32_t tx_bar_off, rx_bar_off;
+ uint64_t tx_bar_off = 0, rx_bar_off = 0;
uint32_t start_q;
int stride = 4;
+ nspu_desc_t *nspu_desc = NULL;
+ uint64_t bar_offset;
+ int port = 0;
+
PMD_INIT_FUNC_TRACE();
- hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+
+ if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) ||
+ (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) {
+ port = get_pf_port_number(eth_dev->data->name);
+ if (port < 0 || port > 7) {
+ RTE_LOG(ERR, PMD, "Port value is wrong\n");
+ return -ENODEV;
+ }
+
+ PMD_INIT_LOG(DEBUG, "Working with PF port value %d\n", port);
+
+ /* This points to port 0 private data */
+ hwport0 = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+
+ /* This points to the specific port private data */
+ hw = &hwport0[port];
+ hw->pf_port_idx = port;
+ } else {
+ hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ hwport0 = 0;
+ }
eth_dev->dev_ops = &nfp_net_eth_dev_ops;
eth_dev->rx_pkt_burst = &nfp_net_recv_pkts;
@@ -2448,9 +2669,7 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
- pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
hw->device_id = pci_dev->id.device_id;
hw->vendor_id = pci_dev->id.vendor_id;
@@ -2468,11 +2687,42 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
"hw->ctrl_bar is NULL. BAR0 not configured\n");
return -ENODEV;
}
+
+ if (hw->is_pf && port == 0) {
+ nspu_desc = hw->nspu_desc;
+
+ if (nfp_nsp_map_ctrl_bar(nspu_desc, &bar_offset) != 0) {
+ /*
+ * A firmware should be there after PF probe so this
+ * should not happen.
+ */
+ RTE_LOG(ERR, PMD, "PF BAR symbol resolution failed\n");
+ return -ENODEV;
+ }
+
+ /* vNIC PF control BAR is a subset of PF PCI device BAR */
+ hw->ctrl_bar += bar_offset;
+ PMD_INIT_LOG(DEBUG, "ctrl bar: %p\n", hw->ctrl_bar);
+ }
+
+ if (port > 0) {
+ if (!hwport0->ctrl_bar)
+ return -ENODEV;
+
+ /* address based on port0 offset */
+ hw->ctrl_bar = hwport0->ctrl_bar +
+ (port * NFP_PF_CSR_SLICE_SIZE);
+ }
+
+ PMD_INIT_LOG(DEBUG, "ctrl bar: %p\n", hw->ctrl_bar);
+
hw->max_rx_queues = nn_cfg_readl(hw, NFP_NET_CFG_MAX_RXRINGS);
hw->max_tx_queues = nn_cfg_readl(hw, NFP_NET_CFG_MAX_TXRINGS);
/* Work out where in the BAR the queues start. */
switch (pci_dev->id.device_id) {
+ case PCI_DEVICE_ID_NFP4000_PF_NIC:
+ case PCI_DEVICE_ID_NFP6000_PF_NIC:
case PCI_DEVICE_ID_NFP6000_VF_NIC:
start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_TXQ);
tx_bar_off = NFP_PCIE_QUEUE(start_q);
@@ -2484,11 +2734,34 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
return -ENODEV;
}
- PMD_INIT_LOG(DEBUG, "tx_bar_off: 0x%08x", tx_bar_off);
- PMD_INIT_LOG(DEBUG, "rx_bar_off: 0x%08x", rx_bar_off);
+ PMD_INIT_LOG(DEBUG, "tx_bar_off: 0x%" PRIx64 "\n", tx_bar_off);
+ PMD_INIT_LOG(DEBUG, "rx_bar_off: 0x%" PRIx64 "\n", rx_bar_off);
+
+ if (hw->is_pf && port == 0) {
+ /* configure access to tx/rx vNIC BARs */
+ nfp_nsp_map_queues_bar(nspu_desc, &bar_offset);
+ PMD_INIT_LOG(DEBUG, "tx/rx bar_offset: %" PRIx64 "\n",
+ bar_offset);
+ hwport0->hw_queues = (uint8_t *)pci_dev->mem_resource[0].addr;
- hw->tx_bar = (uint8_t *)pci_dev->mem_resource[2].addr + tx_bar_off;
- hw->rx_bar = (uint8_t *)pci_dev->mem_resource[2].addr + rx_bar_off;
+ /* vNIC PF tx/rx BARs are a subset of PF PCI device */
+ hwport0->hw_queues += bar_offset;
+
+ /* Let's seize the chance to read the eth table from hw */
+ if (nfp_nsp_eth_read_table(nspu_desc, &hw->eth_table))
+ return -ENODEV;
+ }
+
+ if (hw->is_pf) {
+ hw->tx_bar = hwport0->hw_queues + tx_bar_off;
+ hw->rx_bar = hwport0->hw_queues + rx_bar_off;
+ eth_dev->data->dev_private = hw;
+ } else {
+ hw->tx_bar = (uint8_t *)pci_dev->mem_resource[2].addr +
+ tx_bar_off;
+ hw->rx_bar = (uint8_t *)pci_dev->mem_resource[2].addr +
+ rx_bar_off;
+ }
PMD_INIT_LOG(DEBUG, "ctrl_bar: %p, tx_bar: %p, rx_bar: %p",
hw->ctrl_bar, hw->tx_bar, hw->rx_bar);
@@ -2508,8 +2781,10 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
PMD_INIT_LOG(INFO, "VER: %#x, Maximum supported MTU: %d",
hw->ver, hw->max_mtu);
- PMD_INIT_LOG(INFO, "CAP: %#x, %s%s%s%s%s%s%s%s%s", hw->cap,
+ PMD_INIT_LOG(INFO, "CAP: %#x, %s%s%s%s%s%s%s%s%s%s%s", hw->cap,
hw->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
+ hw->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "",
+ hw->cap & NFP_NET_CFG_CTRL_L2MC ? "L2MCFILT " : "",
hw->cap & NFP_NET_CFG_CTRL_RXCSUM ? "RXCSUM " : "",
hw->cap & NFP_NET_CFG_CTRL_TXCSUM ? "TXCSUM " : "",
hw->cap & NFP_NET_CFG_CTRL_RXVLAN ? "RXVLAN " : "",
@@ -2537,11 +2812,18 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
return -ENOMEM;
}
- nfp_net_read_mac(hw);
+ if (hw->is_pf) {
+ nfp_net_pf_read_mac(hwport0, port);
+ nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr);
+ } else {
+ nfp_net_vf_read_mac(hw);
+ }
- if (!is_valid_assigned_ether_addr((struct ether_addr *)&hw->mac_addr))
+ if (!is_valid_assigned_ether_addr((struct ether_addr *)&hw->mac_addr)) {
/* Using random mac addresses for VFs */
eth_random_addr(&hw->mac_addr[0]);
+ nfp_net_write_mac(hw, (uint8_t *)&hw->mac_addr);
+ }
/* Copying mac address to DPDK eth_dev struct */
ether_addr_copy((struct ether_addr *)hw->mac_addr,
@@ -2568,12 +2850,154 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
return 0;
}
-static const struct rte_pci_id pci_id_nfp_net_map[] = {
+static int
+nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports,
+ nfpu_desc_t *nfpu_desc, void **priv)
+{
+ struct rte_eth_dev *eth_dev;
+ struct nfp_net_hw *hw;
+ char *port_name;
+ int ret;
+
+ port_name = rte_zmalloc("nfp_pf_port_name", 100, 0);
+ if (!port_name)
+ return -ENOMEM;
+
+ if (ports > 1)
+ sprintf(port_name, "%s_port%d", dev->device.name, port);
+ else
+ sprintf(port_name, "%s", dev->device.name);
+
+ eth_dev = rte_eth_dev_allocate(port_name);
+ if (!eth_dev)
+ return -ENOMEM;
+
+ if (port == 0) {
+ *priv = rte_zmalloc(port_name,
+ sizeof(struct nfp_net_adapter) * ports,
+ RTE_CACHE_LINE_SIZE);
+ if (!*priv) {
+ rte_eth_dev_release_port(eth_dev);
+ return -ENOMEM;
+ }
+ }
+
+ eth_dev->data->dev_private = *priv;
+
+ /*
+ * dev_private pointing to port0 dev_private because we need
+ * to configure vNIC bars based on port0 at nfp_net_init.
+ * Then dev_private is adjusted per port.
+ */
+ hw = (struct nfp_net_hw *)(eth_dev->data->dev_private) + port;
+ hw->nspu_desc = nfpu_desc->nspu;
+ hw->nfpu_desc = nfpu_desc;
+ hw->is_pf = 1;
+ if (ports > 1)
+ hw->pf_multiport_enabled = 1;
+
+ eth_dev->device = &dev->device;
+ rte_eth_copy_pci_info(eth_dev, dev);
+
+ ret = nfp_net_init(eth_dev);
+
+ if (ret)
+ rte_eth_dev_release_port(eth_dev);
+
+ rte_free(port_name);
+
+ return ret;
+}
+
+static int nfp_pf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+ struct rte_pci_device *dev)
+{
+ nfpu_desc_t *nfpu_desc;
+ nspu_desc_t *nspu_desc;
+ uint64_t offset_symbol;
+ uint8_t *bar_offset;
+ int major, minor;
+ int total_ports;
+ void *priv = 0;
+ int ret = -ENODEV;
+ int i;
+
+ if (!dev)
+ return ret;
+
+ nfpu_desc = rte_malloc("nfp nfpu", sizeof(nfpu_desc_t), 0);
+ if (!nfpu_desc)
+ return -ENOMEM;
+
+ if (nfpu_open(dev, nfpu_desc, 0) < 0) {
+ RTE_LOG(ERR, PMD,
+ "nfpu_open failed\n");
+ goto nfpu_error;
+ }
+
+ nspu_desc = nfpu_desc->nspu;
+
+ /* Check NSP ABI version */
+ if (nfp_nsp_get_abi_version(nspu_desc, &major, &minor) < 0) {
+ RTE_LOG(INFO, PMD, "NFP NSP not present\n");
+ goto error;
+ }
+ PMD_INIT_LOG(INFO, "nspu ABI version: %d.%d\n", major, minor);
+
+ if ((major == 0) && (minor < 20)) {
+ RTE_LOG(INFO, PMD, "NFP NSP ABI version too old. Required 0.20 or higher\n");
+ goto error;
+ }
+
+ ret = nfp_nsp_fw_setup(nspu_desc, "nfd_cfg_pf0_num_ports",
+ &offset_symbol);
+ if (ret)
+ goto error;
+
+ bar_offset = (uint8_t *)dev->mem_resource[0].addr;
+ bar_offset += offset_symbol;
+ total_ports = (uint32_t)*bar_offset;
+ PMD_INIT_LOG(INFO, "Total pf ports: %d\n", total_ports);
+
+ if (total_ports <= 0 || total_ports > 8) {
+ RTE_LOG(ERR, PMD, "nfd_cfg_pf0_num_ports symbol with wrong value");
+ ret = -ENODEV;
+ goto error;
+ }
+
+ for (i = 0; i < total_ports; i++) {
+ ret = nfp_pf_create_dev(dev, i, total_ports, nfpu_desc, &priv);
+ if (ret)
+ goto error;
+ }
+
+ return 0;
+
+error:
+ nfpu_close(nfpu_desc);
+nfpu_error:
+ rte_free(nfpu_desc);
+
+ return ret;
+}
+
+static const struct rte_pci_id pci_id_nfp_pf_net_map[] = {
+ {
+ RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
+ PCI_DEVICE_ID_NFP4000_PF_NIC)
+ },
{
RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
PCI_DEVICE_ID_NFP6000_PF_NIC)
},
{
+ .vendor_id = 0,
+ },
+};
+
+static const struct rte_pci_id pci_id_nfp_vf_net_map[] = {
+ {
RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
PCI_DEVICE_ID_NFP6000_VF_NIC)
},
@@ -2591,19 +3015,45 @@ static int eth_nfp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev)
{
+ struct rte_eth_dev *eth_dev;
+ struct nfp_net_hw *hw, *hwport0;
+ int port = 0;
+
+ eth_dev = rte_eth_dev_allocated(pci_dev->device.name);
+ if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) ||
+ (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) {
+ port = get_pf_port_number(eth_dev->data->name);
+ hwport0 = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ hw = &hwport0[port];
+ } else {
+ hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ }
+ /* hotplug is not possible with multiport PF */
+ if (hw->pf_multiport_enabled)
+ return -ENOTSUP;
return rte_eth_dev_pci_generic_remove(pci_dev, NULL);
}
-static struct rte_pci_driver rte_nfp_net_pmd = {
- .id_table = pci_id_nfp_net_map,
+static struct rte_pci_driver rte_nfp_net_pf_pmd = {
+ .id_table = pci_id_nfp_pf_net_map,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+ .probe = nfp_pf_pci_probe,
+ .remove = eth_nfp_pci_remove,
+};
+
+static struct rte_pci_driver rte_nfp_net_vf_pmd = {
+ .id_table = pci_id_nfp_vf_net_map,
.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
.probe = eth_nfp_pci_probe,
.remove = eth_nfp_pci_remove,
};
-RTE_PMD_REGISTER_PCI(net_nfp, rte_nfp_net_pmd);
-RTE_PMD_REGISTER_PCI_TABLE(net_nfp, pci_id_nfp_net_map);
-RTE_PMD_REGISTER_KMOD_DEP(net_nfp, "* igb_uio | uio_pci_generic | vfio-pci");
+RTE_PMD_REGISTER_PCI(net_nfp_pf, rte_nfp_net_pf_pmd);
+RTE_PMD_REGISTER_PCI(net_nfp_vf, rte_nfp_net_vf_pmd);
+RTE_PMD_REGISTER_PCI_TABLE(net_nfp_pf, pci_id_nfp_pf_net_map);
+RTE_PMD_REGISTER_PCI_TABLE(net_nfp_vf, pci_id_nfp_vf_net_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_nfp_pf, "* igb_uio | uio_pci_generic | vfio");
+RTE_PMD_REGISTER_KMOD_DEP(net_nfp_vf, "* igb_uio | uio_pci_generic | vfio");
/*
* Local variables:
diff --git a/drivers/net/nfp/nfp_net_ctrl.h b/drivers/net/nfp/nfp_net_ctrl.h
index 2c500433..1ebd99ca 100644
--- a/drivers/net/nfp/nfp_net_ctrl.h
+++ b/drivers/net/nfp/nfp_net_ctrl.h
@@ -52,6 +52,13 @@
/* Offset in Freelist buffer where packet starts on RX */
#define NFP_NET_RX_OFFSET 32
+/* working with metadata api (NFD version > 3.0) */
+#define NFP_NET_META_FIELD_SIZE 4
+#define NFP_NET_META_FIELD_MASK ((1 << NFP_NET_META_FIELD_SIZE) - 1)
+
+/* Prepend field types */
+#define NFP_NET_META_HASH 1 /* next field carries hash type */
+
/* Hash type pre-pended when a RSS hash was computed */
#define NFP_NET_RSS_NONE 0
#define NFP_NET_RSS_IPV4 1
@@ -327,6 +334,9 @@
#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \
((_x) * 0x10))
+/* PF multiport offset */
+#define NFP_PF_CSR_SLICE_SIZE (32 * 1024)
+
#endif /* _NFP_NET_CTRL_H_ */
/*
* Local variables:
diff --git a/drivers/net/nfp/nfp_net_eth.h b/drivers/net/nfp/nfp_net_eth.h
new file mode 100644
index 00000000..af57f03c
--- /dev/null
+++ b/drivers/net/nfp/nfp_net_eth.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2017 Netronome Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * vim:shiftwidth=8:noexpandtab
+ *
+ * @file dpdk/pmd/nfp_net_eth.h
+ *
+ * Netronome NFP_NET PMD driver
+ */
+
+union eth_table_entry {
+ struct {
+ uint64_t port;
+ uint64_t state;
+ uint8_t mac_addr[6];
+ uint8_t resv[2];
+ uint64_t control;
+ };
+ uint64_t raw[4];
+};
+
+#ifndef BIT_ULL
+#define BIT_ULL(a) (1ULL << (a))
+#endif
+
+#define NSP_ETH_NBI_PORT_COUNT 24
+#define NSP_ETH_MAX_COUNT (2 * NSP_ETH_NBI_PORT_COUNT)
+#define NSP_ETH_TABLE_SIZE (NSP_ETH_MAX_COUNT * sizeof(union eth_table_entry))
+
+#define NSP_ETH_PORT_LANES 0xf
+#define NSP_ETH_PORT_INDEX 0xff00
+#define NSP_ETH_PORT_LABEL 0x3f000000000000
+#define NSP_ETH_PORT_PHYLABEL 0xfc0000000000000
+
+#define NSP_ETH_PORT_LANES_MASK rte_cpu_to_le_64(NSP_ETH_PORT_LANES)
+
+#define NSP_ETH_STATE_CONFIGURED BIT_ULL(0)
+#define NSP_ETH_STATE_ENABLED BIT_ULL(1)
+#define NSP_ETH_STATE_TX_ENABLED BIT_ULL(2)
+#define NSP_ETH_STATE_RX_ENABLED BIT_ULL(3)
+#define NSP_ETH_STATE_RATE 0xf00
+#define NSP_ETH_STATE_INTERFACE 0xff000
+#define NSP_ETH_STATE_MEDIA 0x300000
+#define NSP_ETH_STATE_OVRD_CHNG BIT_ULL(22)
+#define NSP_ETH_STATE_ANEG 0x3800000
+
+#define NSP_ETH_CTRL_CONFIGURED BIT_ULL(0)
+#define NSP_ETH_CTRL_ENABLED BIT_ULL(1)
+#define NSP_ETH_CTRL_TX_ENABLED BIT_ULL(2)
+#define NSP_ETH_CTRL_RX_ENABLED BIT_ULL(3)
+#define NSP_ETH_CTRL_SET_RATE BIT_ULL(4)
+#define NSP_ETH_CTRL_SET_LANES BIT_ULL(5)
+#define NSP_ETH_CTRL_SET_ANEG BIT_ULL(6)
diff --git a/drivers/net/nfp/nfp_net_pmd.h b/drivers/net/nfp/nfp_net_pmd.h
index eec56bc1..1ae0ea62 100644
--- a/drivers/net/nfp/nfp_net_pmd.h
+++ b/drivers/net/nfp/nfp_net_pmd.h
@@ -42,6 +42,7 @@
#define NFP_NET_PMD_VERSION "0.1"
#define PCI_VENDOR_ID_NETRONOME 0x19ee
+#define PCI_DEVICE_ID_NFP4000_PF_NIC 0x4000
#define PCI_DEVICE_ID_NFP6000_PF_NIC 0x6000
#define PCI_DEVICE_ID_NFP6000_VF_NIC 0x6003
@@ -143,6 +144,11 @@ static inline void nn_writel(uint32_t val, volatile void *addr)
rte_write32(val, addr);
}
+static inline void nn_writew(uint16_t val, volatile void *addr)
+{
+ rte_write16(val, addr);
+}
+
static inline uint64_t nn_readq(volatile void *addr)
{
const volatile uint32_t *p = addr;
@@ -250,7 +256,7 @@ struct nfp_net_txq {
uint32_t tx_hthresh; /* not used by now. Future? */
uint32_t tx_wthresh; /* not used by now. Future? */
uint32_t txq_flags; /* not used by now. Future? */
- uint8_t port_id;
+ uint16_t port_id;
int qidx;
int tx_qcidx;
__le64 dma;
@@ -431,6 +437,13 @@ struct nfp_net_hw {
struct nfp_cpp_area *rx_area;
struct nfp_cpp_area *msix_area;
#endif
+ uint8_t *hw_queues;
+ uint8_t is_pf;
+ uint8_t pf_port_idx;
+ uint8_t pf_multiport_enabled;
+ union eth_table_entry *eth_table;
+ nspu_desc_t *nspu_desc;
+ nfpu_desc_t *nfpu_desc;
};
struct nfp_net_adapter {
diff --git a/drivers/net/nfp/nfp_nfpu.c b/drivers/net/nfp/nfp_nfpu.c
new file mode 100644
index 00000000..5775d8da
--- /dev/null
+++ b/drivers/net/nfp/nfp_nfpu.c
@@ -0,0 +1,103 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/types.h>
+
+#include <rte_bus_pci.h>
+#include <rte_malloc.h>
+
+#include "nfp_nfpu.h"
+
+/* PF BAR and expansion BAR for the NSP interface */
+#define NFP_CFG_PCIE_BAR 0
+#define NFP_CFG_EXP_BAR 7
+
+#define NFP_CFG_EXP_BAR_CFG_BASE 0x30000
+
+/* There could be other NFP userspace tools using the NSP interface.
+ * Make sure no other process is using it by locking the access, to
+ * avoid problems.
+ */
+static int
+nspv_aquire_process_lock(nfpu_desc_t *desc)
+{
+ int rc;
+ struct flock lock;
+ char lockname[30];
+
+ memset(&lock, 0, sizeof(lock));
+
+ snprintf(lockname, sizeof(lockname), "/var/lock/nfp%d", desc->nfp);
+
+ /* Using S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH */
+ desc->lock = open(lockname, O_RDWR | O_CREAT, 0666);
+
+ if (desc->lock < 0)
+ return desc->lock;
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ rc = -1;
+ while (rc != 0) {
+ rc = fcntl(desc->lock, F_SETLK, &lock);
+ if (rc < 0) {
+ if ((errno != EAGAIN) && (errno != EACCES)) {
+ close(desc->lock);
+ return rc;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int
+nfpu_open(struct rte_pci_device *pci_dev, nfpu_desc_t *desc, int nfp)
+{
+ void *cfg_base, *mem_base;
+ size_t barsz;
+ int ret = 0;
+ int i = 0;
+
+ desc->nfp = nfp;
+
+ ret = nspv_aquire_process_lock(desc);
+ if (ret)
+ return -1;
+
+ barsz = pci_dev->mem_resource[0].len;
+
+ /* barsz in log2 */
+ while (barsz >>= 1)
+ i++;
+ barsz = i;
+
+ /* Getting address for NFP expansion BAR registers */
+ cfg_base = pci_dev->mem_resource[0].addr;
+ cfg_base = (uint8_t *)cfg_base + NFP_CFG_EXP_BAR_CFG_BASE;
+
+ /* Getting address for NFP NSP interface registers */
+ mem_base = pci_dev->mem_resource[0].addr;
+ mem_base = (uint8_t *)mem_base + (NFP_CFG_EXP_BAR << (barsz - 3));
+
+ desc->nspu = rte_malloc("nfp nspu", sizeof(nspu_desc_t), 0);
+ nfp_nspu_init(desc->nspu, desc->nfp, NFP_CFG_PCIE_BAR, barsz,
+ NFP_CFG_EXP_BAR, cfg_base, mem_base);
+
+ return ret;
+}
+
+int
+nfpu_close(nfpu_desc_t *desc)
+{
+ rte_free(desc->nspu);
+ close(desc->lock);
+ unlink("/var/lock/nfp0");
+ return 0;
+}
diff --git a/drivers/net/nfp/nfp_nfpu.h b/drivers/net/nfp/nfp_nfpu.h
new file mode 100644
index 00000000..e56fa099
--- /dev/null
+++ b/drivers/net/nfp/nfp_nfpu.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017 Netronome Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * vim:shiftwidth=8:noexpandtab
+ *
+ * @file dpdk/pmd/nfp_nfpu.h
+ *
+ * Netronome NFP_NET PMD driver
+ */
+
+/*
+ * NFP User interface creates a window for talking with NFP NSP processor
+ */
+
+
+#include <rte_bus_pci.h>
+#include "nfp_nspu.h"
+
+typedef struct {
+ int nfp;
+ int lock;
+ nspu_desc_t *nspu;
+} nfpu_desc_t;
+
+int nfpu_open(struct rte_pci_device *pci_dev, nfpu_desc_t *desc, int nfp);
+int nfpu_close(nfpu_desc_t *desc);
diff --git a/drivers/net/nfp/nfp_nspu.c b/drivers/net/nfp/nfp_nspu.c
new file mode 100644
index 00000000..6ba940cb
--- /dev/null
+++ b/drivers/net/nfp/nfp_nspu.c
@@ -0,0 +1,623 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <rte_log.h>
+#include <rte_byteorder.h>
+
+#include "nfp_nfpu.h"
+
+#define CFG_EXP_BAR_ADDR_SZ 1
+#define CFG_EXP_BAR_MAP_TYPE 1
+
+#define EXP_BAR_TARGET_SHIFT 23
+#define EXP_BAR_LENGTH_SHIFT 27 /* 0=32, 1=64 bit increment */
+#define EXP_BAR_MAP_TYPE_SHIFT 29 /* Bulk BAR map */
+
+/* NFP target for NSP access */
+#define NFP_NSP_TARGET 7
+
+/* Expansion BARs for mapping PF vnic BARs */
+#define NFP_NET_PF_CFG_EXP_BAR 6
+#define NFP_NET_PF_HW_QUEUES_EXP_BAR 5
+
+/*
+ * This is an NFP internal address used for properly configuring an NFP
+ * expansion BAR.
+ */
+#define MEM_CMD_BASE_ADDR 0x8100000000
+
+/* NSP interface registers */
+#define NSP_BASE (MEM_CMD_BASE_ADDR + 0x22100)
+#define NSP_STATUS 0x00
+#define NSP_COMMAND 0x08
+#define NSP_BUFFER 0x10
+#define NSP_DEFAULT_BUF 0x18
+#define NSP_DEFAULT_BUF_CFG 0x20
+
+#define NSP_MAGIC 0xab10
+#define NSP_STATUS_MAGIC(x) (((x) >> 48) & 0xffff)
+#define NSP_STATUS_MAJOR(x) (int)(((x) >> 44) & 0xf)
+#define NSP_STATUS_MINOR(x) (int)(((x) >> 32) & 0xfff)
+
+/* NSP commands */
+#define NSP_CMD_RESET 1
+#define NSP_CMD_FW_LOAD 6
+#define NSP_CMD_READ_ETH_TABLE 7
+#define NSP_CMD_WRITE_ETH_TABLE 8
+#define NSP_CMD_GET_SYMBOL 14
+
+#define NSP_BUFFER_CFG_SIZE_MASK (0xff)
+
+#define NSP_REG_ADDR(d, off, reg) ((uint8_t *)(d)->mem_base + (off) + (reg))
+#define NSP_REG_VAL(p) (*(uint64_t *)(p))
+
+/*
+ * An NFP expansion BAR is configured to allow access to a specific NFP
+ * target:
+ *
+ * IN:
+ * desc: struct with basic NSP addresses to work with
+ * expbar: NFP PF expansion BAR index to configure
+ * tgt: NFP target to configure access
+ * addr: NFP target address
+ *
+ * OUT:
+ * pcie_offset: NFP PCI BAR offset to work with
+ */
+static void
+nfp_nspu_mem_bar_cfg(nspu_desc_t *desc, int expbar, int tgt,
+ uint64_t addr, uint64_t *pcie_offset)
+{
+ uint64_t x, y, barsz;
+ uint32_t *expbar_ptr;
+
+ barsz = desc->barsz;
+
+ /*
+ * NFP CPP address to configure. This comes from NFP 6000
+ * datasheet document based on Bulk mapping.
+ */
+ x = (addr >> (barsz - 3)) << (21 - (40 - (barsz - 3)));
+ x |= CFG_EXP_BAR_MAP_TYPE << EXP_BAR_MAP_TYPE_SHIFT;
+ x |= CFG_EXP_BAR_ADDR_SZ << EXP_BAR_LENGTH_SHIFT;
+ x |= tgt << EXP_BAR_TARGET_SHIFT;
+
+ /* Getting expansion bar configuration register address */
+ expbar_ptr = (uint32_t *)desc->cfg_base;
+ /* Each physical PCI BAR has 8 NFP expansion BARs */
+ expbar_ptr += (desc->pcie_bar * 8) + expbar;
+
+ /* Writing to the expansion BAR register */
+ *expbar_ptr = (uint32_t)x;
+
+ /* Getting the pcie offset to work with from userspace */
+ y = addr & ((uint64_t)(1 << (barsz - 3)) - 1);
+ *pcie_offset = y;
+}
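
A worked split of an NFP target address into the part encoded into the expansion BAR register and the in-window PCI offset, assuming a hypothetical 64 MB PCI BAR; the target/type/length bits handled by the function above are left out of this sketch.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
        /* hypothetical 64 MB PCI BAR: barsz = 26, each of the 8 expansion
         * BAR windows then covers 1 << 23 bytes (8 MB) */
        uint64_t barsz = 26;
        uint64_t window = 1ULL << (barsz - 3);
        /* NSP_BASE from the code above */
        uint64_t addr = 0x8100000000ULL + 0x22100;

        /* upper bits are what the expansion BAR register must point at... */
        uint64_t upper = addr >> (barsz - 3);
        /* ...and the low bits are the offset the host uses in the window */
        uint64_t pcie_offset = addr & (window - 1);

        printf("window size : 0x%" PRIx64 "\n", window);
        printf("upper bits  : 0x%" PRIx64 "\n", upper);
        printf("pcie offset : 0x%" PRIx64 "\n", pcie_offset);
        return 0;
}
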
+
+/*
+ * Configure an expansion BAR for accessing the NSP userspace interface. This
+ * function always configures the same expansion BAR, which implies access to
+ * the previously configured NFP target is lost.
+ */
+static void
+nspu_xlate(nspu_desc_t *desc, uint64_t addr, uint64_t *pcie_offset)
+{
+ nfp_nspu_mem_bar_cfg(desc, desc->exp_bar, NFP_NSP_TARGET, addr,
+ pcie_offset);
+}
+
+int
+nfp_nsp_get_abi_version(nspu_desc_t *desc, int *major, int *minor)
+{
+ uint64_t pcie_offset;
+ uint64_t nsp_reg;
+
+ nspu_xlate(desc, NSP_BASE, &pcie_offset);
+ nsp_reg = NSP_REG_VAL(NSP_REG_ADDR(desc, pcie_offset, NSP_STATUS));
+
+ if (NSP_STATUS_MAGIC(nsp_reg) != NSP_MAGIC)
+ return -1;
+
+ *major = NSP_STATUS_MAJOR(nsp_reg);
+ *minor = NSP_STATUS_MINOR(nsp_reg);
+
+ return 0;
+}
+
+int
+nfp_nspu_init(nspu_desc_t *desc, int nfp, int pcie_bar, size_t pcie_barsz,
+ int exp_bar, void *exp_bar_cfg_base, void *exp_bar_mmap)
+{
+ uint64_t offset, buffaddr;
+ uint64_t nsp_reg;
+
+ desc->nfp = nfp;
+ desc->pcie_bar = pcie_bar;
+ desc->exp_bar = exp_bar;
+ desc->barsz = pcie_barsz;
+ desc->windowsz = 1 << (desc->barsz - 3);
+ desc->cfg_base = exp_bar_cfg_base;
+ desc->mem_base = exp_bar_mmap;
+
+ nspu_xlate(desc, NSP_BASE, &offset);
+
+ /*
+ * Other NSPU clients can use other buffers. Let's tell NSPU we use the
+ * default buffer.
+ */
+ buffaddr = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_DEFAULT_BUF));
+ NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_BUFFER)) = buffaddr;
+
+ /* NFP internal addresses are 40 bits. Clear all other bits here */
+ buffaddr = buffaddr & (((uint64_t)1 << 40) - 1);
+ desc->bufaddr = buffaddr;
+
+ /* Let's get information about the buffer */
+ nsp_reg = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_DEFAULT_BUF_CFG));
+
+ /* Buffer size comes in MBs. Conversion to bytes */
+ desc->buf_size = ((size_t)nsp_reg & NSP_BUFFER_CFG_SIZE_MASK) << 20;
+
+ return 0;
+}
+
+#define NSPU_NFP_BUF(addr, base, off) \
+ (*(uint64_t *)((uint8_t *)(addr)->mem_base + ((base) | (off))))
+
+#define NSPU_HOST_BUF(base, off) (*(uint64_t *)((uint8_t *)(base) + (off)))
+
+static int
+nspu_buff_write(nspu_desc_t *desc, void *buffer, size_t size)
+{
+ uint64_t pcie_offset, pcie_window_base, pcie_window_offset;
+ uint64_t windowsz = desc->windowsz;
+ uint64_t buffaddr, j, i = 0;
+ int ret = 0;
+
+ if (size > desc->buf_size)
+ return -1;
+
+ buffaddr = desc->bufaddr;
+ windowsz = desc->windowsz;
+
+ while (i < size) {
+ /* Expansion bar reconfiguration per window size */
+ nspu_xlate(desc, buffaddr + i, &pcie_offset);
+ pcie_window_base = pcie_offset & (~(windowsz - 1));
+ pcie_window_offset = pcie_offset & (windowsz - 1);
+ for (j = pcie_window_offset; ((j < windowsz) && (i < size));
+ j += 8) {
+ NSPU_NFP_BUF(desc, pcie_window_base, j) =
+ NSPU_HOST_BUF(buffer, i);
+ i += 8;
+ }
+ }
+
+ return ret;
+}
+
+static int
+nspu_buff_read(nspu_desc_t *desc, void *buffer, size_t size)
+{
+ uint64_t pcie_offset, pcie_window_base, pcie_window_offset;
+ uint64_t windowsz, i = 0, j;
+ uint64_t buffaddr;
+ int ret = 0;
+
+ if (size > desc->buf_size)
+ return -1;
+
+ buffaddr = desc->bufaddr;
+ windowsz = desc->windowsz;
+
+ while (i < size) {
+ /* Expansion bar reconfiguration per window size */
+ nspu_xlate(desc, buffaddr + i, &pcie_offset);
+ pcie_window_base = pcie_offset & (~(windowsz - 1));
+ pcie_window_offset = pcie_offset & (windowsz - 1);
+ for (j = pcie_window_offset; ((j < windowsz) && (i < size));
+ j += 8) {
+ NSPU_HOST_BUF(buffer, i) =
+ NSPU_NFP_BUF(desc, pcie_window_base, j);
+ i += 8;
+ }
+ }
+
+ return ret;
+}
+
+static int
+nspu_command(nspu_desc_t *desc, uint16_t cmd, int read, int write,
+ void *buffer, size_t rsize, size_t wsize)
+{
+ uint64_t status, cmd_reg;
+ uint64_t offset;
+ int retry = 0;
+ int retries = 120;
+ int ret = 0;
+
+ /* Same expansion BAR is used for different things */
+ nspu_xlate(desc, NSP_BASE, &offset);
+
+ status = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_STATUS));
+
+ while ((status & 0x1) && (retry < retries)) {
+ status = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_STATUS));
+ retry++;
+ sleep(1);
+ }
+
+ if (retry == retries)
+ return -1;
+
+ if (write) {
+ ret = nspu_buff_write(desc, buffer, wsize);
+ if (ret)
+ return ret;
+
+ /* Expansion BAR changes when writing the buffer */
+ nspu_xlate(desc, NSP_BASE, &offset);
+ }
+
+ NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_COMMAND)) =
+ (uint64_t)wsize << 32 | (uint64_t)cmd << 16 | 1;
+
+ retry = 0;
+
+ cmd_reg = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_COMMAND));
+ while ((cmd_reg & 0x1) && (retry < retries)) {
+ cmd_reg = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_COMMAND));
+ retry++;
+ sleep(1);
+ }
+ if (retry == retries)
+ return -1;
+
+ retry = 0;
+ status = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_STATUS));
+ while ((status & 0x1) && (retry < retries)) {
+ status = NSP_REG_VAL(NSP_REG_ADDR(desc, offset, NSP_STATUS));
+ retry++;
+ sleep(1);
+ }
+
+ if (retry == retries)
+ return -1;
+
+ ret = status & (0xff << 8);
+ if (ret)
+ return ret;
+
+ if (read) {
+ ret = nspu_buff_read(desc, buffer, rsize);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
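
A small sketch of how the command register written above is composed and how the status error field is read back; the command and buffer size below are hypothetical.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
        /* hypothetical: NSP_CMD_FW_LOAD (6) with a 0x1000-byte write buffer */
        uint64_t wsize = 0x1000;
        uint64_t cmd = 6;
        uint64_t cmd_reg = wsize << 32 | cmd << 16 | 1;
        uint64_t status = 0;    /* hypothetical status word: success */

        printf("command register : 0x%016" PRIx64 "\n", cmd_reg);
        /* bit 0 is the start/busy bit polled above until it clears */
        printf("busy bit         : %" PRIu64 "\n", cmd_reg & 0x1);
        /* the status word carries an error code in bits 8-15 */
        printf("error code       : %" PRIu64 "\n", (status >> 8) & 0xff);
        return 0;
}
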
+
+static int
+nfp_fw_reset(nspu_desc_t *nspu_desc)
+{
+ int res;
+
+ res = nspu_command(nspu_desc, NSP_CMD_RESET, 0, 0, 0, 0, 0);
+
+ if (res < 0)
+ RTE_LOG(INFO, PMD, "fw reset failed: error %d", res);
+
+ return res;
+}
+
+#define DEFAULT_FW_PATH "/lib/firmware/netronome"
+#define DEFAULT_FW_FILENAME "nic_dpdk_default.nffw"
+
+static int
+nfp_fw_upload(nspu_desc_t *nspu_desc)
+{
+ int fw_f;
+ char *fw_buf;
+ char filename[100];
+ struct stat file_stat;
+ off_t fsize, bytes;
+ ssize_t size;
+ int ret;
+
+ size = nspu_desc->buf_size;
+
+ sprintf(filename, "%s/%s", DEFAULT_FW_PATH, DEFAULT_FW_FILENAME);
+ fw_f = open(filename, O_RDONLY);
+ if (fw_f < 0) {
+ RTE_LOG(INFO, PMD, "Firmware file %s/%s not found.",
+ DEFAULT_FW_PATH, DEFAULT_FW_FILENAME);
+ return -ENOENT;
+ }
+
+ fstat(fw_f, &file_stat);
+
+ fsize = file_stat.st_size;
+ RTE_LOG(DEBUG, PMD, "Firmware file with size: %" PRIu64 "\n",
+ (uint64_t)fsize);
+
+ if (fsize > (off_t)size) {
+ RTE_LOG(INFO, PMD, "fw file too big: %" PRIu64
+ " bytes (%" PRIu64 " max)",
+ (uint64_t)fsize, (uint64_t)size);
+ return -EINVAL;
+ }
+
+ fw_buf = malloc((size_t)size);
+ if (!fw_buf) {
+ RTE_LOG(INFO, PMD, "malloc failed for fw buffer");
+ return -ENOMEM;
+ }
+ memset(fw_buf, 0, size);
+
+ bytes = read(fw_f, fw_buf, fsize);
+ if (bytes != fsize) {
+ RTE_LOG(INFO, PMD, "Reading fw to buffer failed.\n"
+ "Just %" PRIu64 " of %" PRIu64 " bytes read.",
+ (uint64_t)bytes, (uint64_t)fsize);
+ free(fw_buf);
+ return -EIO;
+ }
+
+ ret = nspu_command(nspu_desc, NSP_CMD_FW_LOAD, 0, 1, fw_buf, 0, bytes);
+
+ free(fw_buf);
+
+ return ret;
+}
+
+/* Firmware symbol descriptor size */
+#define NFP_SYM_DESC_LEN 40
+
+#define SYMBOL_DATA(b, off) (*(int64_t *)((b) + (off)))
+#define SYMBOL_UDATA(b, off) (*(uint64_t *)((b) + (off)))
+
+/* Firmware symbols contain information about how to access what they
+ * represent. It can be as simple as a numeric variable declared in a
+ * specific NFP memory, but it can also be a more complex structure
+ * related to specific hardware functionality or components. Target,
+ * domain and address make it possible to create the BAR window for
+ * accessing such a hw object, and size defines the length to map.
+ *
+ * A vNIC is a network interface implemented inside the NFP that uses a
+ * subset of the device PCI BARs. Specific firmware symbols allow host
+ * drivers like the NFP PMD to map those vNIC BARs.
+ *
+ * Accessing what the symbol represents implies mapping the access through
+ * a PCI BAR window. NFP expansion BARs are used for this through
+ * the NSPU interface.
+ */
+static int
+nfp_nspu_set_bar_from_symbl(nspu_desc_t *desc, const char *symbl,
+ uint32_t expbar, uint64_t *pcie_offset,
+ ssize_t *size)
+{
+ int64_t type;
+ int64_t target;
+ int64_t domain;
+ uint64_t addr;
+ char *sym_buf;
+ int ret = 0;
+
+ sym_buf = malloc(desc->buf_size);
+ strncpy(sym_buf, symbl, strlen(symbl));
+ ret = nspu_command(desc, NSP_CMD_GET_SYMBOL, 1, 1, sym_buf,
+ NFP_SYM_DESC_LEN, strlen(symbl));
+ if (ret) {
+ RTE_LOG(DEBUG, PMD, "symbol resolution (%s) failed\n", symbl);
+ goto clean;
+ }
+
+ /* Reading symbol information */
+ type = SYMBOL_DATA(sym_buf, 0);
+ target = SYMBOL_DATA(sym_buf, 8);
+ domain = SYMBOL_DATA(sym_buf, 16);
+ addr = SYMBOL_UDATA(sym_buf, 24);
+ *size = (ssize_t)SYMBOL_UDATA(sym_buf, 32);
+
+ if (type != 1) {
+ RTE_LOG(INFO, PMD, "wrong symbol type\n");
+ ret = -EINVAL;
+ goto clean;
+ }
+ if (!(target == 7 || target == -7)) {
+ RTE_LOG(INFO, PMD, "wrong symbol target\n");
+ ret = -EINVAL;
+ goto clean;
+ }
+ if (domain == 8 || domain == 9) {
+ RTE_LOG(INFO, PMD, "wrong symbol domain\n");
+ ret = -EINVAL;
+ goto clean;
+ }
+
+ /* Adjusting address based on symbol location */
+ if ((domain >= 24) && (domain < 28) && (target == 7)) {
+ addr = 1ULL << 37 | addr | ((uint64_t)domain & 0x3) << 35;
+ } else {
+ addr = 1ULL << 39 | addr | ((uint64_t)domain & 0x3f) << 32;
+ if (target == -7)
+ target = 7;
+ }
+
+ /* Configuring NFP expansion bar for mapping specific PCI BAR window */
+ nfp_nspu_mem_bar_cfg(desc, expbar, target, addr, pcie_offset);
+
+ /* This is the PCI BAR offset to use by the host */
+ *pcie_offset |= ((expbar & 0x7) << (desc->barsz - 3));
+
+clean:
+ free(sym_buf);
+ return ret;
+}
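
For reference, a sketch of the 40-byte symbol descriptor decoded above; the struct and field names are illustrative, only the offsets and expected values come from the checks in the code.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* one possible view of the 40-byte descriptor; names are illustrative */
struct nsp_sym_desc {
        int64_t type;           /* offset 0, expected to be 1 */
        int64_t target;         /* offset 8, expected 7 or -7 */
        int64_t domain;         /* offset 16 */
        uint64_t addr;          /* offset 24 */
        uint64_t size;          /* offset 32 */
};

int main(void)
{
        printf("descriptor length: %zu\n", sizeof(struct nsp_sym_desc));
        printf("addr offset      : %zu\n",
               offsetof(struct nsp_sym_desc, addr));
        return 0;
}
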
+
+int
+nfp_nsp_fw_setup(nspu_desc_t *desc, const char *sym, uint64_t *pcie_offset)
+{
+ ssize_t bar0_sym_size;
+
+ /* If the symbol resolution works, it implies a firmware app
+ * is already there.
+ */
+ if (!nfp_nspu_set_bar_from_symbl(desc, sym, NFP_NET_PF_CFG_EXP_BAR,
+ pcie_offset, &bar0_sym_size))
+ return 0;
+
+ /* No firmware app detected or not the right one */
+ RTE_LOG(INFO, PMD, "No firmware detected. Resetting NFP...\n");
+ if (nfp_fw_reset(desc) < 0) {
+ RTE_LOG(ERR, PMD, "nfp fw reset failed\n");
+ return -ENODEV;
+ }
+
+ RTE_LOG(INFO, PMD, "Reset done.\n");
+ RTE_LOG(INFO, PMD, "Uploading firmware...\n");
+
+ if (nfp_fw_upload(desc) < 0) {
+ RTE_LOG(ERR, PMD, "nfp fw upload failed\n");
+ return -ENODEV;
+ }
+
+ RTE_LOG(INFO, PMD, "Done.\n");
+
+ /* Now the symbol should be there */
+ if (nfp_nspu_set_bar_from_symbl(desc, sym, NFP_NET_PF_CFG_EXP_BAR,
+ pcie_offset, &bar0_sym_size)) {
+ RTE_LOG(ERR, PMD, "nfp PF BAR symbol resolution failed\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+int
+nfp_nsp_map_ctrl_bar(nspu_desc_t *desc, uint64_t *pcie_offset)
+{
+ ssize_t bar0_sym_size;
+
+ if (nfp_nspu_set_bar_from_symbl(desc, "_pf0_net_bar0",
+ NFP_NET_PF_CFG_EXP_BAR,
+ pcie_offset, &bar0_sym_size))
+ return -ENODEV;
+
+ return 0;
+}
+
+/*
+ * This is a hardcoded NFP internal CPP bus address for the hw queues unit
+ * inside the PCIe island.
+ */
+#define NFP_CPP_PCIE_QUEUES ((uint64_t)(1ULL << 39) | 0x80000 | \
+ ((uint64_t)0x4 & 0x3f) << 32)
+
+/* Configure a specific NFP expansion bar for accessing the vNIC rx/tx BARs */
+void
+nfp_nsp_map_queues_bar(nspu_desc_t *desc, uint64_t *pcie_offset)
+{
+ nfp_nspu_mem_bar_cfg(desc, NFP_NET_PF_HW_QUEUES_EXP_BAR, 0,
+ NFP_CPP_PCIE_QUEUES, pcie_offset);
+
+ /* This is the PCI BAR offset to be used by the host */
+ *pcie_offset |= ((NFP_NET_PF_HW_QUEUES_EXP_BAR & 0x7) << (27 - 3));
+}
+
+int
+nfp_nsp_eth_config(nspu_desc_t *desc, int port, int up)
+{
+ union eth_table_entry *entries, *entry;
+ int modified = 0;
+ int ret, idx;
+ int i;
+
+ idx = port;
+
+ RTE_LOG(INFO, PMD, "Hw ethernet port %d configure...\n", port);
+ rte_spinlock_lock(&desc->nsp_lock);
+ entries = malloc(NSP_ETH_TABLE_SIZE);
+ if (!entries) {
+ rte_spinlock_unlock(&desc->nsp_lock);
+ return -ENOMEM;
+ }
+
+ ret = nspu_command(desc, NSP_CMD_READ_ETH_TABLE, 1, 0, entries,
+ NSP_ETH_TABLE_SIZE, 0);
+ if (ret) {
+ free(entries);
+ rte_spinlock_unlock(&desc->nsp_lock);
+ return ret;
+ }
+
+ entry = entries;
+
+ for (i = 0; i < NSP_ETH_MAX_COUNT; i++) {
+ /* ports in use do not appear sequentially in the table */
+ if (!(entry->port & NSP_ETH_PORT_LANES_MASK)) {
+ /* entry not in use */
+ entry++;
+ continue;
+ }
+ if (idx == 0)
+ break;
+ idx--;
+ entry++;
+ }
+
+ if (i == NSP_ETH_MAX_COUNT) {
+ free(entries);
+ rte_spinlock_unlock(&desc->nsp_lock);
+ return -EINVAL;
+ }
+
+ if (up && !(entry->state & NSP_ETH_STATE_CONFIGURED)) {
+ entry->control |= NSP_ETH_STATE_CONFIGURED;
+ modified = 1;
+ }
+
+ if (!up && (entry->state & NSP_ETH_STATE_CONFIGURED)) {
+ entry->control &= ~NSP_ETH_STATE_CONFIGURED;
+ modified = 1;
+ }
+
+ if (modified) {
+ ret = nspu_command(desc, NSP_CMD_WRITE_ETH_TABLE, 0, 1, entries,
+ 0, NSP_ETH_TABLE_SIZE);
+ if (!ret)
+ RTE_LOG(INFO, PMD,
+ "Hw ethernet port %d configure done\n", port);
+ else
+ RTE_LOG(INFO, PMD,
+ "Hw ethernet port %d configure failed\n", port);
+ }
+ free(entries);
+ rte_spinlock_unlock(&desc->nsp_lock);
+ return ret;
+}
+
+int
+nfp_nsp_eth_read_table(nspu_desc_t *desc, union eth_table_entry **table)
+{
+ int ret;
+
+ RTE_LOG(INFO, PMD, "Reading hw ethernet table...\n");
+ /* port 0 allocates the eth table and reads it using NSPU */
+ *table = malloc(NSP_ETH_TABLE_SIZE);
+ if (!*table)
+ return -ENOMEM;
+
+ ret = nspu_command(desc, NSP_CMD_READ_ETH_TABLE, 1, 0, *table,
+ NSP_ETH_TABLE_SIZE, 0);
+ if (ret) {
+ free(*table);
+ *table = NULL;
+ return ret;
+ }
+
+ RTE_LOG(INFO, PMD, "Done\n");
+
+ return 0;
+}
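For context only (not part of this patch): a minimal sketch of how a caller
might walk the table returned by nfp_nsp_eth_read_table(), mirroring the
"ports in use are not sequential" loop used in nfp_nsp_eth_config() above.
It assumes the same headers as nfp_nspu.c and that the caller owns and frees
the table.

    /* Illustrative only: count configured ports in the NSP eth table. */
    static int
    count_configured_ports(nspu_desc_t *desc)
    {
            union eth_table_entry *table, *entry;
            int i, count = 0;

            if (nfp_nsp_eth_read_table(desc, &table))
                    return -EIO;

            entry = table;
            for (i = 0; i < NSP_ETH_MAX_COUNT; i++, entry++) {
                    if (!(entry->port & NSP_ETH_PORT_LANES_MASK))
                            continue; /* entry not in use */
                    if (entry->state & NSP_ETH_STATE_CONFIGURED)
                            count++;
            }

            free(table);
            return count;
    }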
diff --git a/drivers/net/nfp/nfp_nspu.h b/drivers/net/nfp/nfp_nspu.h
new file mode 100644
index 00000000..8c33835e
--- /dev/null
+++ b/drivers/net/nfp/nfp_nspu.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2017 Netronome Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * vim:shiftwidth=8:noexpandtab
+ *
+ * @file dpdk/pmd/nfp_nspu.h
+ *
+ * Netronome NFP_NET PMD driver
+ */
+
+/*
+ * NSP is the NFP Service Processor. NSPU is the NSP Userspace interface.
+ *
+ * NFP NSP helps with firmware/hardware configuration. NSP is another component
+ * of the NFP programmable processor, and accessing it from the host requires
+ * first configuring a specific NFP PCI expansion BAR.
+ *
+ * Once access is ready, configuration is done by reading from and writing to
+ * a specific PF PCI BAR window. The same interface also allows creating other
+ * PCI BAR windows for accessing other NFP components.
+ *
+ * This file includes low-level functions using the NSPU interface and
+ * high-level functions invoked by the PMD to use NSP services. These cover
+ * firmware upload, vNIC PCI BAR mapping and other low-level configuration
+ * such as link setup.
+ *
+ * NSP access happens only during initialization; it is not involved in the
+ * fast path at all.
+ */
+
+#include <rte_spinlock.h>
+#include "nfp_net_eth.h"
+
+typedef struct {
+ int nfp; /* NFP device */
+ int pcie_bar; /* PF PCI BAR to work with */
+ int exp_bar; /* Expansion BAR number used by NSPU */
+ int barsz; /* PCIE BAR log2 size */
+ uint64_t bufaddr; /* commands buffer address */
+ size_t buf_size; /* commands buffer size */
+ uint64_t windowsz; /* NSPU BAR window size */
+ void *cfg_base; /* Expansion BARs address */
+ void *mem_base; /* NSP interface */
+ rte_spinlock_t nsp_lock;
+} nspu_desc_t;
+
+int nfp_nspu_init(nspu_desc_t *desc, int nfp, int pcie_bar, size_t pcie_barsz,
+ int exp_bar, void *exp_bar_cfg_base, void *exp_bar_mmap);
+int nfp_nsp_get_abi_version(nspu_desc_t *desc, int *major, int *minor);
+int nfp_nsp_fw_setup(nspu_desc_t *desc, const char *sym, uint64_t *pcie_offset);
+int nfp_nsp_map_ctrl_bar(nspu_desc_t *desc, uint64_t *pcie_offset);
+void nfp_nsp_map_queues_bar(nspu_desc_t *desc, uint64_t *pcie_offset);
+int nfp_nsp_eth_config(nspu_desc_t *desc, int port, int up);
+int nfp_nsp_eth_read_table(nspu_desc_t *desc, union eth_table_entry **table);
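A rough, hedged illustration of how the PF probe path is expected to drive
this API during init (a sketch, not code from this patch; the expansion BAR
number, BAR size and the symbol name passed to nfp_nsp_fw_setup() are
placeholders, and error handling is collapsed):

    /* Illustrative only: init-time NSPU bring-up for the PF. */
    static int
    nfp_pf_nspu_bringup(nspu_desc_t *desc, int nfp_dev,
                        void *exp_bar_cfg, void *exp_bar_mmap)
    {
            uint64_t ctrl_off, queues_off;
            int major, minor;

            /* 0, 1 << 27 and 7 below are placeholder BAR parameters. */
            if (nfp_nspu_init(desc, nfp_dev, 0, 1 << 27, 7,
                              exp_bar_cfg, exp_bar_mmap))
                    return -ENODEV;

            if (nfp_nsp_get_abi_version(desc, &major, &minor))
                    return -ENODEV;

            /* "_pf0_net_bar0" is the symbol used by nfp_nsp_map_ctrl_bar();
             * whether fw setup uses the same name is an assumption.
             */
            if (nfp_nsp_fw_setup(desc, "_pf0_net_bar0", &ctrl_off))
                    return -ENODEV;

            nfp_nsp_map_queues_bar(desc, &queues_off);
            return 0;
    }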
diff --git a/drivers/net/null/Makefile b/drivers/net/null/Makefile
index 77810bce..9331ccac 100644
--- a/drivers/net/null/Makefile
+++ b/drivers/net/null/Makefile
@@ -38,6 +38,9 @@ LIB = librte_pmd_null.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_vdev
EXPORT_MAP := rte_pmd_null_version.map
diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 5aef0591..032c30e9 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -36,7 +36,7 @@
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#include <rte_kvargs.h>
#include <rte_spinlock.h>
@@ -68,7 +68,7 @@ struct null_queue {
struct pmd_internals {
unsigned packet_size;
unsigned packet_copy;
- uint8_t port_id;
+ uint16_t port_id;
struct null_queue rx_null_queues[RTE_MAX_QUEUES_PER_PORT];
struct null_queue tx_null_queues[RTE_MAX_QUEUES_PER_PORT];
@@ -298,7 +298,7 @@ eth_dev_info(struct rte_eth_dev *dev,
dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
}
-static void
+static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
{
unsigned i, num_stats;
@@ -306,7 +306,7 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
const struct pmd_internals *internal;
if ((dev == NULL) || (igb_stats == NULL))
- return;
+ return -EINVAL;
internal = dev->data->dev_private;
num_stats = RTE_MIN((unsigned)RTE_ETHDEV_QUEUE_STAT_CNTRS,
@@ -333,6 +333,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
igb_stats->ipackets = rx_total;
igb_stats->opackets = tx_total;
igb_stats->oerrors = tx_err_total;
+
+ return 0;
}
static void
@@ -540,8 +542,6 @@ eth_dev_null_create(struct rte_vdev_device *dev,
eth_dev->data = data;
eth_dev->dev_ops = &ops;
- data->dev_flags = RTE_ETH_DEV_DETACHABLE;
-
/* finally assign rx and tx ops */
if (packet_copy) {
eth_dev->rx_pkt_burst = eth_null_copy_rx;
diff --git a/drivers/net/octeontx/Makefile b/drivers/net/octeontx/Makefile
new file mode 100644
index 00000000..9c27fdfe
--- /dev/null
+++ b/drivers/net/octeontx/Makefile
@@ -0,0 +1,79 @@
+# BSD LICENSE
+#
+# Copyright(c) 2017 Cavium Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Cavium Networks nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_octeontx.a
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -I$(RTE_SDK)/drivers/mempool/octeontx/
+
+EXPORT_MAP := rte_pmd_octeontx_version.map
+
+LIBABIVER := 1
+
+OBJS_BASE_DRIVER=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))
+$(foreach obj, $(OBJS_BASE_DRIVER), $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
+
+VPATH += $(SRCDIR)/base
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_OCTEONTX_PMD) += octeontx_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_OCTEONTX_PMD) += octeontx_pkovf.c
+SRCS-$(CONFIG_RTE_LIBRTE_OCTEONTX_PMD) += octeontx_pkivf.c
+SRCS-$(CONFIG_RTE_LIBRTE_OCTEONTX_PMD) += octeontx_bgx.c
+SRCS-$(CONFIG_RTE_LIBRTE_OCTEONTX_PMD) += octeontx_ethdev.c
+
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_octeontx_rxtx.o += -fno-prefetch-loop-arrays
+
+ifeq ($(shell test $(GCC_VERSION) -ge 46 && echo 1), 1)
+CFLAGS_octeontx_rxtx.o += -O3 -Ofast
+else
+CFLAGS_octeontx_rxtx.o += -O3 -ffast-math
+endif
+
+else
+CFLAGS_octeontx_rxtx.o += -O3 -Ofast
+endif
+
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_mempool_octeontx
+LDLIBS += -lrte_eventdev
+LDLIBS += -lrte_bus_pci
+LDLIBS += -lrte_bus_vdev
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/octeontx/base/octeontx_bgx.c b/drivers/net/octeontx/base/octeontx_bgx.c
new file mode 100644
index 00000000..c2d0d433
--- /dev/null
+++ b/drivers/net/octeontx/base/octeontx_bgx.c
@@ -0,0 +1,273 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#include "octeontx_bgx.h"
+
+int
+octeontx_bgx_port_open(int port, octeontx_mbox_bgx_port_conf_t *conf)
+{
+ struct octeontx_mbox_hdr hdr;
+ octeontx_mbox_bgx_port_conf_t bgx_conf;
+ int len = sizeof(octeontx_mbox_bgx_port_conf_t);
+ int res;
+
+ memset(&bgx_conf, 0, sizeof(octeontx_mbox_bgx_port_conf_t));
+ hdr.coproc = OCTEONTX_BGX_COPROC;
+ hdr.msg = MBOX_BGX_PORT_OPEN;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, NULL, 0, &bgx_conf, len);
+ if (res < 0)
+ return -EACCES;
+
+ conf->enable = bgx_conf.enable;
+ conf->promisc = bgx_conf.promisc;
+ conf->bpen = bgx_conf.bpen;
+ conf->node = bgx_conf.node;
+ conf->base_chan = bgx_conf.base_chan;
+ conf->num_chans = bgx_conf.num_chans;
+ conf->mtu = bgx_conf.mtu;
+ conf->bgx = bgx_conf.bgx;
+ conf->lmac = bgx_conf.lmac;
+ conf->mode = bgx_conf.mode;
+ conf->pkind = bgx_conf.pkind;
+ memcpy(conf->macaddr, bgx_conf.macaddr, 6);
+
+ return res;
+}
+
+int
+octeontx_bgx_port_close(int port)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ hdr.coproc = OCTEONTX_BGX_COPROC;
+ hdr.msg = MBOX_BGX_PORT_CLOSE;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, NULL, 0, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+int
+octeontx_bgx_port_start(int port)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ hdr.coproc = OCTEONTX_BGX_COPROC;
+ hdr.msg = MBOX_BGX_PORT_START;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, NULL, 0, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+int
+octeontx_bgx_port_stop(int port)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ hdr.coproc = OCTEONTX_BGX_COPROC;
+ hdr.msg = MBOX_BGX_PORT_STOP;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, NULL, 0, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+int
+octeontx_bgx_port_get_config(int port, octeontx_mbox_bgx_port_conf_t *conf)
+{
+ struct octeontx_mbox_hdr hdr;
+ octeontx_mbox_bgx_port_conf_t bgx_conf;
+ int len = sizeof(octeontx_mbox_bgx_port_conf_t);
+ int res;
+
+ hdr.coproc = OCTEONTX_BGX_COPROC;
+ hdr.msg = MBOX_BGX_PORT_GET_CONFIG;
+ hdr.vfid = port;
+
+ memset(&bgx_conf, 0, sizeof(octeontx_mbox_bgx_port_conf_t));
+ res = octeontx_ssovf_mbox_send(&hdr, NULL, 0, &bgx_conf, len);
+ if (res < 0)
+ return -EACCES;
+
+ conf->enable = bgx_conf.enable;
+ conf->promisc = bgx_conf.promisc;
+ conf->bpen = bgx_conf.bpen;
+ conf->node = bgx_conf.node;
+ conf->base_chan = bgx_conf.base_chan;
+ conf->num_chans = bgx_conf.num_chans;
+ conf->mtu = bgx_conf.mtu;
+ conf->bgx = bgx_conf.bgx;
+ conf->lmac = bgx_conf.lmac;
+ conf->mode = bgx_conf.mode;
+ conf->pkind = bgx_conf.pkind;
+ memcpy(conf->macaddr, bgx_conf.macaddr, 6);
+
+ return res;
+}
+
+int
+octeontx_bgx_port_status(int port, octeontx_mbox_bgx_port_status_t *stat)
+{
+ struct octeontx_mbox_hdr hdr;
+ octeontx_mbox_bgx_port_status_t bgx_stat;
+ int len = sizeof(octeontx_mbox_bgx_port_status_t);
+ int res;
+
+ hdr.coproc = OCTEONTX_BGX_COPROC;
+ hdr.msg = MBOX_BGX_PORT_GET_STATUS;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, NULL, 0, &bgx_stat, len);
+ if (res < 0)
+ return -EACCES;
+
+ stat->link_up = bgx_stat.link_up;
+
+ return res;
+}
+
+int
+octeontx_bgx_port_stats(int port, octeontx_mbox_bgx_port_stats_t *stats)
+{
+ struct octeontx_mbox_hdr hdr;
+ octeontx_mbox_bgx_port_stats_t bgx_stats;
+ int len = sizeof(octeontx_mbox_bgx_port_stats_t);
+ int res;
+
+ hdr.coproc = OCTEONTX_BGX_COPROC;
+ hdr.msg = MBOX_BGX_PORT_GET_STATS;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, NULL, 0, &bgx_stats, len);
+ if (res < 0)
+ return -EACCES;
+
+ stats->rx_packets = bgx_stats.rx_packets;
+ stats->rx_bytes = bgx_stats.rx_bytes;
+ stats->rx_dropped = bgx_stats.rx_dropped;
+ stats->rx_errors = bgx_stats.rx_errors;
+ stats->tx_packets = bgx_stats.tx_packets;
+ stats->tx_bytes = bgx_stats.tx_bytes;
+ stats->tx_dropped = bgx_stats.tx_dropped;
+ stats->tx_errors = bgx_stats.tx_errors;
+ return res;
+}
+
+int
+octeontx_bgx_port_stats_clr(int port)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ hdr.coproc = OCTEONTX_BGX_COPROC;
+ hdr.msg = MBOX_BGX_PORT_CLR_STATS;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, NULL, 0, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+int
+octeontx_bgx_port_link_status(int port)
+{
+ struct octeontx_mbox_hdr hdr;
+ uint8_t link;
+ int len = sizeof(uint8_t);
+ int res;
+
+ hdr.coproc = OCTEONTX_BGX_COPROC;
+ hdr.msg = MBOX_BGX_PORT_GET_LINK_STATUS;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, NULL, 0, &link, len);
+ if (res < 0)
+ return -EACCES;
+
+ return link;
+}
+
+int
+octeontx_bgx_port_promisc_set(int port, int en)
+{
+ struct octeontx_mbox_hdr hdr;
+ uint8_t prom;
+ int res;
+
+ hdr.coproc = OCTEONTX_BGX_COPROC;
+ hdr.msg = MBOX_BGX_PORT_SET_PROMISC;
+ hdr.vfid = port;
+ prom = en ? 1 : 0;
+
+ res = octeontx_ssovf_mbox_send(&hdr, &prom, sizeof(prom), NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+int
+octeontx_bgx_port_mac_set(int port, uint8_t *mac_addr)
+{
+ struct octeontx_mbox_hdr hdr;
+ int len = 6;
+ int res = 0;
+
+ hdr.coproc = OCTEONTX_BGX_COPROC;
+ hdr.msg = MBOX_BGX_PORT_SET_MACADDR;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, mac_addr, len, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
diff --git a/drivers/net/octeontx/base/octeontx_bgx.h b/drivers/net/octeontx/base/octeontx_bgx.h
new file mode 100644
index 00000000..f740a1d9
--- /dev/null
+++ b/drivers/net/octeontx/base/octeontx_bgx.h
@@ -0,0 +1,150 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __OCTEONTX_BGX_H__
+#define __OCTEONTX_BGX_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <octeontx_mbox.h>
+
+#define OCTEONTX_BGX_COPROC 6
+
+/* BGX messages */
+#define MBOX_BGX_PORT_OPEN 0
+#define MBOX_BGX_PORT_CLOSE 1
+#define MBOX_BGX_PORT_START 2
+#define MBOX_BGX_PORT_STOP 3
+#define MBOX_BGX_PORT_GET_CONFIG 4
+#define MBOX_BGX_PORT_GET_STATUS 5
+#define MBOX_BGX_PORT_GET_STATS 6
+#define MBOX_BGX_PORT_CLR_STATS 7
+#define MBOX_BGX_PORT_GET_LINK_STATUS 8
+#define MBOX_BGX_PORT_SET_PROMISC 9
+#define MBOX_BGX_PORT_SET_MACADDR 10
+#define MBOX_BGX_PORT_SET_BP 11
+#define MBOX_BGX_PORT_SET_BCAST 12
+#define MBOX_BGX_PORT_SET_MCAST 13
+
+/* BGX port configuration parameters: */
+typedef struct octeontx_mbox_bgx_port_conf {
+ uint8_t enable;
+ uint8_t promisc;
+ uint8_t bpen;
+ uint8_t macaddr[6]; /* MAC address.*/
+ uint8_t fcs_strip;
+ uint8_t bcast_mode;
+ uint8_t mcast_mode;
+ uint8_t node; /* CPU node */
+ uint16_t base_chan;
+ uint16_t num_chans;
+ uint16_t mtu;
+ uint8_t bgx;
+ uint8_t lmac;
+ uint8_t mode;
+ uint8_t pkind;
+} octeontx_mbox_bgx_port_conf_t;
+
+/* BGX port status: */
+typedef struct octeontx_mbox_bgx_port_status {
+ uint8_t link_up;
+ uint8_t bp;
+} octeontx_mbox_bgx_port_status_t;
+
+/* BGX port statistics: */
+typedef struct octeontx_mbox_bgx_port_stats {
+ uint64_t rx_packets;
+ uint64_t tx_packets;
+ uint64_t rx_bytes;
+ uint64_t tx_bytes;
+ uint64_t rx_errors;
+ uint64_t tx_errors;
+ uint64_t rx_dropped;
+ uint64_t tx_dropped;
+ uint64_t multicast;
+ uint64_t collisions;
+
+ uint64_t rx_length_errors;
+ uint64_t rx_over_errors;
+ uint64_t rx_crc_errors;
+ uint64_t rx_frame_errors;
+ uint64_t rx_fifo_errors;
+ uint64_t rx_missed_errors;
+
+ /* Detailed transmit errors. */
+ uint64_t tx_aborted_errors;
+ uint64_t tx_carrier_errors;
+ uint64_t tx_fifo_errors;
+ uint64_t tx_heartbeat_errors;
+ uint64_t tx_window_errors;
+
+ /* Extended statistics based on RFC2819. */
+ uint64_t rx_1_to_64_packets;
+ uint64_t rx_65_to_127_packets;
+ uint64_t rx_128_to_255_packets;
+ uint64_t rx_256_to_511_packets;
+ uint64_t rx_512_to_1023_packets;
+ uint64_t rx_1024_to_1522_packets;
+ uint64_t rx_1523_to_max_packets;
+
+ uint64_t tx_1_to_64_packets;
+ uint64_t tx_65_to_127_packets;
+ uint64_t tx_128_to_255_packets;
+ uint64_t tx_256_to_511_packets;
+ uint64_t tx_512_to_1023_packets;
+ uint64_t tx_1024_to_1522_packets;
+ uint64_t tx_1523_to_max_packets;
+
+ uint64_t tx_multicast_packets;
+ uint64_t rx_broadcast_packets;
+ uint64_t tx_broadcast_packets;
+ uint64_t rx_undersized_errors;
+ uint64_t rx_oversize_errors;
+ uint64_t rx_fragmented_errors;
+ uint64_t rx_jabber_errors;
+} octeontx_mbox_bgx_port_stats_t;
+
+int octeontx_bgx_port_open(int port, octeontx_mbox_bgx_port_conf_t *conf);
+int octeontx_bgx_port_close(int port);
+int octeontx_bgx_port_start(int port);
+int octeontx_bgx_port_stop(int port);
+int octeontx_bgx_port_get_config(int port, octeontx_mbox_bgx_port_conf_t *conf);
+int octeontx_bgx_port_status(int port, octeontx_mbox_bgx_port_status_t *stat);
+int octeontx_bgx_port_stats(int port, octeontx_mbox_bgx_port_stats_t *stats);
+int octeontx_bgx_port_stats_clr(int port);
+int octeontx_bgx_port_link_status(int port);
+int octeontx_bgx_port_promisc_set(int port, int en);
+int octeontx_bgx_port_mac_set(int port, uint8_t *mac_addr);
+
+#endif /* __OCTEONTX_BGX_H__ */
+
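To make the mailbox flow above concrete, a hedged usage sketch (not part of
this patch) of bringing up one BGX port; it assumes <stdio.h>/<inttypes.h>
are available, the port number is arbitrary and error handling is collapsed:

    /* Illustrative only: open a BGX port, start it and report basic state. */
    static int
    bgx_port_bringup(int port)
    {
            octeontx_mbox_bgx_port_conf_t conf;
            octeontx_mbox_bgx_port_stats_t stats;
            int link;

            if (octeontx_bgx_port_open(port, &conf) < 0 ||
                octeontx_bgx_port_promisc_set(port, 1) < 0 ||
                octeontx_bgx_port_start(port) < 0)
                    return -EACCES;

            link = octeontx_bgx_port_link_status(port); /* assumed: nonzero = up */
            if (octeontx_bgx_port_stats(port, &stats) == 0)
                    printf("port %d link %d rx %" PRIu64 " tx %" PRIu64 "\n",
                           port, link, stats.rx_packets, stats.tx_packets);

            return 0;
    }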
diff --git a/drivers/net/octeontx/base/octeontx_io.h b/drivers/net/octeontx/base/octeontx_io.h
new file mode 100644
index 00000000..ec4ce1dc
--- /dev/null
+++ b/drivers/net/octeontx/base/octeontx_io.h
@@ -0,0 +1,156 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __OCTEONTX_IO_H__
+#define __OCTEONTX_IO_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <rte_io.h>
+
+/* In the Cavium OcteonTX SoC, all accesses to the device registers are
+ * implicitly strongly ordered, so the relaxed version of the IO operations
+ * is safe to use without any IO memory barriers.
+ */
+#define octeontx_read64 rte_read64_relaxed
+#define octeontx_write64 rte_write64_relaxed
+
+/* ARM64 specific functions */
+#if defined(RTE_ARCH_ARM64)
+#define octeontx_prefetch_store_keep(_ptr) ({\
+ asm volatile("prfm pstl1keep, %a0\n" : : "p" (_ptr)); })
+
+#define octeontx_load_pair(val0, val1, addr) ({ \
+ asm volatile( \
+ "ldp %x[x0], %x[x1], [%x[p1]]" \
+ :[x0]"=r"(val0), [x1]"=r"(val1) \
+ :[p1]"r"(addr) \
+ ); })
+
+#define octeontx_store_pair(val0, val1, addr) ({ \
+ asm volatile( \
+ "stp %x[x0], %x[x1], [%x[p1]]" \
+ ::[x0]"r"(val0), [x1]"r"(val1), [p1]"r"(addr) \
+ ); })
+#else /* Unoptimized functions for building on non-arm64 architectures */
+
+#define octeontx_prefetch_store_keep(_ptr) do {} while (0)
+
+#define octeontx_load_pair(val0, val1, addr) \
+do { \
+ val0 = rte_read64(addr); \
+ val1 = rte_read64(((uint8_t *)addr) + 8); \
+} while (0)
+
+#define octeontx_store_pair(val0, val1, addr) \
+do { \
+ rte_write64(val0, addr); \
+ rte_write64(val1, (((uint8_t *)addr) + 8)); \
+} while (0)
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+/**
+ * Perform an atomic fetch-and-add operation.
+ */
+static inline uint64_t
+octeontx_reg_ldadd_u64(void *addr, int64_t off)
+{
+ uint64_t old_val;
+
+ __asm__ volatile(
+ " .cpu generic+lse\n"
+ " ldadd %1, %0, [%2]\n"
+ : "=r" (old_val) : "r" (off), "r" (addr) : "memory");
+
+ return old_val;
+}
+
+/**
+ * Perform an LMTST operation - an atomic write of up to 128 bytes to
+ * an I/O block that supports this operation type.
+ *
+ * @param lmtline_va is the address where LMTLINE is mapped
+ * @param ioreg_va is the virtual address of the device register
+ * @param cmdbuf is the array of peripheral commands to execute
+ * @param cmdsize is the number of 64-bit words in 'cmdbuf'
+ *
+ * @return N/A
+ */
+static inline void
+octeontx_reg_lmtst(void *lmtline_va, void *ioreg_va, const uint64_t cmdbuf[],
+ uint64_t cmdsize)
+{
+ uint64_t result;
+ uint64_t word_count;
+ uint64_t *lmtline = lmtline_va;
+
+ word_count = cmdsize;
+
+ do {
+ /* Copy commands to LMTLINE */
+ for (result = 0; result < word_count; result += 2) {
+ lmtline[result + 0] = cmdbuf[result + 0];
+ lmtline[result + 1] = cmdbuf[result + 1];
+ }
+
+ /* LDEOR initiates atomic transfer to I/O device */
+ __asm__ volatile(
+ " .cpu generic+lse\n"
+ " ldeor xzr, %0, [%1]\n"
+ : "=r" (result) : "r" (ioreg_va) : "memory");
+ } while (!result);
+}
+
+#else
+
+static inline uint64_t
+octeontx_reg_ldadd_u64(void *addr, int64_t off)
+{
+ RTE_SET_USED(addr);
+ RTE_SET_USED(off);
+ return 0;
+}
+
+static inline void
+octeontx_reg_lmtst(void *lmtline_va, void *ioreg_va, const uint64_t cmdbuf[],
+ uint64_t cmdsize)
+{
+ RTE_SET_USED(lmtline_va);
+ RTE_SET_USED(ioreg_va);
+ RTE_SET_USED(cmdbuf);
+ RTE_SET_USED(cmdsize);
+}
+
+#endif
+#endif /* __OCTEONTX_IO_H__ */
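A small sketch (illustrative only, not from this patch) of how the two arm64
helpers above combine: LDADD with an increment of zero is an atomic read of a
device counter, and LMTST pushes a command burst to a doorbell. The pointer
arguments and the command words are assumptions; real encodings come from the
PKO/SSO register definitions, and on non-arm64 builds the stubs above make
both calls no-ops.

    /* Illustrative only: read a device counter and push one 2-word command. */
    static uint64_t
    submit_two_word_cmd(void *lmtline_va, void *ioreg_va, void *cnt_reg,
                        uint64_t w0, uint64_t w1)
    {
            uint64_t cmd[2] = {w0, w1};
            uint64_t old_cnt;

            /* Atomic read: add 0, get the previous value back. */
            old_cnt = octeontx_reg_ldadd_u64(cnt_reg, 0);

            /* Copy the command into the LMTLINE and issue LDEOR; the helper
             * retries internally until the transfer succeeds.
             */
            octeontx_reg_lmtst(lmtline_va, ioreg_va, cmd, 2);

            return old_cnt;
    }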
diff --git a/drivers/net/octeontx/base/octeontx_pki_var.h b/drivers/net/octeontx/base/octeontx_pki_var.h
new file mode 100644
index 00000000..def6cbb9
--- /dev/null
+++ b/drivers/net/octeontx/base/octeontx_pki_var.h
@@ -0,0 +1,237 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __OCTEONTX_PKI_VAR_H__
+#define __OCTEONTX_PKI_VAR_H__
+
+#include <rte_byteorder.h>
+
+#define OCTTX_PACKET_WQE_SKIP 128
+#define OCTTX_PACKET_FIRST_SKIP 240
+#define OCTTX_PACKET_LATER_SKIP 128
+
+/* WQE descriptor */
+typedef union octtx_wqe_s {
+ uint64_t w[6];
+
+ struct {
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ struct {
+ uint64_t pknd : 6;
+ uint64_t rsvd0 : 10;
+ uint64_t style : 8;
+ uint64_t bufs : 8;
+ uint64_t chan : 12;
+ uint64_t apad : 3;
+ uint64_t rsvd1 : 1;
+ uint64_t aura : 12;
+ uint64_t rsvd2 : 4;
+ } w0;
+
+ struct {
+ uint64_t tag : 32;
+ uint64_t tt : 2;
+ uint64_t grp : 10;
+ uint64_t rsvd0 : 2;
+ uint64_t rsvd1 : 2;
+ uint64_t len : 16;
+ } w1;
+
+ struct {
+ uint64_t op_code : 8;
+ uint64_t err_lev : 3;
+ uint64_t raw : 1;
+ uint64_t l2m : 1;
+ uint64_t l2b : 1;
+ uint64_t l3m : 1;
+ uint64_t l3b : 1;
+ uint64_t l3fr : 1;
+ uint64_t pf1 : 1;
+ uint64_t pf2 : 1;
+ uint64_t pf3 : 1;
+ uint64_t pf4 : 1;
+ uint64_t sh : 1;
+ uint64_t vs : 1;
+ uint64_t vv : 1;
+ uint64_t rsvd0 : 8;
+ uint64_t lae : 1;
+ uint64_t lbty : 5;
+ uint64_t lcty : 5;
+ uint64_t ldty : 5;
+ uint64_t lety : 5;
+ uint64_t lfty : 5;
+ uint64_t lgty : 5;
+ uint64_t sw : 1;
+ } w2;
+
+ struct {
+ uint64_t addr; /* Byte addr of start-of-pkt */
+ } w3;
+
+ struct {
+ uint64_t laptr : 8;
+ uint64_t lbptr : 8;
+ uint64_t lcptr : 8;
+ uint64_t ldprt : 8;
+ uint64_t leptr : 8;
+ uint64_t lfptr : 8;
+ uint64_t lgptr : 8;
+ uint64_t vlptr : 8;
+ } w4;
+
+ struct {
+ uint64_t rsvd0 : 47;
+ uint64_t dwd : 1;
+ uint64_t size : 16;
+ } w5;
+#else
+ struct {
+ uint64_t rsvd2 : 4;
+ uint64_t aura : 12;
+ uint64_t rsvd1 : 1;
+ uint64_t apad : 3;
+ uint64_t chan : 12;
+ uint64_t bufs : 8;
+ uint64_t style : 8;
+ uint64_t rsvd0 : 10;
+ uint64_t pknd : 6;
+ } w0;
+
+ struct {
+ uint64_t len : 16;
+ uint64_t rsvd1 : 2;
+ uint64_t rsvd0 : 2;
+ uint64_t grp : 10;
+ uint64_t tt : 2;
+ uint64_t tag : 32;
+ } w1;
+
+ struct {
+ uint64_t sw : 1;
+ uint64_t lgty : 5;
+ uint64_t lfty : 5;
+ uint64_t lety : 5;
+ uint64_t ldty : 5;
+ uint64_t lcty : 5;
+ uint64_t lbty : 5;
+ uint64_t lae : 1;
+ uint64_t rsvd0 : 8;
+ uint64_t vv : 1;
+ uint64_t vs : 1;
+ uint64_t sh : 1;
+ uint64_t pf4 : 1;
+ uint64_t pf3 : 1;
+ uint64_t pf2 : 1;
+ uint64_t pf1 : 1;
+ uint64_t l3fr : 1;
+ uint64_t l3b : 1;
+ uint64_t l3m : 1;
+ uint64_t l2b : 1;
+ uint64_t l2m : 1;
+ uint64_t raw : 1;
+ uint64_t err_lev : 3;
+ uint64_t op_code : 8;
+ } w2;
+
+ struct {
+ uint64_t addr; /* Byte addr of start-of-pkt */
+ } w3;
+
+ struct {
+ uint64_t vlptr : 8;
+ uint64_t lgptr : 8;
+ uint64_t lfptr : 8;
+ uint64_t leptr : 8;
+ uint64_t ldprt : 8;
+ uint64_t lcptr : 8;
+ uint64_t lbptr : 8;
+ uint64_t laptr : 8;
+ } w4;
+#endif
+ } s;
+
+} __rte_packed octtx_wqe_t;
+
+enum occtx_pki_ltype_e {
+ OCCTX_PKI_LTYPE_NONE = 0,
+ OCCTX_PKI_LTYPE_ENET = 1,
+ OCCTX_PKI_LTYPE_VLAN = 2,
+ OCCTX_PKI_LTYPE_SNAP_PAYLD = 5,
+ OCCTX_PKI_LTYPE_ARP = 6,
+ OCCTX_PKI_LTYPE_RARP = 7,
+ OCCTX_PKI_LTYPE_IP4 = 8,
+ OCCTX_PKI_LTYPE_IP4_OPT = 9,
+ OCCTX_PKI_LTYPE_IP6 = 0xa,
+ OCCTX_PKI_LTYPE_IP6_OPT = 0xb,
+ OCCTX_PKI_LTYPE_IPSEC_ESP = 0xc,
+ OCCTX_PKI_LTYPE_IPFRAG = 0xd,
+ OCCTX_PKI_LTYPE_IPCOMP = 0xe,
+ OCCTX_PKI_LTYPE_TCP = 0x10,
+ OCCTX_PKI_LTYPE_UDP = 0x11,
+ OCCTX_PKI_LTYPE_SCTP = 0x12,
+ OCCTX_PKI_LTYPE_UDP_VXLAN = 0x13,
+ OCCTX_PKI_LTYPE_GRE = 0x14,
+ OCCTX_PKI_LTYPE_NVGRE = 0x15,
+ OCCTX_PKI_LTYPE_GTP = 0x16,
+ OCCTX_PKI_LTYPE_UDP_GENEVE = 0x17,
+ OCCTX_PKI_LTYPE_SW28 = 0x1c,
+ OCCTX_PKI_LTYPE_SW29 = 0x1d,
+ OCCTX_PKI_LTYPE_SW30 = 0x1e,
+ OCCTX_PKI_LTYPE_SW31 = 0x1f,
+ OCCTX_PKI_LTYPE_LAST
+};
+
+enum lc_type_e {
+ LC_NONE = OCCTX_PKI_LTYPE_NONE,
+ LC_IPV4 = OCCTX_PKI_LTYPE_IP4,
+ LC_IPV4_OPT = OCCTX_PKI_LTYPE_IP4_OPT,
+ LC_IPV6 = OCCTX_PKI_LTYPE_IP6,
+ LC_IPV6_OPT = OCCTX_PKI_LTYPE_IP6_OPT,
+};
+
+enum le_type_e {
+ LE_NONE = OCCTX_PKI_LTYPE_NONE,
+};
+
+enum lf_type_e {
+ LF_NONE = OCCTX_PKI_LTYPE_NONE,
+ LF_IPSEC_ESP = OCCTX_PKI_LTYPE_IPSEC_ESP,
+ LF_IPFRAG = OCCTX_PKI_LTYPE_IPFRAG,
+ LF_IPCOMP = OCCTX_PKI_LTYPE_IPCOMP,
+ LF_TCP = OCCTX_PKI_LTYPE_TCP,
+ LF_UDP = OCCTX_PKI_LTYPE_UDP,
+ LF_GRE = OCCTX_PKI_LTYPE_GRE,
+ LF_UDP_GENEVE = OCCTX_PKI_LTYPE_UDP_GENEVE,
+ LF_UDP_VXLAN = OCCTX_PKI_LTYPE_UDP_VXLAN,
+ LF_NVGRE = OCCTX_PKI_LTYPE_NVGRE,
+};
+#endif /* __OCTEONTX_PKI_VAR_H__ */
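As a hedged illustration of how an rx path is expected to read this
descriptor (not part of the patch; assumes <stdio.h> and that the WQE was
delivered by the PKI with the layout above):

    /* Illustrative only: pull the basics out of a received WQE. */
    static void
    wqe_dump(const octtx_wqe_t *wqe)
    {
            uint64_t pkt_addr = wqe->s.w3.addr;   /* byte addr of start-of-pkt */
            unsigned int pkt_len = wqe->s.w1.len; /* total packet length */
            unsigned int nbufs = wqe->s.w0.bufs;  /* buffers holding the packet */
            int is_ipv4 = (wqe->s.w2.lcty == LC_IPV4 ||
                           wqe->s.w2.lcty == LC_IPV4_OPT);

            printf("pkt@%#lx len=%u bufs=%u ipv4=%d\n",
                   (unsigned long)pkt_addr, pkt_len, nbufs, is_ipv4);
    }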
diff --git a/drivers/net/octeontx/base/octeontx_pkivf.c b/drivers/net/octeontx/base/octeontx_pkivf.c
new file mode 100644
index 00000000..b97f05cd
--- /dev/null
+++ b/drivers/net/octeontx/base/octeontx_pkivf.c
@@ -0,0 +1,169 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+
+#include <rte_eal.h>
+#include <rte_bus_pci.h>
+
+#include "octeontx_pkivf.h"
+
+int
+octeontx_pki_port_open(int port)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ hdr.coproc = OCTEONTX_PKI_COPROC;
+ hdr.msg = MBOX_PKI_PORT_OPEN;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, NULL, 0, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+ return res;
+}
+
+int
+octeontx_pki_port_hash_config(int port, pki_hash_cfg_t *hash_cfg)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ mbox_pki_hash_cfg_t h_cfg = *(mbox_pki_hash_cfg_t *)hash_cfg;
+ int len = sizeof(mbox_pki_hash_cfg_t);
+
+ hdr.coproc = OCTEONTX_PKI_COPROC;
+ hdr.msg = MBOX_PKI_PORT_HASH_CONFIG;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, &h_cfg, len, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+int
+octeontx_pki_port_pktbuf_config(int port, pki_pktbuf_cfg_t *buf_cfg)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ mbox_pki_pktbuf_cfg_t b_cfg = *(mbox_pki_pktbuf_cfg_t *)buf_cfg;
+ int len = sizeof(mbox_pki_pktbuf_cfg_t);
+
+ hdr.coproc = OCTEONTX_PKI_COPROC;
+ hdr.msg = MBOX_PKI_PORT_PKTBUF_CONFIG;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, &b_cfg, len, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+ return res;
+}
+
+int
+octeontx_pki_port_create_qos(int port, pki_qos_cfg_t *qos_cfg)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ mbox_pki_qos_cfg_t q_cfg = *(mbox_pki_qos_cfg_t *)qos_cfg;
+ int len = sizeof(mbox_pki_qos_cfg_t);
+
+ hdr.coproc = OCTEONTX_PKI_COPROC;
+ hdr.msg = MBOX_PKI_PORT_CREATE_QOS;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, &q_cfg, len, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+
+int
+octeontx_pki_port_errchk_config(int port, pki_errchk_cfg_t *cfg)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ mbox_pki_errcheck_cfg_t e_cfg;
+ e_cfg = *((mbox_pki_errcheck_cfg_t *)(cfg));
+ int len = sizeof(mbox_pki_errcheck_cfg_t);
+
+ hdr.coproc = OCTEONTX_PKI_COPROC;
+ hdr.msg = MBOX_PKI_PORT_ERRCHK_CONFIG;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, &e_cfg, len, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+#define PCI_VENDOR_ID_CAVIUM 0x177D
+#define PCI_DEVICE_ID_OCTEONTX_PKI_VF 0xA0DD
+
+/* PKIVF pcie device */
+static int
+pkivf_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+ RTE_SET_USED(pci_drv);
+ RTE_SET_USED(pci_dev);
+
+ /* For secondary processes, the primary has done all the work */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
+ return 0;
+}
+
+static const struct rte_pci_id pci_pkivf_map[] = {
+ {
+ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM,
+ PCI_DEVICE_ID_OCTEONTX_PKI_VF)
+ },
+ {
+ .vendor_id = 0,
+ },
+};
+
+static struct rte_pci_driver pci_pkivf = {
+ .id_table = pci_pkivf_map,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+ .probe = pkivf_probe,
+};
+
+RTE_PMD_REGISTER_PCI(octeontx_pkivf, pci_pkivf);
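A hedged sketch (not part of this patch) of how a caller might use the QoS
wrapper above to attach one PKI port to a single SSO group and aura; the
group, aura and tag values are placeholders:

    /* Illustrative only: a single-entry QoS group for one PKI port. */
    static int
    pki_port_setup_qos(int port, uint16_t gaura, uint16_t grp)
    {
            pki_qos_cfg_t qos;

            memset(&qos, 0, sizeof(qos));
            qos.port_type = OCTTX_PORT_TYPE_NET;
            qos.qpg_qos = PKI_QPG_QOS_NONE;
            qos.num_entry = 1;
            qos.tag_type = 0;                 /* placeholder tag type */
            qos.drop_policy = 0;
            qos.qos_entry[0].gaura = gaura;   /* aura feeding this port */
            qos.qos_entry[0].ggrp_ok = grp;   /* SSO group for good packets */
            qos.qos_entry[0].ggrp_bad = grp;

            return octeontx_pki_port_create_qos(port, &qos);
    }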
diff --git a/drivers/net/octeontx/base/octeontx_pkivf.h b/drivers/net/octeontx/base/octeontx_pkivf.h
new file mode 100644
index 00000000..004dedcc
--- /dev/null
+++ b/drivers/net/octeontx/base/octeontx_pkivf.h
@@ -0,0 +1,553 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __OCTEONTX_PKI_H__
+#define __OCTEONTX_PKI_H__
+
+#include <stdint.h>
+
+#include <octeontx_mbox.h>
+
+#define OCTEONTX_PKI_COPROC 5
+
+/* PKI messages */
+
+#define MBOX_PKI_PORT_OPEN 1
+#define MBOX_PKI_PORT_START 2
+#define MBOX_PKI_PORT_STOP 3
+#define MBOX_PKI_PORT_CLOSE 4
+#define MBOX_PKI_PORT_CONFIG 5
+#define MBOX_PKI_PORT_OPT_PARSER_CONFIG 6
+#define MBOX_PKI_PORT_CUSTOM_PARSER_CONFIG 7
+#define MBOX_PKI_PORT_PKTBUF_CONFIG 8
+#define MBOX_PKI_PORT_HASH_CONFIG 9
+#define MBOX_PKI_PORT_ERRCHK_CONFIG 10
+#define MBOX_PKI_PORT_CREATE_QOS 11
+#define MBOX_PKI_PORT_MODIFY_QOS 12
+#define MBOX_PKI_PORT_DELETE_QOS 13
+#define MBOX_PKI_PORT_PKTDROP_CONFIG 14
+#define MBOX_PKI_PORT_WQE_GEN_CONFIG 15
+#define MBOX_PKI_BACKPRESSURE_CONFIG 16
+#define MBOX_PKI_PORT_GET_STATS 17
+#define MBOX_PKI_PORT_RESET_STATS 18
+#define MBOX_PKI_GET_PORT_CONFIG 19
+#define MBOX_PKI_GET_PORT_QOS_CONFIG 20
+
+#define MBOX_PKI_MAX_QOS_ENTRY 64
+
+/* pki pkind parse mode */
+enum {
+ MBOX_PKI_PARSE_LA_TO_LG = 0,
+ MBOX_PKI_PARSE_LB_TO_LG = 1,
+ MBOX_PKI_PARSE_LC_TO_LG = 3,
+ MBOX_PKI_PARSE_LG = 0x3f,
+ MBOX_PKI_PARSE_NOTHING = 0x7f
+};
+
+/* Interface types: */
+enum {
+ OCTTX_PORT_TYPE_NET, /* Network interface ports */
+ OCTTX_PORT_TYPE_INT, /* CPU internal interface ports */
+ OCTTX_PORT_TYPE_PCI, /* DPI/PCIe interface ports */
+ OCTTX_PORT_TYPE_MAX
+};
+
+/* pki port config */
+typedef struct mbox_pki_port_type {
+ uint8_t port_type;
+} mbox_pki_port_t;
+
+/* pki port config */
+typedef struct mbox_pki_port_cfg {
+ uint8_t port_type;
+ struct {
+ uint8_t fcs_pres:1;
+ uint8_t fcs_skip:1;
+ uint8_t parse_mode:1;
+ uint8_t mpls_parse:1;
+ uint8_t inst_hdr_parse:1;
+ uint8_t fulc_parse:1;
+ uint8_t dsa_parse:1;
+ uint8_t hg2_parse:1;
+ uint8_t hg_parse:1;
+ } mmask;
+ uint8_t fcs_pres;
+ uint8_t fcs_skip;
+ uint8_t parse_mode;
+ uint8_t mpls_parse;
+ uint8_t inst_hdr_parse;
+ uint8_t fulc_parse;
+ uint8_t dsa_parse;
+ uint8_t hg2_parse;
+ uint8_t hg_parse;
+} mbox_pki_prt_cfg_t;
+
+/* pki Flow/style packet buffer config */
+typedef struct mbox_pki_port_pktbuf_cfg {
+ uint8_t port_type;
+ struct {
+ uint16_t f_mbuff_size:1;
+ uint16_t f_wqe_skip:1;
+ uint16_t f_first_skip:1;
+ uint16_t f_later_skip:1;
+ uint16_t f_pkt_outside_wqe:1;
+ uint16_t f_wqe_endian:1;
+ uint16_t f_cache_mode:1;
+ } mmask;
+ uint16_t mbuff_size;
+ uint16_t wqe_skip;
+ uint16_t first_skip;
+ uint16_t later_skip;
+ uint8_t pkt_outside_wqe;
+ uint8_t wqe_endian;
+ uint8_t cache_mode;
+} mbox_pki_pktbuf_cfg_t;
+
+/* pki flow/style tag config */
+typedef struct mbox_pki_port_hash_cfg {
+ uint8_t port_type;
+ uint32_t tag_slf:1;
+ uint32_t tag_sle:1;
+ uint32_t tag_sld:1;
+ uint32_t tag_slc:1;
+ uint32_t tag_dlf:1;
+ uint32_t tag_dle:1;
+ uint32_t tag_dld:1;
+ uint32_t tag_dlc:1;
+ uint32_t tag_prt:1;
+ uint32_t tag_vlan0:1;
+ uint32_t tag_vlan1:1;
+ uint32_t tag_ip_pctl:1;
+ uint32_t tag_sync:1;
+ uint32_t tag_spi:1;
+ uint32_t tag_gtp:1;
+ uint32_t tag_vni:1;
+} mbox_pki_hash_cfg_t;
+
+/* pki flow/style errcheck config */
+typedef struct mbox_pki_port_errcheck_cfg {
+ uint8_t port_type;
+ struct {
+ uint32_t f_ip6_udp_opt:1;
+ uint32_t f_lenerr_en:1;
+ uint32_t f_maxerr_en:1;
+ uint32_t f_minerr_en:1;
+ uint32_t f_fcs_chk:1;
+ uint32_t f_fcs_strip:1;
+ uint32_t f_len_lf:1;
+ uint32_t f_len_le:1;
+ uint32_t f_len_ld:1;
+ uint32_t f_len_lc:1;
+ uint32_t f_csum_lf:1;
+ uint32_t f_csum_le:1;
+ uint32_t f_csum_ld:1;
+ uint32_t f_csum_lc:1;
+ uint32_t f_min_frame_len;
+ uint32_t f_max_frame_len;
+ } mmask;
+ uint64_t ip6_udp_opt:1;
+ uint64_t lenerr_en:1;
+ uint64_t maxerr_en:1;
+ uint64_t minerr_en:1;
+ uint64_t fcs_chk:1;
+ uint64_t fcs_strip:1;
+ uint64_t len_lf:1;
+ uint64_t len_le:1;
+ uint64_t len_ld:1;
+ uint64_t len_lc:1;
+ uint64_t csum_lf:1;
+ uint64_t csum_le:1;
+ uint64_t csum_ld:1;
+ uint64_t csum_lc:1;
+ uint64_t min_frame_len;
+ uint64_t max_frame_len;
+} mbox_pki_errcheck_cfg_t;
+
+/* CACHE MODE */
+enum {
+ MBOX_PKI_OPC_MODE_STT = 0LL,
+ MBOX_PKI_OPC_MODE_STF = 1LL,
+ MBOX_PKI_OPC_MODE_STF1_STT = 2LL,
+ MBOX_PKI_OPC_MODE_STF2_STT = 3LL
+};
+
+/* PKI QPG QOS */
+enum {
+ MBOX_PKI_QPG_QOS_NONE = 0,
+ MBOX_PKI_QPG_QOS_VLAN,
+ MBOX_PKI_QPG_QOS_MPLS,
+ MBOX_PKI_QPG_QOS_DSA_SRC,
+ MBOX_PKI_QPG_QOS_DIFFSERV,
+ MBOX_PKI_QPG_QOS_HIGIG,
+};
+
+struct mbox_pki_qos_entry {
+ uint16_t port_add;
+ uint16_t ggrp_ok;
+ uint16_t ggrp_bad;
+ uint16_t gaura;
+ uint8_t grptag_ok;
+ uint8_t grptag_bad;
+};
+
+/* pki flow/style enable qos */
+typedef struct mbox_pki_port_create_qos {
+ uint8_t port_type;
+ uint8_t qpg_qos;
+ uint8_t num_entry;
+ uint8_t tag_type;
+ uint8_t drop_policy;
+ struct mbox_pki_qos_entry qos_entry[MBOX_PKI_MAX_QOS_ENTRY];
+} mbox_pki_qos_cfg_t;
+
+/* pki flow/style modify qos entry */
+typedef struct mbox_pki_port_modify_qos_entry {
+ uint8_t port_type;
+ uint16_t index;
+ struct {
+ uint8_t f_port_add:1;
+ uint8_t f_grp_ok:1;
+ uint8_t f_grp_bad:1;
+ uint8_t f_gaura:1;
+ uint8_t f_grptag_ok:1;
+ uint8_t f_grptag_bad:1;
+ uint8_t f_tag_type:1;
+ } mmask;
+ uint8_t tag_type;
+ struct mbox_pki_qos_entry qos_entry;
+} mbox_pki_mod_qos_t;
+
+/* pki flow/style delete qos entry */
+typedef struct mbox_pki_port_delete_qos_entry {
+ uint8_t port_type;
+ uint16_t index;
+} mbox_pki_del_qos_t;
+
+/* PKI maximum constants */
+#define PKI_VF_MAX (1)
+#define PKI_MAX_PKTLEN (32768)
+
+/* pki pkind parse mode */
+enum {
+ PKI_PARSE_LA_TO_LG = 0,
+ PKI_PARSE_LB_TO_LG = 1,
+ PKI_PARSE_LC_TO_LG = 3,
+ PKI_PARSE_LG = 0x3f,
+ PKI_PARSE_NOTHING = 0x7f
+};
+
+/* pki port config */
+typedef struct pki_port_cfg {
+ uint8_t port_type;
+ struct {
+ uint8_t fcs_pres:1;
+ uint8_t fcs_skip:1;
+ uint8_t parse_mode:1;
+ uint8_t mpls_parse:1;
+ uint8_t inst_hdr_parse:1;
+ uint8_t fulc_parse:1;
+ uint8_t dsa_parse:1;
+ uint8_t hg2_parse:1;
+ uint8_t hg_parse:1;
+ } mmask;
+ uint8_t fcs_pres;
+ uint8_t fcs_skip;
+ uint8_t parse_mode;
+ uint8_t mpls_parse;
+ uint8_t inst_hdr_parse;
+ uint8_t fulc_parse;
+ uint8_t dsa_parse;
+ uint8_t hg2_parse;
+ uint8_t hg_parse;
+} pki_prt_cfg_t;
+
+
+/* pki Flow/style packet buffer config */
+typedef struct pki_port_pktbuf_cfg {
+ uint8_t port_type;
+ struct {
+ uint16_t f_mbuff_size:1;
+ uint16_t f_wqe_skip:1;
+ uint16_t f_first_skip:1;
+ uint16_t f_later_skip:1;
+ uint16_t f_pkt_outside_wqe:1;
+ uint16_t f_wqe_endian:1;
+ uint16_t f_cache_mode:1;
+ } mmask;
+ uint16_t mbuff_size;
+ uint16_t wqe_skip;
+ uint16_t first_skip;
+ uint16_t later_skip;
+ uint8_t pkt_outside_wqe;
+ uint8_t wqe_endian;
+ uint8_t cache_mode;
+} pki_pktbuf_cfg_t;
+
+/* pki flow/style tag config */
+typedef struct pki_port_hash_cfg {
+ uint8_t port_type;
+ uint32_t tag_slf:1;
+ uint32_t tag_sle:1;
+ uint32_t tag_sld:1;
+ uint32_t tag_slc:1;
+ uint32_t tag_dlf:1;
+ uint32_t tag_dle:1;
+ uint32_t tag_dld:1;
+ uint32_t tag_dlc:1;
+ uint32_t tag_prt:1;
+ uint32_t tag_vlan0:1;
+ uint32_t tag_vlan1:1;
+ uint32_t tag_ip_pctl:1;
+ uint32_t tag_sync:1;
+ uint32_t tag_spi:1;
+ uint32_t tag_gtp:1;
+ uint32_t tag_vni:1;
+} pki_hash_cfg_t;
+
+/* pki flow/style errcheck config */
+typedef struct pki_port_errcheck_cfg {
+ uint8_t port_type;
+ struct {
+ uint32_t f_ip6_udp_opt:1;
+ uint32_t f_lenerr_en:1;
+ uint32_t f_maxerr_en:1;
+ uint32_t f_minerr_en:1;
+ uint32_t f_fcs_chk:1;
+ uint32_t f_fcs_strip:1;
+ uint32_t f_len_lf:1;
+ uint32_t f_len_le:1;
+ uint32_t f_len_ld:1;
+ uint32_t f_len_lc:1;
+ uint32_t f_csum_lf:1;
+ uint32_t f_csum_le:1;
+ uint32_t f_csum_ld:1;
+ uint32_t f_csum_lc:1;
+ uint32_t f_min_frame_len;
+ uint32_t f_max_frame_len;
+ } mmask;
+ uint64_t ip6_udp_opt:1;
+ uint64_t lenerr_en:1;
+ uint64_t maxerr_en:1;
+ uint64_t minerr_en:1;
+ uint64_t fcs_chk:1;
+ uint64_t fcs_strip:1;
+ uint64_t len_lf:1;
+ uint64_t len_le:1;
+ uint64_t len_ld:1;
+ uint64_t len_lc:1;
+ uint64_t csum_lf:1;
+ uint64_t csum_le:1;
+ uint64_t csum_ld:1;
+ uint64_t csum_lc:1;
+ uint64_t min_frame_len;
+ uint64_t max_frame_len;
+} pki_errchk_cfg_t;
+
+
+/* CACHE MODE */
+enum {
+ PKI_OPC_MODE_STT = 0LL,
+ PKI_OPC_MODE_STF = 1LL,
+ PKI_OPC_MODE_STF1_STT = 2LL,
+ PKI_OPC_MODE_STF2_STT = 3LL
+};
+
+/* PKI QPG QOS */
+enum {
+ PKI_QPG_QOS_NONE = 0,
+ PKI_QPG_QOS_VLAN,
+ PKI_QPG_QOS_MPLS,
+ PKI_QPG_QOS_DSA_SRC,
+ PKI_QPG_QOS_DIFFSERV,
+ PKI_QPG_QOS_HIGIG,
+};
+
+struct pki_qos_entry {
+ uint16_t port_add;
+ uint16_t ggrp_ok;
+ uint16_t ggrp_bad;
+ uint16_t gaura;
+ uint8_t grptag_ok;
+ uint8_t grptag_bad;
+ uint8_t ena_red;
+ uint8_t ena_drop;
+};
+
+#define PKO_MAX_QOS_ENTRY 64
+
+/* pki flow/style enable qos */
+typedef struct pki_port_create_qos {
+ uint8_t port_type;
+ uint8_t qpg_qos;
+ uint8_t num_entry;
+ uint8_t tag_type;
+ uint8_t drop_policy;
+ struct pki_qos_entry qos_entry[PKO_MAX_QOS_ENTRY];
+} pki_qos_cfg_t;
+
+/* pki flow/style delete qos entry */
+typedef struct pki_port_delete_qos_entry {
+ uint8_t port_type;
+ uint16_t index;
+} pki_del_qos_t;
+
+/* pki flow/style modify qos entry */
+typedef struct pki_port_modify_qos_entry {
+ uint8_t port_type;
+ uint16_t index;
+ struct {
+ uint8_t f_port_add:1;
+ uint8_t f_grp_ok:1;
+ uint8_t f_grp_bad:1;
+ uint8_t f_gaura:1;
+ uint8_t f_grptag_ok:1;
+ uint8_t f_grptag_bad:1;
+ uint8_t f_tag_type:1;
+ } mmask;
+ uint8_t tag_type;
+ struct pki_qos_entry qos_entry;
+} pki_mod_qos_t;
+
+static inline int
+octeontx_pki_port_modify_qos(int port, pki_mod_qos_t *qos_cfg)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ mbox_pki_mod_qos_t q_cfg = *(mbox_pki_mod_qos_t *)qos_cfg;
+ int len = sizeof(mbox_pki_mod_qos_t);
+
+ hdr.coproc = OCTEONTX_PKI_COPROC;
+ hdr.msg = MBOX_PKI_PORT_MODIFY_QOS;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, &q_cfg, len, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+static inline int
+octeontx_pki_port_delete_qos(int port, pki_del_qos_t *qos_cfg)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ mbox_pki_del_qos_t q_cfg = *(mbox_pki_del_qos_t *)qos_cfg;
+ int len = sizeof(mbox_pki_del_qos_t);
+
+ hdr.coproc = OCTEONTX_PKI_COPROC;
+ hdr.msg = MBOX_PKI_PORT_DELETE_QOS;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, &q_cfg, len, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+static inline int
+octeontx_pki_port_close(int port)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ mbox_pki_port_t ptype;
+ int len = sizeof(mbox_pki_port_t);
+ memset(&ptype, 0, len);
+ ptype.port_type = OCTTX_PORT_TYPE_NET;
+
+ hdr.coproc = OCTEONTX_PKI_COPROC;
+ hdr.msg = MBOX_PKI_PORT_CLOSE;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, &ptype, len, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+static inline int
+octeontx_pki_port_start(int port)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ mbox_pki_port_t ptype;
+ int len = sizeof(mbox_pki_port_t);
+ memset(&ptype, 0, len);
+ ptype.port_type = OCTTX_PORT_TYPE_NET;
+
+ hdr.coproc = OCTEONTX_PKI_COPROC;
+ hdr.msg = MBOX_PKI_PORT_START;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, &ptype, len, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+static inline int
+octeontx_pki_port_stop(int port)
+{
+ struct octeontx_mbox_hdr hdr;
+ int res;
+
+ mbox_pki_port_t ptype;
+ int len = sizeof(mbox_pki_port_t);
+ memset(&ptype, 0, len);
+ ptype.port_type = OCTTX_PORT_TYPE_NET;
+
+ hdr.coproc = OCTEONTX_PKI_COPROC;
+ hdr.msg = MBOX_PKI_PORT_STOP;
+ hdr.vfid = port;
+
+ res = octeontx_ssovf_mbox_send(&hdr, &ptype, len, NULL, 0);
+ if (res < 0)
+ return -EACCES;
+
+ return res;
+}
+
+int octeontx_pki_port_open(int port);
+int octeontx_pki_port_hash_config(int port, pki_hash_cfg_t *hash_cfg);
+int octeontx_pki_port_pktbuf_config(int port, pki_pktbuf_cfg_t *buf_cfg);
+int octeontx_pki_port_create_qos(int port, pki_qos_cfg_t *qos_cfg);
+int octeontx_pki_port_close(int port);
+int octeontx_pki_port_errchk_config(int port, pki_errchk_cfg_t *cfg);
+
+#endif /* __OCTEONTX_PKI_H__ */
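For orientation, a hedged sketch (not from this patch) of driving the packet
buffer configuration declared above; the skip constants reuse the
OCTTX_PACKET_* values from octeontx_pki_var.h (assumed included alongside
this header and <string.h>), and the mbuf data size is a placeholder:

    /* Illustrative only: set packet-buffer geometry for one PKI port. */
    static int
    pki_port_setup_pktbuf(int port, uint16_t mbuf_data_sz)
    {
            pki_pktbuf_cfg_t cfg;

            memset(&cfg, 0, sizeof(cfg));
            cfg.port_type = OCTTX_PORT_TYPE_NET;
            cfg.mbuff_size = mbuf_data_sz;            /* placeholder sizing */
            cfg.wqe_skip = OCTTX_PACKET_WQE_SKIP;
            cfg.first_skip = OCTTX_PACKET_FIRST_SKIP;
            cfg.later_skip = OCTTX_PACKET_LATER_SKIP;
            cfg.mmask.f_mbuff_size = 1;
            cfg.mmask.f_wqe_skip = 1;
            cfg.mmask.f_first_skip = 1;
            cfg.mmask.f_later_skip = 1;

            return octeontx_pki_port_pktbuf_config(port, &cfg);
    }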
diff --git a/drivers/net/octeontx/base/octeontx_pkovf.c b/drivers/net/octeontx/base/octeontx_pkovf.c
new file mode 100644
index 00000000..f01d948e
--- /dev/null
+++ b/drivers/net/octeontx/base/octeontx_pkovf.c
@@ -0,0 +1,617 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_eal.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_bus_pci.h>
+#include <rte_spinlock.h>
+
+#include "../octeontx_logs.h"
+#include "octeontx_io.h"
+#include "octeontx_pkovf.h"
+
+struct octeontx_pko_iomem {
+ uint8_t *va;
+ rte_iova_t iova;
+ size_t size;
+};
+
+#define PKO_IOMEM_NULL (struct octeontx_pko_iomem){0, 0, 0}
+
+struct octeontx_pko_fc_ctl_s {
+ int64_t buf_cnt;
+ int64_t padding[(PKO_DQ_FC_STRIDE / 8) - 1];
+};
+
+struct octeontx_pkovf {
+ uint8_t *bar0;
+ uint8_t *bar2;
+ uint16_t domain;
+ uint16_t vfid;
+};
+
+struct octeontx_pko_vf_ctl_s {
+ rte_spinlock_t lock;
+
+ struct octeontx_pko_iomem fc_iomem;
+ struct octeontx_pko_fc_ctl_s *fc_ctl;
+ struct octeontx_pkovf pko[PKO_VF_MAX];
+ struct {
+ uint64_t chanid;
+ } dq_map[PKO_VF_MAX * PKO_VF_NUM_DQ];
+};
+
+static struct octeontx_pko_vf_ctl_s pko_vf_ctl;
+
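+/* Map a global DQ index (txq) to the BAR0 of the PKO VF that owns it. */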
+static void *
+octeontx_pko_dq_vf_bar0(uint16_t txq)
+{
+ int vf_ix;
+
+ vf_ix = txq / PKO_VF_NUM_DQ;
+ return pko_vf_ctl.pko[vf_ix].bar0;
+}
+
+static int
+octeontx_pko_dq_gdq(uint16_t txq)
+{
+ return txq % PKO_VF_NUM_DQ;
+}
+
+/**
+ * Open a PKO DQ.
+ */
+static inline
+int octeontx_pko_dq_open(uint16_t txq)
+{
+ unsigned int reg_off;
+ uint8_t *vf_bar0;
+ uint64_t rtn;
+ int gdq;
+
+ vf_bar0 = octeontx_pko_dq_vf_bar0(txq);
+ gdq = octeontx_pko_dq_gdq(txq);
+
+ if (unlikely(gdq < 0 || vf_bar0 == NULL))
+ return -EINVAL;
+ *(volatile int64_t *)(pko_vf_ctl.fc_ctl + txq) =
+ PKO_DQ_FC_DEPTH_PAGES - PKO_DQ_FC_SKID;
+
+ rte_wmb();
+
+ octeontx_write64(PKO_DQ_FC_DEPTH_PAGES,
+ vf_bar0 + PKO_VF_DQ_FC_STATUS(gdq));
+
+ /* Set the register to return descriptor (packet) count as DEPTH */
+ /* KIND=1, NCB_QUERY_RSP=0 */
+ octeontx_write64(1ull << PKO_DQ_KIND_BIT,
+ vf_bar0 + PKO_VF_DQ_WM_CTL(gdq));
+ reg_off = PKO_VF_DQ_OP_OPEN(gdq);
+
+ rtn = octeontx_reg_ldadd_u64(vf_bar0 + reg_off, 0);
+
+ /* PKO_DQOP_E::OPEN */
+ if (((rtn >> PKO_DQ_OP_BIT) & 0x3) != 0x1)
+ return -EIO;
+
+ switch (rtn >> PKO_DQ_STATUS_BIT) {
+ case 0xC: /* DQALREADYCREATED */
+ case 0x0: /* PASS */
+ break;
+ default:
+ return -EIO;
+ }
+
+ /* DRAIN=0, DRAIN_NULL_LINK=0, SW_XOFF=0 */
+ octeontx_write64(0, vf_bar0 + PKO_VF_DQ_SW_XOFF(gdq));
+
+ return rtn & ((1ull << PKO_DQ_OP_BIT) - 1);
+}
+
+/**
+ * Close a PKO DQ.
+ * Returns the remaining packet count (DEPTH) reported by the close operation.
+ */
+static inline
+int octeontx_pko_dq_close(uint16_t txq)
+{
+ unsigned int reg_off;
+ uint8_t *vf_bar0;
+ uint64_t rtn;
+ int res;
+
+ vf_bar0 = octeontx_pko_dq_vf_bar0(txq);
+ res = octeontx_pko_dq_gdq(txq);
+
+ if (unlikely(res < 0 || vf_bar0 == NULL))
+ return -EINVAL;
+
+ reg_off = PKO_VF_DQ_OP_CLOSE(res);
+
+ rtn = octeontx_reg_ldadd_u64(vf_bar0 + reg_off, 0);
+
+ /* PKO_DQOP_E::CLOSE */
+ if (((rtn >> PKO_DQ_OP_BIT) & 0x3) != 0x2)
+ return -EIO;
+
+ switch (rtn >> PKO_DQ_STATUS_BIT) {
+ case 0xD: /* DQNOTCREATED */
+ case 0x0: /* PASS */
+ break;
+ default:
+ return -EIO;
+ }
+
+ res = rtn & ((1ull << PKO_DQ_OP_BIT) - 1); /* DEPTH */
+ return res;
+}
+
+/* Flush all packets pending on a DQ */
+static inline
+int octeontx_pko_dq_drain(uint16_t txq)
+{
+ unsigned int gdq;
+ uint8_t *vf_bar0;
+ uint64_t reg;
+ int res, timo = PKO_DQ_DRAIN_TO;
+
+ vf_bar0 = octeontx_pko_dq_vf_bar0(txq);
+ res = octeontx_pko_dq_gdq(txq);
+ gdq = res;
+
+ /* DRAIN=1, DRAIN_NULL_LINK=0, SW_XOFF=1 */
+ octeontx_write64(0x3, vf_bar0 + PKO_VF_DQ_SW_XOFF(gdq));
+ /* Wait until buffers leave DQs */
+ reg = octeontx_read64(vf_bar0 + PKO_VF_DQ_WM_CNT(gdq));
+ while (reg && timo > 0) {
+ rte_delay_us(100);
+ timo--;
+ reg = octeontx_read64(vf_bar0 + PKO_VF_DQ_WM_CNT(gdq));
+ }
+ /* DRAIN=0, DRAIN_NULL_LINK=0, SW_XOFF=0 */
+ octeontx_write64(0, vf_bar0 + PKO_VF_DQ_SW_XOFF(gdq));
+
+ return reg;
+}
+
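+/*
+ * Search for dq_num consecutive dq_map entries whose stored id equals
+ * ~chanid, starting at dq_from. Returns the base DQ index of such a run,
+ * or -1 if none exists.
+ */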
+static inline int
+octeontx_pko_dq_range_lookup(struct octeontx_pko_vf_ctl_s *ctl, uint64_t chanid,
+ unsigned int dq_num, unsigned int dq_from)
+{
+ unsigned int dq, dq_cnt;
+ unsigned int dq_base;
+
+ dq_cnt = 0;
+ dq = dq_from;
+ while (dq < RTE_DIM(ctl->dq_map)) {
+ dq_base = dq;
+ dq_cnt = 0;
+ while (dq < RTE_DIM(ctl->dq_map) &&
+ ctl->dq_map[dq].chanid == ~chanid) {
+ dq_cnt++;
+ if (dq_cnt == dq_num)
+ return dq_base;
+ dq++;
+ }
+ dq++;
+ }
+ return -1;
+}
+
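+/*
+ * Mark dq_num DQs starting at dq_base as owned by chanid. The channel id is
+ * stored bit-inverted, so a zero entry denotes a free DQ.
+ */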
+static inline void
+octeontx_pko_dq_range_assign(struct octeontx_pko_vf_ctl_s *ctl, uint64_t chanid,
+ unsigned int dq_base, unsigned int dq_num)
+{
+ unsigned int dq, dq_cnt;
+
+ dq_cnt = 0;
+ while (dq_cnt < dq_num) {
+ dq = dq_base + dq_cnt;
+
+ octeontx_log_dbg("DQ# %u assigned to CHAN# %" PRIx64 "", dq,
+ chanid);
+
+ ctl->dq_map[dq].chanid = ~chanid;
+ dq_cnt++;
+ }
+}
+
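+/*
+ * Claim dq_num contiguous free DQs starting exactly at dq_base for chanid.
+ * Returns 0 on success or -1 if the requested range is not free.
+ */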
+static inline int
+octeontx_pko_dq_claim(struct octeontx_pko_vf_ctl_s *ctl, unsigned int dq_base,
+ unsigned int dq_num, uint64_t chanid)
+{
+ const uint64_t null_chanid = ~0ull;
+ int dq;
+
+ rte_spinlock_lock(&ctl->lock);
+
+ dq = octeontx_pko_dq_range_lookup(ctl, null_chanid, dq_num, dq_base);
+ if (dq < 0 || (unsigned int)dq != dq_base) {
+ rte_spinlock_unlock(&ctl->lock);
+ return -1;
+ }
+ octeontx_pko_dq_range_assign(ctl, chanid, dq_base, dq_num);
+
+ rte_spinlock_unlock(&ctl->lock);
+
+ return 0;
+}
+
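+/* Release every DQ currently mapped to chanid; -EINVAL if none were mapped. */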
+static inline int
+octeontx_pko_dq_free(struct octeontx_pko_vf_ctl_s *ctl, uint64_t chanid)
+{
+ const uint64_t null_chanid = ~0ull;
+ unsigned int dq = 0, dq_cnt = 0;
+
+ rte_spinlock_lock(&ctl->lock);
+ while (dq < RTE_DIM(ctl->dq_map)) {
+ if (ctl->dq_map[dq].chanid == ~chanid) {
+ ctl->dq_map[dq].chanid = ~null_chanid;
+ dq_cnt++;
+ }
+ dq++;
+ }
+ rte_spinlock_unlock(&ctl->lock);
+
+ return dq_cnt > 0 ? 0 : -EINVAL;
+}
+
+int
+octeontx_pko_channel_open(int dq_base, int dq_num, int chanid)
+{
+ struct octeontx_pko_vf_ctl_s *ctl = &pko_vf_ctl;
+ int res;
+
+ res = octeontx_pko_dq_claim(ctl, dq_base, dq_num, chanid);
+ if (res < 0)
+ return -1;
+
+ return 0;
+}
+
+int
+octeontx_pko_channel_close(int chanid)
+{
+ struct octeontx_pko_vf_ctl_s *ctl = &pko_vf_ctl;
+ int res;
+
+ res = octeontx_pko_dq_free(ctl, chanid);
+ if (res < 0)
+ return -1;
+
+ return 0;
+}
+
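+/* Open every DQ mapped to chanid; returns the number of DQs opened. */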
+static inline int
+octeontx_pko_chan_start(struct octeontx_pko_vf_ctl_s *ctl, uint64_t chanid)
+{
+ unsigned int dq_vf;
+ unsigned int dq, dq_cnt;
+
+ dq_cnt = 0;
+ dq = 0;
+ while (dq < RTE_DIM(ctl->dq_map)) {
+ dq_vf = dq / PKO_VF_NUM_DQ;
+
+ if (!ctl->pko[dq_vf].bar0) {
+ dq += PKO_VF_NUM_DQ;
+ continue;
+ }
+
+ if (ctl->dq_map[dq].chanid != ~chanid) {
+ dq++;
+ continue;
+ }
+
+ if (octeontx_pko_dq_open(dq) < 0)
+ break;
+
+ dq_cnt++;
+ dq++;
+ }
+
+ return dq_cnt;
+}
+
+int
+octeontx_pko_channel_start(int chanid)
+{
+ struct octeontx_pko_vf_ctl_s *ctl = &pko_vf_ctl;
+ int dq_cnt;
+
+ dq_cnt = octeontx_pko_chan_start(ctl, chanid);
+ if (dq_cnt < 0)
+ return -1;
+
+ return dq_cnt;
+}
+
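+/* Drain and close every DQ mapped to chanid; returns the number processed. */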
+static inline int
+octeontx_pko_chan_stop(struct octeontx_pko_vf_ctl_s *ctl, uint64_t chanid)
+{
+ unsigned int dq, dq_cnt, dq_vf;
+ int res;
+
+ dq_cnt = 0;
+ dq = 0;
+ while (dq < RTE_DIM(ctl->dq_map)) {
+ dq_vf = dq / PKO_VF_NUM_DQ;
+
+ if (!ctl->pko[dq_vf].bar0) {
+ dq += PKO_VF_NUM_DQ;
+ continue;
+ }
+
+ if (ctl->dq_map[dq].chanid != ~chanid) {
+ dq++;
+ continue;
+ }
+
+ res = octeontx_pko_dq_drain(dq);
+ if (res > 0)
+ octeontx_log_err("draining DQ%d, buffers left: %x",
+ dq, res);
+
+ res = octeontx_pko_dq_close(dq);
+ if (res < 0)
+ octeontx_log_err("closing DQ%d failed\n", dq);
+
+ dq_cnt++;
+ dq++;
+ }
+ return dq_cnt;
+}
+
+int
+octeontx_pko_channel_stop(int chanid)
+{
+ struct octeontx_pko_vf_ctl_s *ctl = &pko_vf_ctl;
+
+ octeontx_pko_chan_stop(ctl, chanid);
+ return 0;
+}
+
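+/*
+ * Look up DQ number dq_num of the given channel and hand its LMT line, I/O
+ * register and flow-control status addresses to the caller via 'getter'.
+ */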
+static inline int
+octeontx_pko_channel_query(struct octeontx_pko_vf_ctl_s *ctl, uint64_t chanid,
+ void *out, size_t out_elem_size,
+ size_t dq_num, octeontx_pko_dq_getter_t getter)
+{
+ octeontx_dq_t curr;
+ unsigned int dq_vf;
+ unsigned int dq;
+
+ RTE_SET_USED(out_elem_size);
+ memset(&curr, 0, sizeof(octeontx_dq_t));
+
+ dq_vf = dq_num / PKO_VF_NUM_DQ;
+ dq = dq_num % PKO_VF_NUM_DQ;
+
+ if (!ctl->pko[dq_vf].bar0)
+ return -EINVAL;
+
+ if (ctl->dq_map[dq_num].chanid != ~chanid)
+ return -EINVAL;
+
+ uint8_t *iter = (uint8_t *)out;
+ curr.lmtline_va = ctl->pko[dq_vf].bar2;
+ curr.ioreg_va = (void *)((uintptr_t)ctl->pko[dq_vf].bar0
+ + PKO_VF_DQ_OP_SEND((dq), 0));
+ curr.fc_status_va = ctl->fc_ctl + dq;
+
+ octeontx_log_dbg("lmtline=%p ioreg_va=%p fc_status_va=%p",
+ curr.lmtline_va, curr.ioreg_va,
+ curr.fc_status_va);
+
+ getter(&curr, (void *)iter);
+ return 0;
+}
+
+int
+octeontx_pko_channel_query_dqs(int chanid, void *out, size_t out_elem_size,
+ size_t dq_num, octeontx_pko_dq_getter_t getter)
+{
+ struct octeontx_pko_vf_ctl_s *ctl = &pko_vf_ctl;
+ int dq_cnt;
+
+ dq_cnt = octeontx_pko_channel_query(ctl, chanid, out, out_elem_size,
+ dq_num, getter);
+ if (dq_cnt < 0)
+ return -1;
+
+ return dq_cnt;
+}
+
+int
+octeontx_pko_vf_count(void)
+{
+ int vf_cnt;
+
+ vf_cnt = 0;
+ while (pko_vf_ctl.pko[vf_cnt].bar0)
+ vf_cnt++;
+
+ return vf_cnt;
+}
+
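+/*
+ * Allocate the per-DQ flow-control counter area and program each open VF's
+ * PKO_VF_DQ_FC_CONFIG register to point at it.
+ */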
+int
+octeontx_pko_init_fc(const size_t pko_vf_count)
+{
+ int dq_ix;
+ uint64_t reg;
+ uint8_t *vf_bar0;
+ size_t vf_idx;
+ size_t fc_mem_size;
+
+ fc_mem_size = sizeof(struct octeontx_pko_fc_ctl_s) *
+ pko_vf_count * PKO_VF_NUM_DQ;
+
+ pko_vf_ctl.fc_iomem.va = rte_malloc(NULL, fc_mem_size, 128);
+ if (unlikely(!pko_vf_ctl.fc_iomem.va)) {
+ octeontx_log_err("fc_iomem: not enough memory");
+ return -ENOMEM;
+ }
+
+ pko_vf_ctl.fc_iomem.iova = rte_malloc_virt2iova((void *)
+ pko_vf_ctl.fc_iomem.va);
+ pko_vf_ctl.fc_iomem.size = fc_mem_size;
+
+ pko_vf_ctl.fc_ctl =
+ (struct octeontx_pko_fc_ctl_s *)pko_vf_ctl.fc_iomem.va;
+
+ /* Configure Flow-Control feature for all DQs of open VFs */
+ for (vf_idx = 0; vf_idx < pko_vf_count; vf_idx++) {
+ dq_ix = vf_idx * PKO_VF_NUM_DQ;
+
+ vf_bar0 = pko_vf_ctl.pko[vf_idx].bar0;
+
+ reg = (pko_vf_ctl.fc_iomem.iova +
+ (sizeof(struct octeontx_pko_fc_ctl_s) * dq_ix)) & ~0x7F;
+ reg |= /* BASE */
+ (0x2 << 3) | /* HYST_BITS */
+ (((PKO_DQ_FC_STRIDE == PKO_DQ_FC_STRIDE_16) ? 1 : 0) << 2) |
+ (0x1 << 0); /* ENABLE */
+
+ octeontx_write64(reg, vf_bar0 + PKO_VF_DQ_FC_CONFIG);
+
+ octeontx_log_dbg("PKO: bar0 %p VF_idx %d DQ_FC_CFG=%" PRIx64 "",
+ vf_bar0, (int)vf_idx, reg);
+ }
+ return 0;
+}
+
+void
+octeontx_pko_fc_free(void)
+{
+ rte_free(pko_vf_ctl.fc_iomem.va);
+}
+
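+/* One-time initialization of the PKO VF control structure. */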
+static void
+octeontx_pkovf_setup(void)
+{
+ static bool init_once;
+
+ if (!init_once) {
+ unsigned int i;
+
+ rte_spinlock_init(&pko_vf_ctl.lock);
+
+ pko_vf_ctl.fc_iomem = PKO_IOMEM_NULL;
+ pko_vf_ctl.fc_ctl = NULL;
+
+ for (i = 0; i < PKO_VF_MAX; i++) {
+ pko_vf_ctl.pko[i].bar0 = NULL;
+ pko_vf_ctl.pko[i].bar2 = NULL;
+ pko_vf_ctl.pko[i].domain = ~(uint16_t)0;
+ pko_vf_ctl.pko[i].vfid = ~(uint16_t)0;
+ }
+
+ for (i = 0; i < (PKO_VF_MAX * PKO_VF_NUM_DQ); i++)
+ pko_vf_ctl.dq_map[i].chanid = 0;
+
+ init_once = true;
+ }
+}
+
+/* PKOVF PCIe device */
+static int
+pkovf_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+ uint64_t val;
+ uint16_t vfid;
+ uint16_t domain;
+ uint8_t *bar0;
+ uint8_t *bar2;
+ struct octeontx_pkovf *res;
+
+ RTE_SET_USED(pci_drv);
+
+ /* For secondary processes, the primary has done all the work */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
+ if (pci_dev->mem_resource[0].addr == NULL ||
+ pci_dev->mem_resource[2].addr == NULL) {
+ octeontx_log_err("Empty bars %p %p",
+ pci_dev->mem_resource[0].addr,
+ pci_dev->mem_resource[2].addr);
+ return -ENODEV;
+ }
+ bar0 = pci_dev->mem_resource[0].addr;
+ bar2 = pci_dev->mem_resource[2].addr;
+
+ octeontx_pkovf_setup();
+
+ /* get vfid and domain */
+ val = octeontx_read64(bar0 + PKO_VF_DQ_FC_CONFIG);
+ domain = (val >> 7) & 0xffff;
+ vfid = (val >> 23) & 0xffff;
+
+ if (unlikely(vfid >= PKO_VF_MAX)) {
+ octeontx_log_err("pko: Invalid vfid %d", vfid);
+ return -EINVAL;
+ }
+
+ res = &pko_vf_ctl.pko[vfid];
+ res->vfid = vfid;
+ res->domain = domain;
+ res->bar0 = bar0;
+ res->bar2 = bar2;
+
+ octeontx_log_dbg("Domain=%d group=%d", res->domain, res->vfid);
+ return 0;
+}
+
+#define PCI_VENDOR_ID_CAVIUM 0x177D
+#define PCI_DEVICE_ID_OCTEONTX_PKO_VF 0xA049
+
+static const struct rte_pci_id pci_pkovf_map[] = {
+ {
+ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM,
+ PCI_DEVICE_ID_OCTEONTX_PKO_VF)
+ },
+ {
+ .vendor_id = 0,
+ },
+};
+
+static struct rte_pci_driver pci_pkovf = {
+ .id_table = pci_pkovf_map,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+ .probe = pkovf_probe,
+};
+
+RTE_PMD_REGISTER_PCI(octeontx_pkovf, pci_pkovf);
diff --git a/drivers/net/octeontx/base/octeontx_pkovf.h b/drivers/net/octeontx/base/octeontx_pkovf.h
new file mode 100644
index 00000000..cfc3715d
--- /dev/null
+++ b/drivers/net/octeontx/base/octeontx_pkovf.h
@@ -0,0 +1,97 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __OCTEONTX_PKO_H__
+#define __OCTEONTX_PKO_H__
+
+/* PKO maximum constants */
+#define PKO_VF_MAX (32)
+#define PKO_VF_NUM_DQ (8)
+#define PKO_MAX_NUM_DQ (8)
+#define PKO_DQ_DRAIN_TO (1000)
+
+#define PKO_DQ_FC_SKID (4)
+#define PKO_DQ_FC_DEPTH_PAGES (2048)
+#define PKO_DQ_FC_STRIDE_16 (16)
+#define PKO_DQ_FC_STRIDE_128 (128)
+#define PKO_DQ_FC_STRIDE PKO_DQ_FC_STRIDE_16
+
+#define PKO_DQ_KIND_BIT 49
+#define PKO_DQ_STATUS_BIT 60
+#define PKO_DQ_OP_BIT 48
+
+/* PKO VF register offsets from VF_BAR0 */
+#define PKO_VF_DQ_SW_XOFF(gdq) (0x000100 | (gdq) << 17)
+#define PKO_VF_DQ_WM_CTL(gdq) (0x000130 | (gdq) << 17)
+#define PKO_VF_DQ_WM_CNT(gdq) (0x000150 | (gdq) << 17)
+#define PKO_VF_DQ_FC_CONFIG (0x000160)
+#define PKO_VF_DQ_FC_STATUS(gdq) (0x000168 | (gdq) << 17)
+#define PKO_VF_DQ_OP_SEND(gdq, op) (0x001000 | (gdq) << 17 | (op) << 3)
+#define PKO_VF_DQ_OP_OPEN(gdq) (0x001100 | (gdq) << 17)
+#define PKO_VF_DQ_OP_CLOSE(gdq) (0x001200 | (gdq) << 17)
+#define PKO_VF_DQ_OP_QUERY(gdq) (0x001300 | (gdq) << 17)
+
+/* pko_send_hdr_s + pko_send_link */
+#define PKO_CMD_SZ (2 << 1)
+#define PKO_SEND_GATHER_SUBDC (0x0ull << 60)
+#define PKO_SEND_GATHER_LDTYPE(x) ((x) << 58)
+#define PKO_SEND_GATHER_GAUAR(x) ((x) << 24)
+
+typedef struct octeontx_dq_s {
+ void *lmtline_va;
+ void *ioreg_va;
+ void *fc_status_va;
+} octeontx_dq_t;
+
+/**
+ * Function for extracting information out of a given DQ.
+ *
+ * It is intended to be used in slow path (configuration) in
+ * octeontx_pko_channel_query().
+ *
+ * @param dq The DQ to extract information from.
+ * @param out Pointer to the caller's structure to be filled.
+ */
+typedef void (*octeontx_pko_dq_getter_t)(octeontx_dq_t *dq, void *out);
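+
+/*
+ * Illustrative only: a minimal getter (hypothetical name my_dq_getter) would
+ * simply copy the DQ addresses into the caller's buffer, as
+ * octeontx_dq_info_getter() in octeontx_ethdev.c does:
+ *
+ *	static void my_dq_getter(octeontx_dq_t *dq, void *out)
+ *	{
+ *		*(octeontx_dq_t *)out = *dq;
+ *	}
+ */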
+
+int
+octeontx_pko_channel_query_dqs(int chanid, void *out, size_t out_elem_size,
+ size_t dq_num, octeontx_pko_dq_getter_t getter);
+int octeontx_pko_channel_open(int dq_base, int dq_num, int chanid);
+int octeontx_pko_channel_close(int chanid);
+int octeontx_pko_channel_start(int chanid);
+int octeontx_pko_channel_stop(int chanid);
+int octeontx_pko_vf_count(void);
+int octeontx_pko_init_fc(const size_t pko_vf_count);
+void octeontx_pko_fc_free(void);
+
+#endif /* __OCTEONTX_PKO_H__ */
diff --git a/drivers/net/octeontx/octeontx_ethdev.c b/drivers/net/octeontx/octeontx_ethdev.c
new file mode 100644
index 00000000..bd24ec33
--- /dev/null
+++ b/drivers/net/octeontx/octeontx_ethdev.c
@@ -0,0 +1,1333 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_alarm.h>
+#include <rte_branch_prediction.h>
+#include <rte_debug.h>
+#include <rte_devargs.h>
+#include <rte_dev.h>
+#include <rte_kvargs.h>
+#include <rte_malloc.h>
+#include <rte_prefetch.h>
+#include <rte_bus_vdev.h>
+
+#include "octeontx_ethdev.h"
+#include "octeontx_rxtx.h"
+#include "octeontx_logs.h"
+
+struct octeontx_vdev_init_params {
+ uint8_t nr_port;
+};
+
+enum octeontx_link_speed {
+ OCTEONTX_LINK_SPEED_SGMII,
+ OCTEONTX_LINK_SPEED_XAUI,
+ OCTEONTX_LINK_SPEED_RXAUI,
+ OCTEONTX_LINK_SPEED_10G_R,
+ OCTEONTX_LINK_SPEED_40G_R,
+ OCTEONTX_LINK_SPEED_RESERVE1,
+ OCTEONTX_LINK_SPEED_QSGMII,
+ OCTEONTX_LINK_SPEED_RESERVE2
+};
+
+/* Parse integer from integer argument */
+static int
+parse_integer_arg(const char *key __rte_unused,
+ const char *value, void *extra_args)
+{
+ int *i = (int *)extra_args;
+
+ *i = atoi(value);
+ if (*i < 0) {
+ octeontx_log_err("argument has to be positive.");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+octeontx_parse_vdev_init_params(struct octeontx_vdev_init_params *params,
+ struct rte_vdev_device *dev)
+{
+ struct rte_kvargs *kvlist = NULL;
+ int ret = 0;
+
+ static const char * const octeontx_vdev_valid_params[] = {
+ OCTEONTX_VDEV_NR_PORT_ARG,
+ NULL
+ };
+
+ const char *input_args = rte_vdev_device_args(dev);
+ if (params == NULL)
+ return -EINVAL;
+
+ if (input_args) {
+ kvlist = rte_kvargs_parse(input_args,
+ octeontx_vdev_valid_params);
+ if (kvlist == NULL)
+ return -1;
+
+ ret = rte_kvargs_process(kvlist,
+ OCTEONTX_VDEV_NR_PORT_ARG,
+ &parse_integer_arg,
+ &params->nr_port);
+ if (ret < 0)
+ goto free_kvlist;
+ }
+
+free_kvlist:
+ rte_kvargs_free(kvlist);
+ return ret;
+}
+
+static int
+octeontx_port_open(struct octeontx_nic *nic)
+{
+ octeontx_mbox_bgx_port_conf_t bgx_port_conf;
+ int res;
+
+ res = 0;
+
+ PMD_INIT_FUNC_TRACE();
+
+ res = octeontx_bgx_port_open(nic->port_id, &bgx_port_conf);
+ if (res < 0) {
+ octeontx_log_err("failed to open port %d", res);
+ return res;
+ }
+
+ nic->node = bgx_port_conf.node;
+ nic->port_ena = bgx_port_conf.enable;
+ nic->base_ichan = bgx_port_conf.base_chan;
+ nic->base_ochan = bgx_port_conf.base_chan;
+ nic->num_ichans = bgx_port_conf.num_chans;
+ nic->num_ochans = bgx_port_conf.num_chans;
+ nic->mtu = bgx_port_conf.mtu;
+ nic->bpen = bgx_port_conf.bpen;
+ nic->fcs_strip = bgx_port_conf.fcs_strip;
+ nic->bcast_mode = bgx_port_conf.bcast_mode;
+ nic->mcast_mode = bgx_port_conf.mcast_mode;
+ nic->speed = bgx_port_conf.mode;
+
+ memcpy(&nic->mac_addr[0], &bgx_port_conf.macaddr[0], ETHER_ADDR_LEN);
+
+ octeontx_log_dbg("port opened %d", nic->port_id);
+ return res;
+}
+
+static void
+octeontx_port_close(struct octeontx_nic *nic)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ octeontx_bgx_port_close(nic->port_id);
+ octeontx_log_dbg("port closed %d", nic->port_id);
+}
+
+static int
+octeontx_port_start(struct octeontx_nic *nic)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ return octeontx_bgx_port_start(nic->port_id);
+}
+
+static int
+octeontx_port_stop(struct octeontx_nic *nic)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ return octeontx_bgx_port_stop(nic->port_id);
+}
+
+static void
+octeontx_port_promisc_set(struct octeontx_nic *nic, int en)
+{
+ struct rte_eth_dev *dev;
+ int res;
+
+ res = 0;
+ PMD_INIT_FUNC_TRACE();
+ dev = nic->dev;
+
+ res = octeontx_bgx_port_promisc_set(nic->port_id, en);
+ if (res < 0)
+ octeontx_log_err("failed to set promiscuous mode %d",
+ nic->port_id);
+
+ /* Set proper flag for the mode */
+ dev->data->promiscuous = (en != 0) ? 1 : 0;
+
+ octeontx_log_dbg("port %d : promiscuous mode %s",
+ nic->port_id, en ? "set" : "unset");
+}
+
+static int
+octeontx_port_stats(struct octeontx_nic *nic, struct rte_eth_stats *stats)
+{
+ octeontx_mbox_bgx_port_stats_t bgx_stats;
+ int res;
+
+ PMD_INIT_FUNC_TRACE();
+
+ res = octeontx_bgx_port_stats(nic->port_id, &bgx_stats);
+ if (res < 0) {
+ octeontx_log_err("failed to get port stats %d", nic->port_id);
+ return res;
+ }
+
+ stats->ipackets = bgx_stats.rx_packets;
+ stats->ibytes = bgx_stats.rx_bytes;
+ stats->imissed = bgx_stats.rx_dropped;
+ stats->ierrors = bgx_stats.rx_errors;
+ stats->opackets = bgx_stats.tx_packets;
+ stats->obytes = bgx_stats.tx_bytes;
+ stats->oerrors = bgx_stats.tx_errors;
+
+ octeontx_log_dbg("port%d stats inpkts=%" PRIx64 " outpkts=%" PRIx64 "",
+ nic->port_id, stats->ipackets, stats->opackets);
+
+ return 0;
+}
+
+static void
+octeontx_port_stats_clr(struct octeontx_nic *nic)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ octeontx_bgx_port_stats_clr(nic->port_id);
+}
+
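+/*
+ * Fill the event device configuration with the maximum capabilities the
+ * device advertises and its minimum dequeue timeout.
+ */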
+static inline void
+devconf_set_default_sane_values(struct rte_event_dev_config *dev_conf,
+ struct rte_event_dev_info *info)
+{
+ memset(dev_conf, 0, sizeof(struct rte_event_dev_config));
+ dev_conf->dequeue_timeout_ns = info->min_dequeue_timeout_ns;
+
+ dev_conf->nb_event_ports = info->max_event_ports;
+ dev_conf->nb_event_queues = info->max_event_queues;
+
+ dev_conf->nb_event_queue_flows = info->max_event_queue_flows;
+ dev_conf->nb_event_port_dequeue_depth =
+ info->max_event_port_dequeue_depth;
+ dev_conf->nb_event_port_enqueue_depth =
+ info->max_event_port_enqueue_depth;
+ dev_conf->nb_events_limit =
+ info->max_num_events;
+}
+
+static int
+octeontx_dev_configure(struct rte_eth_dev *dev)
+{
+ struct rte_eth_dev_data *data = dev->data;
+ struct rte_eth_conf *conf = &data->dev_conf;
+ struct rte_eth_rxmode *rxmode = &conf->rxmode;
+ struct rte_eth_txmode *txmode = &conf->txmode;
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+ int ret;
+
+ PMD_INIT_FUNC_TRACE();
+ RTE_SET_USED(conf);
+
+ if (!rte_eal_has_hugepages()) {
+ octeontx_log_err("huge page is not configured");
+ return -EINVAL;
+ }
+
+ if (txmode->mq_mode) {
+ octeontx_log_err("tx mq_mode DCB or VMDq not supported");
+ return -EINVAL;
+ }
+
+ if (rxmode->mq_mode != ETH_MQ_RX_NONE &&
+ rxmode->mq_mode != ETH_MQ_RX_RSS) {
+ octeontx_log_err("unsupported rx qmode %d", rxmode->mq_mode);
+ return -EINVAL;
+ }
+
+ if (!rxmode->hw_strip_crc) {
+ PMD_INIT_LOG(NOTICE, "can't disable hw crc strip");
+ rxmode->hw_strip_crc = 1;
+ }
+
+ if (rxmode->hw_ip_checksum) {
+ PMD_INIT_LOG(NOTICE, "rxcksum not supported");
+ rxmode->hw_ip_checksum = 0;
+ }
+
+ if (rxmode->split_hdr_size) {
+ octeontx_log_err("rxmode does not support split header");
+ return -EINVAL;
+ }
+
+ if (rxmode->hw_vlan_filter) {
+ octeontx_log_err("VLAN filter not supported");
+ return -EINVAL;
+ }
+
+ if (rxmode->hw_vlan_extend) {
+ octeontx_log_err("VLAN extended not supported");
+ return -EINVAL;
+ }
+
+ if (rxmode->enable_lro) {
+ octeontx_log_err("LRO not supported");
+ return -EINVAL;
+ }
+
+ if (conf->link_speeds & ETH_LINK_SPEED_FIXED) {
+ octeontx_log_err("setting link speed/duplex not supported");
+ return -EINVAL;
+ }
+
+ if (conf->dcb_capability_en) {
+ octeontx_log_err("DCB enable not supported");
+ return -EINVAL;
+ }
+
+ if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
+ octeontx_log_err("flow director not supported");
+ return -EINVAL;
+ }
+
+ nic->num_tx_queues = dev->data->nb_tx_queues;
+
+ ret = octeontx_pko_channel_open(nic->port_id * PKO_VF_NUM_DQ,
+ nic->num_tx_queues,
+ nic->base_ochan);
+ if (ret) {
+ octeontx_log_err("failed to open channel %d no-of-txq %d",
+ nic->base_ochan, nic->num_tx_queues);
+ return -EFAULT;
+ }
+
+ nic->pki.classifier_enable = false;
+ nic->pki.hash_enable = true;
+ nic->pki.initialized = false;
+
+ return 0;
+}
+
+static void
+octeontx_dev_close(struct rte_eth_dev *dev)
+{
+ struct octeontx_txq *txq = NULL;
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+ unsigned int i;
+ int ret;
+
+ PMD_INIT_FUNC_TRACE();
+
+ rte_event_dev_close(nic->evdev);
+
+ ret = octeontx_pko_channel_close(nic->base_ochan);
+ if (ret < 0) {
+ octeontx_log_err("failed to close channel %d VF%d %d %d",
+ nic->base_ochan, nic->port_id, nic->num_tx_queues,
+ ret);
+ }
+ /* Free txq resources for this port */
+ for (i = 0; i < nic->num_tx_queues; i++) {
+ txq = dev->data->tx_queues[i];
+ if (!txq)
+ continue;
+
+ rte_free(txq);
+ }
+}
+
+static int
+octeontx_dev_start(struct rte_eth_dev *dev)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+ int ret;
+
+ ret = 0;
+
+ PMD_INIT_FUNC_TRACE();
+ /*
+ * Tx start
+ */
+ dev->tx_pkt_burst = octeontx_xmit_pkts;
+ ret = octeontx_pko_channel_start(nic->base_ochan);
+ if (ret < 0) {
+ octeontx_log_err("fail to conf VF%d no. txq %d chan %d ret %d",
+ nic->port_id, nic->num_tx_queues, nic->base_ochan,
+ ret);
+ goto error;
+ }
+
+ /*
+ * Rx start
+ */
+ dev->rx_pkt_burst = octeontx_recv_pkts;
+ ret = octeontx_pki_port_start(nic->port_id);
+ if (ret < 0) {
+ octeontx_log_err("fail to start Rx on port %d", nic->port_id);
+ goto channel_stop_error;
+ }
+
+ /*
+ * Start port
+ */
+ ret = octeontx_port_start(nic);
+ if (ret < 0) {
+ octeontx_log_err("failed start port %d", ret);
+ goto pki_port_stop_error;
+ }
+
+ PMD_TX_LOG(DEBUG, "pko: start channel %d no.of txq %d port %d",
+ nic->base_ochan, nic->num_tx_queues, nic->port_id);
+
+ ret = rte_event_dev_start(nic->evdev);
+ if (ret < 0) {
+ octeontx_log_err("failed to start evdev: ret (%d)", ret);
+ goto pki_port_stop_error;
+ }
+
+ /* Success */
+ return ret;
+
+pki_port_stop_error:
+ octeontx_pki_port_stop(nic->port_id);
+channel_stop_error:
+ octeontx_pko_channel_stop(nic->base_ochan);
+error:
+ return ret;
+}
+
+static void
+octeontx_dev_stop(struct rte_eth_dev *dev)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+ int ret;
+
+ PMD_INIT_FUNC_TRACE();
+
+ rte_event_dev_stop(nic->evdev);
+
+ ret = octeontx_port_stop(nic);
+ if (ret < 0) {
+ octeontx_log_err("failed to req stop port %d res=%d",
+ nic->port_id, ret);
+ return;
+ }
+
+ ret = octeontx_pki_port_stop(nic->port_id);
+ if (ret < 0) {
+ octeontx_log_err("failed to stop pki port %d res=%d",
+ nic->port_id, ret);
+ return;
+ }
+
+ ret = octeontx_pko_channel_stop(nic->base_ochan);
+ if (ret < 0) {
+ octeontx_log_err("failed to stop channel %d VF%d %d %d",
+ nic->base_ochan, nic->port_id, nic->num_tx_queues,
+ ret);
+ return;
+ }
+
+ dev->tx_pkt_burst = NULL;
+ dev->rx_pkt_burst = NULL;
+}
+
+static void
+octeontx_dev_promisc_enable(struct rte_eth_dev *dev)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+
+ PMD_INIT_FUNC_TRACE();
+ octeontx_port_promisc_set(nic, 1);
+}
+
+static void
+octeontx_dev_promisc_disable(struct rte_eth_dev *dev)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+
+ PMD_INIT_FUNC_TRACE();
+ octeontx_port_promisc_set(nic, 0);
+}
+
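+/*
+ * Atomically publish the new link state; returns -1 if a concurrent update
+ * won the race.
+ */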
+static inline int
+octeontx_atomic_write_link_status(struct rte_eth_dev *dev,
+ struct rte_eth_link *link)
+{
+ struct rte_eth_link *dst = &dev->data->dev_link;
+ struct rte_eth_link *src = link;
+
+ if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+ *(uint64_t *)src) == 0)
+ return -1;
+
+ return 0;
+}
+
+static int
+octeontx_port_link_status(struct octeontx_nic *nic)
+{
+ int res;
+
+ PMD_INIT_FUNC_TRACE();
+ res = octeontx_bgx_port_link_status(nic->port_id);
+ if (res < 0) {
+ octeontx_log_err("failed to get port %d link status",
+ nic->port_id);
+ return res;
+ }
+
+ nic->link_up = (uint8_t)res;
+ octeontx_log_dbg("port %d link status %d", nic->port_id, nic->link_up);
+
+ return res;
+}
+
+/*
+ * Return 0 means link status changed, -1 means not changed
+ */
+static int
+octeontx_dev_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete __rte_unused)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+ struct rte_eth_link link;
+ int res;
+
+ res = 0;
+ PMD_INIT_FUNC_TRACE();
+
+ res = octeontx_port_link_status(nic);
+ if (res < 0) {
+ octeontx_log_err("failed to request link status %d", res);
+ return res;
+ }
+
+ link.link_status = nic->link_up;
+
+ switch (nic->speed) {
+ case OCTEONTX_LINK_SPEED_SGMII:
+ link.link_speed = ETH_SPEED_NUM_1G;
+ break;
+
+ case OCTEONTX_LINK_SPEED_XAUI:
+ link.link_speed = ETH_SPEED_NUM_10G;
+ break;
+
+ case OCTEONTX_LINK_SPEED_RXAUI:
+ case OCTEONTX_LINK_SPEED_10G_R:
+ link.link_speed = ETH_SPEED_NUM_10G;
+ break;
+ case OCTEONTX_LINK_SPEED_QSGMII:
+ link.link_speed = ETH_SPEED_NUM_5G;
+ break;
+ case OCTEONTX_LINK_SPEED_40G_R:
+ link.link_speed = ETH_SPEED_NUM_40G;
+ break;
+
+ case OCTEONTX_LINK_SPEED_RESERVE1:
+ case OCTEONTX_LINK_SPEED_RESERVE2:
+ default:
+ octeontx_log_err("incorrect link speed %d", nic->speed);
+ break;
+ }
+
+ link.link_duplex = ETH_LINK_AUTONEG;
+ link.link_autoneg = ETH_LINK_SPEED_AUTONEG;
+
+ return octeontx_atomic_write_link_status(dev, &link);
+}
+
+static int
+octeontx_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+
+ PMD_INIT_FUNC_TRACE();
+ return octeontx_port_stats(nic, stats);
+}
+
+static void
+octeontx_dev_stats_reset(struct rte_eth_dev *dev)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+
+ PMD_INIT_FUNC_TRACE();
+ octeontx_port_stats_clr(nic);
+}
+
+static void
+octeontx_dev_default_mac_addr_set(struct rte_eth_dev *dev,
+ struct ether_addr *addr)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+ int ret;
+
+ ret = octeontx_bgx_port_mac_set(nic->port_id, addr->addr_bytes);
+ if (ret != 0)
+ octeontx_log_err("failed to set MAC address on port %d",
+ nic->port_id);
+}
+
+static void
+octeontx_dev_info(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *dev_info)
+{
+ RTE_SET_USED(dev);
+
+ /* Autonegotiation may be disabled */
+ dev_info->speed_capa = ETH_LINK_SPEED_FIXED;
+ dev_info->speed_capa |= ETH_LINK_SPEED_10M | ETH_LINK_SPEED_100M |
+ ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G |
+ ETH_LINK_SPEED_40G;
+
+ dev_info->driver_name = RTE_STR(rte_octeontx_pmd);
+ dev_info->max_mac_addrs = 1;
+ dev_info->max_rx_pktlen = PKI_MAX_PKTLEN;
+ dev_info->max_rx_queues = 1;
+ dev_info->max_tx_queues = PKO_MAX_NUM_DQ;
+ dev_info->min_rx_bufsize = 0;
+ dev_info->pci_dev = NULL;
+
+ dev_info->default_rxconf = (struct rte_eth_rxconf) {
+ .rx_free_thresh = 0,
+ .rx_drop_en = 0,
+ };
+
+ dev_info->default_txconf = (struct rte_eth_txconf) {
+ .tx_free_thresh = 0,
+ .txq_flags =
+ ETH_TXQ_FLAGS_NOMULTSEGS |
+ ETH_TXQ_FLAGS_NOOFFLOADS |
+ ETH_TXQ_FLAGS_NOXSUMS,
+ };
+
+ dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MT_LOCKFREE;
+}
+
+static void
+octeontx_dq_info_getter(octeontx_dq_t *dq, void *out)
+{
+ ((octeontx_dq_t *)out)->lmtline_va = dq->lmtline_va;
+ ((octeontx_dq_t *)out)->ioreg_va = dq->ioreg_va;
+ ((octeontx_dq_t *)out)->fc_status_va = dq->fc_status_va;
+}
+
+static int
+octeontx_vf_start_tx_queue(struct rte_eth_dev *dev, struct octeontx_nic *nic,
+ uint16_t qidx)
+{
+ struct octeontx_txq *txq;
+ int res;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (dev->data->tx_queue_state[qidx] == RTE_ETH_QUEUE_STATE_STARTED)
+ return 0;
+
+ txq = dev->data->tx_queues[qidx];
+
+ res = octeontx_pko_channel_query_dqs(nic->base_ochan,
+ &txq->dq,
+ sizeof(octeontx_dq_t),
+ txq->queue_id,
+ octeontx_dq_info_getter);
+ if (res < 0) {
+ res = -EFAULT;
+ goto close_port;
+ }
+
+ dev->data->tx_queue_state[qidx] = RTE_ETH_QUEUE_STATE_STARTED;
+ return res;
+
+close_port:
+ (void)octeontx_port_stop(nic);
+ octeontx_pko_channel_stop(nic->base_ochan);
+ octeontx_pko_channel_close(nic->base_ochan);
+ dev->data->tx_queue_state[qidx] = RTE_ETH_QUEUE_STATE_STOPPED;
+ return res;
+}
+
+static int
+octeontx_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t qidx)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+
+ PMD_INIT_FUNC_TRACE();
+ qidx = qidx % PKO_VF_NUM_DQ;
+ return octeontx_vf_start_tx_queue(dev, nic, qidx);
+}
+
+static inline int
+octeontx_vf_stop_tx_queue(struct rte_eth_dev *dev, struct octeontx_nic *nic,
+ uint16_t qidx)
+{
+ int ret = 0;
+
+ RTE_SET_USED(nic);
+ PMD_INIT_FUNC_TRACE();
+
+ if (dev->data->tx_queue_state[qidx] == RTE_ETH_QUEUE_STATE_STOPPED)
+ return 0;
+
+ dev->data->tx_queue_state[qidx] = RTE_ETH_QUEUE_STATE_STOPPED;
+ return ret;
+}
+
+static int
+octeontx_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t qidx)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+
+ PMD_INIT_FUNC_TRACE();
+ qidx = qidx % PKO_VF_NUM_DQ;
+
+ return octeontx_vf_stop_tx_queue(dev, nic, qidx);
+}
+
+static void
+octeontx_dev_tx_queue_release(void *tx_queue)
+{
+ struct octeontx_txq *txq = tx_queue;
+ int res;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (txq) {
+ res = octeontx_dev_tx_queue_stop(txq->eth_dev, txq->queue_id);
+ if (res < 0)
+ octeontx_log_err("failed stop tx_queue(%d)\n",
+ txq->queue_id);
+
+ rte_free(txq);
+ }
+}
+
+static int
+octeontx_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx,
+ uint16_t nb_desc, unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+ struct octeontx_txq *txq = NULL;
+ uint16_t dq_num;
+ int res = 0;
+
+ RTE_SET_USED(nb_desc);
+ RTE_SET_USED(socket_id);
+ RTE_SET_USED(tx_conf);
+
+ dq_num = (nic->port_id * PKO_VF_NUM_DQ) + qidx;
+
+ /* Socket id check */
+ if (socket_id != (unsigned int)SOCKET_ID_ANY &&
+ socket_id != (unsigned int)nic->node)
+ PMD_TX_LOG(INFO, "socket_id expected %d, configured %d",
+ socket_id, nic->node);
+
+ /* Free memory prior to re-allocation if needed. */
+ if (dev->data->tx_queues[qidx] != NULL) {
+ PMD_TX_LOG(DEBUG, "freeing memory prior to re-allocation %d",
+ qidx);
+ octeontx_dev_tx_queue_release(dev->data->tx_queues[qidx]);
+ dev->data->tx_queues[qidx] = NULL;
+ }
+
+ /* Allocating tx queue data structure */
+ txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct octeontx_txq),
+ RTE_CACHE_LINE_SIZE, nic->node);
+ if (txq == NULL) {
+ octeontx_log_err("failed to allocate txq=%d", qidx);
+ res = -ENOMEM;
+ goto err;
+ }
+
+ txq->eth_dev = dev;
+ txq->queue_id = dq_num;
+ dev->data->tx_queues[qidx] = txq;
+ dev->data->tx_queue_state[qidx] = RTE_ETH_QUEUE_STATE_STOPPED;
+
+ res = octeontx_pko_channel_query_dqs(nic->base_ochan,
+ &txq->dq,
+ sizeof(octeontx_dq_t),
+ txq->queue_id,
+ octeontx_dq_info_getter);
+ if (res < 0) {
+ res = -EFAULT;
+ goto err;
+ }
+
+ PMD_TX_LOG(DEBUG, "[%d]:[%d] txq=%p nb_desc=%d lmtline=%p ioreg_va=%p fc_status_va=%p",
+ qidx, txq->queue_id, txq, nb_desc, txq->dq.lmtline_va,
+ txq->dq.ioreg_va,
+ txq->dq.fc_status_va);
+
+ return res;
+
+err:
+ if (txq)
+ rte_free(txq);
+
+ return res;
+}
+
+static int
+octeontx_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx,
+ uint16_t nb_desc, unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mb_pool)
+{
+ struct octeontx_nic *nic = octeontx_pmd_priv(dev);
+ struct rte_mempool_ops *mp_ops = NULL;
+ struct octeontx_rxq *rxq = NULL;
+ pki_pktbuf_cfg_t pktbuf_conf;
+ pki_hash_cfg_t pki_hash;
+ pki_qos_cfg_t pki_qos;
+ uintptr_t pool;
+ int ret, port;
+ uint8_t gaura;
+ unsigned int ev_queues = (nic->ev_queues * nic->port_id) + qidx;
+ unsigned int ev_ports = (nic->ev_ports * nic->port_id) + qidx;
+
+ RTE_SET_USED(nb_desc);
+
+ memset(&pktbuf_conf, 0, sizeof(pktbuf_conf));
+ memset(&pki_hash, 0, sizeof(pki_hash));
+ memset(&pki_qos, 0, sizeof(pki_qos));
+
+ mp_ops = rte_mempool_get_ops(mb_pool->ops_index);
+ if (strcmp(mp_ops->name, "octeontx_fpavf")) {
+ octeontx_log_err("failed to find octeontx_fpavf mempool");
+ return -ENOTSUP;
+ }
+
+ /* Handle forbidden configurations */
+ if (nic->pki.classifier_enable) {
+ octeontx_log_err("cannot setup queue %d. "
+ "Classifier option unsupported", qidx);
+ return -EINVAL;
+ }
+
+ port = nic->port_id;
+
+ /* Rx deferred start is not supported */
+ if (rx_conf->rx_deferred_start) {
+ octeontx_log_err("rx deferred start not supported");
+ return -EINVAL;
+ }
+
+ /* Verify queue index */
+ if (qidx >= dev->data->nb_rx_queues) {
+ octeontx_log_err("QID %d not supporteded (0 - %d available)\n",
+ qidx, (dev->data->nb_rx_queues - 1));
+ return -ENOTSUP;
+ }
+
+ /* Socket id check */
+ if (socket_id != (unsigned int)SOCKET_ID_ANY &&
+ socket_id != (unsigned int)nic->node)
+ PMD_RX_LOG(INFO, "socket_id expected %d, configured %d",
+ socket_id, nic->node);
+
+ /* Allocating rx queue data structure */
+ rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct octeontx_rxq),
+ RTE_CACHE_LINE_SIZE, nic->node);
+ if (rxq == NULL) {
+ octeontx_log_err("failed to allocate rxq=%d", qidx);
+ return -ENOMEM;
+ }
+
+ if (!nic->pki.initialized) {
+ pktbuf_conf.port_type = 0;
+ pki_hash.port_type = 0;
+ pki_qos.port_type = 0;
+
+ pktbuf_conf.mmask.f_wqe_skip = 1;
+ pktbuf_conf.mmask.f_first_skip = 1;
+ pktbuf_conf.mmask.f_later_skip = 1;
+ pktbuf_conf.mmask.f_mbuff_size = 1;
+ pktbuf_conf.mmask.f_cache_mode = 1;
+
+ pktbuf_conf.wqe_skip = OCTTX_PACKET_WQE_SKIP;
+ pktbuf_conf.first_skip = OCTTX_PACKET_FIRST_SKIP;
+ pktbuf_conf.later_skip = OCTTX_PACKET_LATER_SKIP;
+ pktbuf_conf.mbuff_size = (mb_pool->elt_size -
+ RTE_PKTMBUF_HEADROOM -
+ sizeof(struct rte_mbuf));
+
+ pktbuf_conf.cache_mode = PKI_OPC_MODE_STF2_STT;
+
+ ret = octeontx_pki_port_pktbuf_config(port, &pktbuf_conf);
+ if (ret != 0) {
+ octeontx_log_err("fail to configure pktbuf for port %d",
+ port);
+ rte_free(rxq);
+ return ret;
+ }
+ PMD_RX_LOG(DEBUG, "Port %d Rx pktbuf configured:\n"
+ "\tmbuf_size:\t0x%0x\n"
+ "\twqe_skip:\t0x%0x\n"
+ "\tfirst_skip:\t0x%0x\n"
+ "\tlater_skip:\t0x%0x\n"
+ "\tcache_mode:\t%s\n",
+ port,
+ pktbuf_conf.mbuff_size,
+ pktbuf_conf.wqe_skip,
+ pktbuf_conf.first_skip,
+ pktbuf_conf.later_skip,
+ (pktbuf_conf.cache_mode ==
+ PKI_OPC_MODE_STT) ?
+ "STT" :
+ (pktbuf_conf.cache_mode ==
+ PKI_OPC_MODE_STF) ?
+ "STF" :
+ (pktbuf_conf.cache_mode ==
+ PKI_OPC_MODE_STF1_STT) ?
+ "STF1_STT" : "STF2_STT");
+
+ if (nic->pki.hash_enable) {
+ pki_hash.tag_dlc = 1;
+ pki_hash.tag_slc = 1;
+ pki_hash.tag_dlf = 1;
+ pki_hash.tag_slf = 1;
+ pki_hash.tag_prt = 1;
+ octeontx_pki_port_hash_config(port, &pki_hash);
+ }
+
+ pool = (uintptr_t)mb_pool->pool_id;
+
+ /* Get the gpool Id */
+ gaura = octeontx_fpa_bufpool_gpool(pool);
+
+ pki_qos.qpg_qos = PKI_QPG_QOS_NONE;
+ pki_qos.num_entry = 1;
+ pki_qos.drop_policy = 0;
+ pki_qos.tag_type = 0L;
+ pki_qos.qos_entry[0].port_add = 0;
+ pki_qos.qos_entry[0].gaura = gaura;
+ pki_qos.qos_entry[0].ggrp_ok = ev_queues;
+ pki_qos.qos_entry[0].ggrp_bad = ev_queues;
+ pki_qos.qos_entry[0].grptag_bad = 0;
+ pki_qos.qos_entry[0].grptag_ok = 0;
+
+ ret = octeontx_pki_port_create_qos(port, &pki_qos);
+ if (ret < 0) {
+ octeontx_log_err("failed to create QOS port=%d, q=%d",
+ port, qidx);
+ rte_free(rxq);
+ return ret;
+ }
+ nic->pki.initialized = true;
+ }
+
+ rxq->port_id = nic->port_id;
+ rxq->eth_dev = dev;
+ rxq->queue_id = qidx;
+ rxq->evdev = nic->evdev;
+ rxq->ev_queues = ev_queues;
+ rxq->ev_ports = ev_ports;
+
+ dev->data->rx_queues[qidx] = rxq;
+ dev->data->rx_queue_state[qidx] = RTE_ETH_QUEUE_STATE_STOPPED;
+ return 0;
+}
+
+static void
+octeontx_dev_rx_queue_release(void *rxq)
+{
+ rte_free(rxq);
+}
+
+static const uint32_t *
+octeontx_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+ static const uint32_t ptypes[] = {
+ RTE_PTYPE_L3_IPV4,
+ RTE_PTYPE_L3_IPV4_EXT,
+ RTE_PTYPE_L3_IPV6,
+ RTE_PTYPE_L3_IPV6_EXT,
+ RTE_PTYPE_L4_TCP,
+ RTE_PTYPE_L4_UDP,
+ RTE_PTYPE_L4_FRAG,
+ RTE_PTYPE_UNKNOWN
+ };
+
+ if (dev->rx_pkt_burst == octeontx_recv_pkts)
+ return ptypes;
+
+ return NULL;
+}
+
+/* Initialize and register driver with DPDK Application */
+static const struct eth_dev_ops octeontx_dev_ops = {
+ .dev_configure = octeontx_dev_configure,
+ .dev_infos_get = octeontx_dev_info,
+ .dev_close = octeontx_dev_close,
+ .dev_start = octeontx_dev_start,
+ .dev_stop = octeontx_dev_stop,
+ .promiscuous_enable = octeontx_dev_promisc_enable,
+ .promiscuous_disable = octeontx_dev_promisc_disable,
+ .link_update = octeontx_dev_link_update,
+ .stats_get = octeontx_dev_stats_get,
+ .stats_reset = octeontx_dev_stats_reset,
+ .mac_addr_set = octeontx_dev_default_mac_addr_set,
+ .tx_queue_start = octeontx_dev_tx_queue_start,
+ .tx_queue_stop = octeontx_dev_tx_queue_stop,
+ .tx_queue_setup = octeontx_dev_tx_queue_setup,
+ .tx_queue_release = octeontx_dev_tx_queue_release,
+ .rx_queue_setup = octeontx_dev_rx_queue_setup,
+ .rx_queue_release = octeontx_dev_rx_queue_release,
+ .dev_supported_ptypes_get = octeontx_dev_supported_ptypes_get,
+};
+
+/* Create Ethdev interface per BGX LMAC ports */
+static int
+octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev,
+ int socket_id)
+{
+ int res;
+ char octtx_name[OCTEONTX_MAX_NAME_LEN];
+ struct octeontx_nic *nic = NULL;
+ struct rte_eth_dev *eth_dev = NULL;
+ struct rte_eth_dev_data *data = NULL;
+ const char *name = rte_vdev_device_name(dev);
+
+ PMD_INIT_FUNC_TRACE();
+
+ sprintf(octtx_name, "%s_%d", name, port);
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ eth_dev = rte_eth_dev_attach_secondary(octtx_name);
+ if (eth_dev == NULL)
+ return -ENODEV;
+
+ eth_dev->tx_pkt_burst = octeontx_xmit_pkts;
+ eth_dev->rx_pkt_burst = octeontx_recv_pkts;
+ return 0;
+ }
+
+ data = rte_zmalloc_socket(octtx_name, sizeof(*data), 0, socket_id);
+ if (data == NULL) {
+ octeontx_log_err("failed to allocate devdata");
+ res = -ENOMEM;
+ goto err;
+ }
+
+ nic = rte_zmalloc_socket(octtx_name, sizeof(*nic), 0, socket_id);
+ if (nic == NULL) {
+ octeontx_log_err("failed to allocate nic structure");
+ res = -ENOMEM;
+ goto err;
+ }
+
+ nic->port_id = port;
+ nic->evdev = evdev;
+
+ res = octeontx_port_open(nic);
+ if (res < 0)
+ goto err;
+
+ /* Rx side port configuration */
+ res = octeontx_pki_port_open(port);
+ if (res != 0) {
+ octeontx_log_err("failed to open PKI port %d", port);
+ res = -ENODEV;
+ goto err;
+ }
+
+ /* Reserve an ethdev entry */
+ eth_dev = rte_eth_dev_allocate(octtx_name);
+ if (eth_dev == NULL) {
+ octeontx_log_err("failed to allocate rte_eth_dev");
+ res = -ENOMEM;
+ goto err;
+ }
+
+ eth_dev->device = &dev->device;
+ eth_dev->intr_handle = NULL;
+ eth_dev->data->kdrv = RTE_KDRV_NONE;
+ eth_dev->data->numa_node = dev->device.numa_node;
+
+ rte_memcpy(data, eth_dev->data, sizeof(*data));
+ data->dev_private = nic;
+
+ data->port_id = eth_dev->data->port_id;
+ snprintf(data->name, sizeof(data->name), "%s", eth_dev->data->name);
+
+ nic->ev_queues = 1;
+ nic->ev_ports = 1;
+
+ data->dev_link.link_status = ETH_LINK_DOWN;
+ data->dev_started = 0;
+ data->promiscuous = 0;
+ data->all_multicast = 0;
+ data->scattered_rx = 0;
+
+ data->mac_addrs = rte_zmalloc_socket(octtx_name, ETHER_ADDR_LEN, 0,
+ socket_id);
+ if (data->mac_addrs == NULL) {
+ octeontx_log_err("failed to allocate memory for mac_addrs");
+ res = -ENOMEM;
+ goto err;
+ }
+
+ eth_dev->data = data;
+ eth_dev->dev_ops = &octeontx_dev_ops;
+
+ /* Finally save ethdev pointer to the NIC structure */
+ nic->dev = eth_dev;
+
+ if (nic->port_id != data->port_id) {
+ octeontx_log_err("eth_dev->port_id (%d) is diff to orig (%d)",
+ data->port_id, nic->port_id);
+ res = -EINVAL;
+ goto err;
+ }
+
+ /* Update port_id mac to eth_dev */
+ memcpy(data->mac_addrs, nic->mac_addr, ETHER_ADDR_LEN);
+
+ PMD_INIT_LOG(DEBUG, "ethdev info: ");
+ PMD_INIT_LOG(DEBUG, "port %d, port_ena %d ochan %d num_ochan %d tx_q %d",
+ nic->port_id, nic->port_ena,
+ nic->base_ochan, nic->num_ochans,
+ nic->num_tx_queues);
+ PMD_INIT_LOG(DEBUG, "speed %d mtu %d", nic->speed, nic->mtu);
+
+ return data->port_id;
+
+err:
+ if (port)
+ octeontx_port_close(nic);
+
+ if (eth_dev != NULL) {
+ rte_free(eth_dev->data->mac_addrs);
+ rte_free(data);
+ rte_free(nic);
+ rte_eth_dev_release_port(eth_dev);
+ }
+
+ return res;
+}
+
+/* Uninitialize octeontx device */
+static int
+octeontx_remove(struct rte_vdev_device *dev)
+{
+ char octtx_name[OCTEONTX_MAX_NAME_LEN];
+ struct rte_eth_dev *eth_dev = NULL;
+ struct octeontx_nic *nic = NULL;
+ int i;
+
+ if (dev == NULL)
+ return -EINVAL;
+
+ for (i = 0; i < OCTEONTX_VDEV_DEFAULT_MAX_NR_PORT; i++) {
+ sprintf(octtx_name, "eth_octeontx_%d", i);
+
+ /* reserve an ethdev entry */
+ eth_dev = rte_eth_dev_allocated(octtx_name);
+ if (eth_dev == NULL)
+ return -ENODEV;
+
+ nic = octeontx_pmd_priv(eth_dev);
+ rte_event_dev_stop(nic->evdev);
+ PMD_INIT_LOG(INFO, "Closing octeontx device %s", octtx_name);
+
+ rte_free(eth_dev->data->mac_addrs);
+ rte_free(eth_dev->data->dev_private);
+ rte_free(eth_dev->data);
+ rte_eth_dev_release_port(eth_dev);
+ rte_event_dev_close(nic->evdev);
+ }
+
+ /* Free FC resource */
+ octeontx_pko_fc_free();
+
+ return 0;
+}
+
+/* Initialize octeontx device */
+static int
+octeontx_probe(struct rte_vdev_device *dev)
+{
+ const char *dev_name;
+ static int probe_once;
+ uint8_t socket_id, qlist;
+ int tx_vfcnt, port_id, evdev, qnum, pnum, res, i;
+ struct rte_event_dev_config dev_conf;
+ const char *eventdev_name = "event_octeontx";
+ struct rte_event_dev_info info;
+
+ struct octeontx_vdev_init_params init_params = {
+ OCTEONTX_VDEV_DEFAULT_MAX_NR_PORT
+ };
+
+ dev_name = rte_vdev_device_name(dev);
+ res = octeontx_parse_vdev_init_params(&init_params, dev);
+ if (res < 0)
+ return -EINVAL;
+
+ if (init_params.nr_port > OCTEONTX_VDEV_DEFAULT_MAX_NR_PORT) {
+ octeontx_log_err("nr_port (%d) > max (%d)", init_params.nr_port,
+ OCTEONTX_VDEV_DEFAULT_MAX_NR_PORT);
+ return -ENOTSUP;
+ }
+
+ PMD_INIT_LOG(DEBUG, "initializing %s pmd", dev_name);
+
+ socket_id = rte_socket_id();
+
+ tx_vfcnt = octeontx_pko_vf_count();
+
+ if (tx_vfcnt < init_params.nr_port) {
+ octeontx_log_err("not enough PKO (%d) for port number (%d)",
+ tx_vfcnt, init_params.nr_port);
+ return -EINVAL;
+ }
+ evdev = rte_event_dev_get_dev_id(eventdev_name);
+ if (evdev < 0) {
+ octeontx_log_err("eventdev %s not found", eventdev_name);
+ return -ENODEV;
+ }
+
+ res = rte_event_dev_info_get(evdev, &info);
+ if (res < 0) {
+ octeontx_log_err("failed to eventdev info %d", res);
+ return -EINVAL;
+ }
+
+ PMD_INIT_LOG(DEBUG, "max_queue %d max_port %d",
+ info.max_event_queues, info.max_event_ports);
+
+ if (octeontx_pko_init_fc(tx_vfcnt))
+ return -ENOMEM;
+
+ devconf_set_default_sane_values(&dev_conf, &info);
+ res = rte_event_dev_configure(evdev, &dev_conf);
+ if (res < 0)
+ goto parse_error;
+
+ rte_event_dev_attr_get(evdev, RTE_EVENT_DEV_ATTR_PORT_COUNT,
+ (uint32_t *)&pnum);
+ rte_event_dev_attr_get(evdev, RTE_EVENT_DEV_ATTR_QUEUE_COUNT,
+ (uint32_t *)&qnum);
+ if (pnum < qnum) {
+ octeontx_log_err("too few event ports (%d) for event_q(%d)",
+ pnum, qnum);
+ res = -EINVAL;
+ goto parse_error;
+ }
+ if (pnum > qnum) {
+ /*
+ * We don't poll on event ports
+ * that do not have any queues assigned.
+ */
+ pnum = qnum;
+ PMD_INIT_LOG(INFO,
+ "reducing number of active event ports to %d", pnum);
+ }
+ for (i = 0; i < qnum; i++) {
+ res = rte_event_queue_setup(evdev, i, NULL);
+ if (res < 0) {
+ octeontx_log_err("failed to setup event_q(%d): res %d",
+ i, res);
+ goto parse_error;
+ }
+ }
+
+ for (i = 0; i < pnum; i++) {
+ res = rte_event_port_setup(evdev, i, NULL);
+ if (res < 0) {
+ res = -ENODEV;
+ octeontx_log_err("failed to setup ev port(%d) res=%d",
+ i, res);
+ goto parse_error;
+ }
+ /* Link one queue to one event port */
+ qlist = i;
+ res = rte_event_port_link(evdev, i, &qlist, NULL, 1);
+ if (res < 0) {
+ res = -ENODEV;
+ octeontx_log_err("failed to link port (%d): res=%d",
+ i, res);
+ goto parse_error;
+ }
+ }
+
+ /* Create ethdev interface */
+ for (i = 0; i < init_params.nr_port; i++) {
+ port_id = octeontx_create(dev, i, evdev, socket_id);
+ if (port_id < 0) {
+ octeontx_log_err("failed to create device %s",
+ dev_name);
+ res = -ENODEV;
+ goto parse_error;
+ }
+
+ PMD_INIT_LOG(INFO, "created ethdev %s for port %d", dev_name,
+ port_id);
+ }
+
+ if (probe_once) {
+ octeontx_log_err("interface %s not supported", dev_name);
+ octeontx_remove(dev);
+ res = -ENOTSUP;
+ goto parse_error;
+ }
+ probe_once = 1;
+
+ return 0;
+
+parse_error:
+ octeontx_pko_fc_free();
+ return res;
+}
+
+static struct rte_vdev_driver octeontx_pmd_drv = {
+ .probe = octeontx_probe,
+ .remove = octeontx_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(OCTEONTX_PMD, octeontx_pmd_drv);
+RTE_PMD_REGISTER_ALIAS(OCTEONTX_PMD, eth_octeontx);
+RTE_PMD_REGISTER_PARAM_STRING(OCTEONTX_PMD, "nr_port=<int> ");
diff --git a/drivers/net/octeontx/octeontx_ethdev.h b/drivers/net/octeontx/octeontx_ethdev.h
new file mode 100644
index 00000000..c47d4c6d
--- /dev/null
+++ b/drivers/net/octeontx/octeontx_ethdev.h
@@ -0,0 +1,109 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __OCTEONTX_ETHDEV_H__
+#define __OCTEONTX_ETHDEV_H__
+
+#include <stdbool.h>
+
+#include <rte_common.h>
+#include <rte_ethdev.h>
+#include <rte_eventdev.h>
+#include <rte_mempool.h>
+#include <rte_memory.h>
+
+#include <octeontx_fpavf.h>
+
+#include "base/octeontx_bgx.h"
+#include "base/octeontx_pki_var.h"
+#include "base/octeontx_pkivf.h"
+#include "base/octeontx_pkovf.h"
+#include "base/octeontx_io.h"
+
+#define OCTEONTX_VDEV_DEFAULT_MAX_NR_PORT 12
+#define OCTEONTX_VDEV_NR_PORT_ARG ("nr_port")
+#define OCTEONTX_MAX_NAME_LEN 32
+
+static inline struct octeontx_nic *
+octeontx_pmd_priv(struct rte_eth_dev *dev)
+{
+ return dev->data->dev_private;
+}
+
+/* Octeontx ethdev nic */
+struct octeontx_nic {
+ struct rte_eth_dev *dev;
+ int node;
+ int port_id;
+ int port_ena;
+ int base_ichan;
+ int num_ichans;
+ int base_ochan;
+ int num_ochans;
+ uint8_t evdev;
+ uint8_t bpen;
+ uint8_t fcs_strip;
+ uint8_t bcast_mode;
+ uint8_t mcast_mode;
+ uint16_t num_tx_queues;
+ uint64_t hwcap;
+ uint8_t link_up;
+ uint8_t duplex;
+ uint8_t speed;
+ uint16_t mtu;
+ uint8_t mac_addr[ETHER_ADDR_LEN];
+ /* Rx port parameters */
+ struct {
+ bool classifier_enable;
+ bool hash_enable;
+ bool initialized;
+ } pki;
+
+ uint16_t ev_queues;
+ uint16_t ev_ports;
+} __rte_cache_aligned;
+
+struct octeontx_txq {
+ uint16_t queue_id;
+ octeontx_dq_t dq;
+ struct rte_eth_dev *eth_dev;
+} __rte_cache_aligned;
+
+struct octeontx_rxq {
+ uint16_t queue_id;
+ uint16_t port_id;
+ uint8_t evdev;
+ struct rte_eth_dev *eth_dev;
+ uint16_t ev_queues;
+ uint16_t ev_ports;
+} __rte_cache_aligned;
+
+#endif /* __OCTEONTX_ETHDEV_H__ */
diff --git a/drivers/net/xenvirt/virtio_logs.h b/drivers/net/octeontx/octeontx_logs.h
index d6c33f7b..d5da7331 100644
--- a/drivers/net/xenvirt/virtio_logs.h
+++ b/drivers/net/octeontx/octeontx_logs.h
@@ -1,8 +1,7 @@
-/*-
+/*
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -14,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Intel Corporation nor the names of its
+ * * Neither the name of Cavium networks nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -30,41 +29,48 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef __OCTEONTX_LOGS_H__
+#define __OCTEONTX_LOGS_H__
-#ifndef _VIRTIO_LOGS_H_
-#define _VIRTIO_LOGS_H_
-
-#include <rte_log.h>
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_INIT
#define PMD_INIT_LOG(level, fmt, args...) \
RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
-#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+
+#ifdef RTE_LIBRTE_OCTEONTX_DEBUG_INIT
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, ">>")
#else
-#define PMD_INIT_LOG(level, fmt, args...) do { } while(0)
-#define PMD_INIT_FUNC_TRACE() do { } while(0)
+#define PMD_INIT_FUNC_TRACE() do { } while (0)
#endif
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX
+#ifdef RTE_LIBRTE_OCTEONTX_DEBUG_RX
#define PMD_RX_LOG(level, fmt, args...) \
- RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args)
+ RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
#else
-#define PMD_RX_LOG(level, fmt, args...) do { } while(0)
+#define PMD_RX_LOG(level, fmt, args...) do { } while (0)
#endif
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX
+#ifdef RTE_LIBRTE_OCTEONTX_DEBUG_TX
#define PMD_TX_LOG(level, fmt, args...) \
- RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args)
+ RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
#else
-#define PMD_TX_LOG(level, fmt, args...) do { } while(0)
+#define PMD_TX_LOG(level, fmt, args...) do { } while (0)
#endif
-
-#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER
+#ifdef RTE_LIBRTE_OCTEONTX_DEBUG_DRIVER
#define PMD_DRV_LOG(level, fmt, args...) \
- RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args)
+ RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#else
+#define PMD_DRV_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_OCTEONTX_DEBUG_MBOX
+#define PMD_MBOX_LOG(level, fmt, args...) \
+ RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
#else
-#define PMD_DRV_LOG(level, fmt, args...) do { } while(0)
+#define PMD_MBOX_LOG(level, fmt, args...) do { } while (0)
#endif
-#endif /* _VIRTIO_LOGS_H_ */
+#define octeontx_log_err(s, ...) PMD_INIT_LOG(ERR, s, ##__VA_ARGS__)
+#define octeontx_log_dbg(s, ...) PMD_DRV_LOG(DEBUG, s, ##__VA_ARGS__)
+#define octeontx_mbox_log(s, ...) PMD_MBOX_LOG(DEBUG, s, ##__VA_ARGS__)
+
+#endif /* __OCTEONTX_LOGS_H__ */
diff --git a/drivers/net/octeontx/octeontx_rxtx.c b/drivers/net/octeontx/octeontx_rxtx.c
new file mode 100644
index 00000000..c97d5b35
--- /dev/null
+++ b/drivers/net/octeontx/octeontx_rxtx.c
@@ -0,0 +1,127 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium, Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <rte_atomic.h>
+#include <rte_common.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_prefetch.h>
+
+#include "octeontx_ethdev.h"
+#include "octeontx_rxtx.h"
+#include "octeontx_logs.h"
+
+
+static __rte_always_inline uint16_t __hot
+__octeontx_xmit_pkts(void *lmtline_va, void *ioreg_va, int64_t *fc_status_va,
+ struct rte_mbuf *tx_pkt)
+{
+ uint64_t cmd_buf[4];
+ uint16_t gaura_id;
+
+ if (unlikely(*((volatile int64_t *)fc_status_va) < 0))
+ return -ENOSPC;
+
+ /* Get the gaura Id */
+ gaura_id = octeontx_fpa_bufpool_gpool((uintptr_t)tx_pkt->pool->pool_id);
+
+ /* Setup PKO_SEND_HDR_S */
+ cmd_buf[0] = tx_pkt->data_len & 0xffff;
+ cmd_buf[1] = 0x0;
+
+ /* Set don't free bit if reference count > 1 */
+ if (rte_mbuf_refcnt_read(tx_pkt) > 1)
+ cmd_buf[0] |= (1ULL << 58); /* SET DF */
+
+ /* Setup PKO_SEND_GATHER_S */
+ cmd_buf[(1 << 1) | 1] = rte_mbuf_data_iova(tx_pkt);
+ cmd_buf[(1 << 1) | 0] = PKO_SEND_GATHER_SUBDC |
+ PKO_SEND_GATHER_LDTYPE(0x1ull) |
+ PKO_SEND_GATHER_GAUAR((long)gaura_id) |
+ tx_pkt->data_len;
+
+ octeontx_reg_lmtst(lmtline_va, ioreg_va, cmd_buf, PKO_CMD_SZ);
+
+ return 0;
+}
+
+uint16_t __hot
+octeontx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ int count;
+ struct octeontx_txq *txq = tx_queue;
+ octeontx_dq_t *dq = &txq->dq;
+ int res;
+
+ count = 0;
+
+ while (count < nb_pkts) {
+ res = __octeontx_xmit_pkts(dq->lmtline_va, dq->ioreg_va,
+ dq->fc_status_va,
+ tx_pkts[count]);
+ if (res < 0)
+ break;
+
+ count++;
+ }
+
+ return count; /* return number of pkts transmitted */
+}
+
+uint16_t __hot
+octeontx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+ struct octeontx_rxq *rxq;
+ struct rte_event ev;
+ size_t count;
+ uint16_t valid_event;
+
+ rxq = rx_queue;
+ count = 0;
+ while (count < nb_pkts) {
+ valid_event = rte_event_dequeue_burst(rxq->evdev,
+ rxq->ev_ports, &ev,
+ 1, 0);
+ if (!valid_event)
+ break;
+ rx_pkts[count++] = ev.mbuf;
+ }
+
+ return count; /* return number of pkts received */
+}
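octeontx_xmit_pkts() stops at the first descriptor the PKO refuses (back-pressure reported through fc_status_va) and returns how many mbufs it actually queued, while octeontx_recv_pkts() pulls one event per iteration from the event device attached to the queue. These symbols are reached through the generic burst API once the driver installs them as its ethdev burst callbacks (done in octeontx_ethdev.c, which is outside this hunk). A hedged sketch of that calling pattern; the port and queue numbers and the BURST size are placeholders, and EAL plus port setup are assumed to have happened elsewhere:

#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define BURST 32	/* illustrative burst size */

/* Forward one burst from rx_port to tx_port, queue 0 on both sides. */
static void
demo_forward_burst(uint16_t rx_port, uint16_t tx_port)
{
	struct rte_mbuf *pkts[BURST];
	uint16_t nb_rx, nb_tx, i;

	/* Dispatches to octeontx_recv_pkts() when rx_port is this PMD. */
	nb_rx = rte_eth_rx_burst(rx_port, 0, pkts, BURST);

	/* May transmit fewer than nb_rx, e.g. on PKO back-pressure. */
	nb_tx = rte_eth_tx_burst(tx_port, 0, pkts, nb_rx);

	/* The application still owns whatever the driver did not accept. */
	for (i = nb_tx; i < nb_rx; i++)
		rte_pktmbuf_free(pkts[i]);
}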
diff --git a/drivers/net/octeontx/octeontx_rxtx.h b/drivers/net/octeontx/octeontx_rxtx.h
new file mode 100644
index 00000000..382ff2b2
--- /dev/null
+++ b/drivers/net/octeontx/octeontx_rxtx.h
@@ -0,0 +1,137 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium Inc. 2017. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __OCTEONTX_RXTX_H__
+#define __OCTEONTX_RXTX_H__
+
+#include <rte_ethdev.h>
+
+#ifndef __hot
+#define __hot __attribute__((hot))
+#endif
+
+/* Packet type table */
+#define PTYPE_SIZE OCCTX_PKI_LTYPE_LAST
+
+static const uint32_t __rte_cache_aligned
+ptype_table[PTYPE_SIZE][PTYPE_SIZE][PTYPE_SIZE] = {
+ [LC_NONE][LE_NONE][LF_NONE] = RTE_PTYPE_UNKNOWN,
+ [LC_NONE][LE_NONE][LF_IPSEC_ESP] = RTE_PTYPE_UNKNOWN,
+ [LC_NONE][LE_NONE][LF_IPFRAG] = RTE_PTYPE_L4_FRAG,
+ [LC_NONE][LE_NONE][LF_IPCOMP] = RTE_PTYPE_UNKNOWN,
+ [LC_NONE][LE_NONE][LF_TCP] = RTE_PTYPE_L4_TCP,
+ [LC_NONE][LE_NONE][LF_UDP] = RTE_PTYPE_L4_UDP,
+ [LC_NONE][LE_NONE][LF_GRE] = RTE_PTYPE_TUNNEL_GRE,
+ [LC_NONE][LE_NONE][LF_UDP_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE,
+ [LC_NONE][LE_NONE][LF_UDP_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
+ [LC_NONE][LE_NONE][LF_NVGRE] = RTE_PTYPE_TUNNEL_NVGRE,
+
+ [LC_IPV4][LE_NONE][LF_NONE] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_UNKNOWN,
+ [LC_IPV4][LE_NONE][LF_IPSEC_ESP] =
+ RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV4,
+ [LC_IPV4][LE_NONE][LF_IPFRAG] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_FRAG,
+ [LC_IPV4][LE_NONE][LF_IPCOMP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_UNKNOWN,
+ [LC_IPV4][LE_NONE][LF_TCP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
+ [LC_IPV4][LE_NONE][LF_UDP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
+ [LC_IPV4][LE_NONE][LF_GRE] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_GRE,
+ [LC_IPV4][LE_NONE][LF_UDP_GENEVE] =
+ RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_GENEVE,
+ [LC_IPV4][LE_NONE][LF_UDP_VXLAN] =
+ RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_VXLAN,
+ [LC_IPV4][LE_NONE][LF_NVGRE] =
+ RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_NVGRE,
+
+ [LC_IPV4_OPT][LE_NONE][LF_NONE] =
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_UNKNOWN,
+ [LC_IPV4_OPT][LE_NONE][LF_IPSEC_ESP] =
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L3_IPV4,
+ [LC_IPV4_OPT][LE_NONE][LF_IPFRAG] =
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_FRAG,
+ [LC_IPV4_OPT][LE_NONE][LF_IPCOMP] =
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_UNKNOWN,
+ [LC_IPV4_OPT][LE_NONE][LF_TCP] =
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
+ [LC_IPV4_OPT][LE_NONE][LF_UDP] =
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
+ [LC_IPV4_OPT][LE_NONE][LF_GRE] =
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_GRE,
+ [LC_IPV4_OPT][LE_NONE][LF_UDP_GENEVE] =
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_GENEVE,
+ [LC_IPV4_OPT][LE_NONE][LF_UDP_VXLAN] =
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_VXLAN,
+ [LC_IPV4_OPT][LE_NONE][LF_NVGRE] =
+ RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_NVGRE,
+
+ [LC_IPV6][LE_NONE][LF_NONE] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_UNKNOWN,
+ [LC_IPV6][LE_NONE][LF_IPSEC_ESP] =
+ RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L3_IPV4,
+ [LC_IPV6][LE_NONE][LF_IPFRAG] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_FRAG,
+ [LC_IPV6][LE_NONE][LF_IPCOMP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_UNKNOWN,
+ [LC_IPV6][LE_NONE][LF_TCP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
+ [LC_IPV6][LE_NONE][LF_UDP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
+ [LC_IPV6][LE_NONE][LF_GRE] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_TUNNEL_GRE,
+ [LC_IPV6][LE_NONE][LF_UDP_GENEVE] =
+ RTE_PTYPE_L3_IPV6 | RTE_PTYPE_TUNNEL_GENEVE,
+ [LC_IPV6][LE_NONE][LF_UDP_VXLAN] =
+ RTE_PTYPE_L3_IPV6 | RTE_PTYPE_TUNNEL_VXLAN,
+ [LC_IPV6][LE_NONE][LF_NVGRE] =
+ RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_NVGRE,
+ [LC_IPV6_OPT][LE_NONE][LF_NONE] =
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_UNKNOWN,
+ [LC_IPV6_OPT][LE_NONE][LF_IPSEC_ESP] =
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L3_IPV4,
+ [LC_IPV6_OPT][LE_NONE][LF_IPFRAG] =
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_FRAG,
+ [LC_IPV6_OPT][LE_NONE][LF_IPCOMP] =
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_UNKNOWN,
+ [LC_IPV6_OPT][LE_NONE][LF_TCP] =
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
+ [LC_IPV6_OPT][LE_NONE][LF_UDP] =
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
+ [LC_IPV6_OPT][LE_NONE][LF_GRE] =
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_TUNNEL_GRE,
+ [LC_IPV6_OPT][LE_NONE][LF_UDP_GENEVE] =
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_TUNNEL_GENEVE,
+ [LC_IPV6_OPT][LE_NONE][LF_UDP_VXLAN] =
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_TUNNEL_VXLAN,
+ [LC_IPV6_OPT][LE_NONE][LF_NVGRE] =
+ RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_TUNNEL_NVGRE,
+
+};
+
+uint16_t
+octeontx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+uint16_t
+octeontx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+
+#endif /* __OCTEONTX_RXTX_H__ */
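The ptype_table added here appears to map the PKI parser's LC/LE/LF layer codes directly to rte_mbuf packet_type values, so receive-side classification becomes a single indexed load. A reduced sketch of how such a table would be consumed; the DEMO_* enums and the helper are stand-ins for illustration, not the driver's real parser codes:

#include <stdint.h>
#include <rte_mbuf.h>

/* Reduced stand-ins for the PKI layer codes used to index the table. */
enum { DEMO_LC_NONE, DEMO_LC_IPV4, DEMO_LC_MAX };
enum { DEMO_LF_NONE, DEMO_LF_TCP, DEMO_LF_UDP, DEMO_LF_MAX };

/* Same idea as ptype_table: classification codes in, RTE_PTYPE_* out. */
static const uint32_t demo_ptype[DEMO_LC_MAX][DEMO_LF_MAX] = {
	[DEMO_LC_NONE][DEMO_LF_NONE] = RTE_PTYPE_UNKNOWN,
	[DEMO_LC_IPV4][DEMO_LF_TCP]  = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
	[DEMO_LC_IPV4][DEMO_LF_UDP]  = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
};

/* Tag an mbuf from the (assumed) parser codes carried with the packet. */
static void
demo_set_ptype(struct rte_mbuf *m, unsigned int lc, unsigned int lf)
{
	m->packet_type = demo_ptype[lc][lf];
}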
diff --git a/drivers/net/octeontx/rte_pmd_octeontx_version.map b/drivers/net/octeontx/rte_pmd_octeontx_version.map
new file mode 100644
index 00000000..a70bd197
--- /dev/null
+++ b/drivers/net/octeontx/rte_pmd_octeontx_version.map
@@ -0,0 +1,4 @@
+DPDK_17.11 {
+
+ local: *;
+};
diff --git a/drivers/net/pcap/Makefile b/drivers/net/pcap/Makefile
index 7ebd0bef..b6487d42 100644
--- a/drivers/net/pcap/Makefile
+++ b/drivers/net/pcap/Makefile
@@ -40,6 +40,9 @@ LIB = librte_pmd_pcap.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lpcap
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_vdev
EXPORT_MAP := rte_pmd_pcap_version.map
diff --git a/drivers/net/pcap/rte_eth_pcap.c b/drivers/net/pcap/rte_eth_pcap.c
index defb3b41..5a86752f 100644
--- a/drivers/net/pcap/rte_eth_pcap.c
+++ b/drivers/net/pcap/rte_eth_pcap.c
@@ -44,7 +44,7 @@
#include <rte_kvargs.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#define RTE_ETH_PCAP_SNAPSHOT_LEN 65535
#define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN
@@ -75,7 +75,7 @@ struct queue_stat {
struct pcap_rx_queue {
pcap_t *pcap;
- uint8_t in_port;
+ uint16_t in_port;
struct rte_mempool *mb_pool;
struct queue_stat rx_stat;
char name[PATH_MAX];
@@ -411,11 +411,13 @@ open_single_tx_pcap(const char *pcap_filename, pcap_dumper_t **dumper)
/* The dumper is created using the previous pcap_t reference */
*dumper = pcap_dump_open(tx_pcap, pcap_filename);
if (*dumper == NULL) {
+ pcap_close(tx_pcap);
RTE_LOG(ERR, PMD, "Couldn't open %s for writing.\n",
pcap_filename);
return -1;
}
+ pcap_close(tx_pcap);
return 0;
}
@@ -560,7 +562,7 @@ eth_dev_info(struct rte_eth_dev *dev,
dev_info->min_rx_bufsize = 0;
}
-static void
+static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
unsigned int i;
@@ -592,6 +594,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->opackets = tx_packets_total;
stats->obytes = tx_bytes_total;
stats->oerrors = tx_packets_err_total;
+
+ return 0;
}
static void
@@ -838,7 +842,6 @@ pmd_init_internals(struct rte_vdev_device *vdev,
*/
(*eth_dev)->data = data;
(*eth_dev)->dev_ops = &ops;
- data->dev_flags = RTE_ETH_DEV_DETACHABLE;
return 0;
}
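The two added pcap_close() calls fix a leak of the pcap_t that open_single_tx_pcap() creates only to seed the dumper: pcap_dump_open() records the link type and snap length in the file header, and the patch then releases the handle on both the error and the success path. A standalone libpcap sketch of the same pattern; the output path and snap length are arbitrary choices for illustration:

#include <stdio.h>
#include <pcap.h>

/* Create a capture file for writing; the dead handle only seeds the header. */
static pcap_dumper_t *
demo_open_dumper(const char *path)
{
	pcap_t *p;
	pcap_dumper_t *dumper;

	p = pcap_open_dead(DLT_EN10MB, 65535);
	if (p == NULL)
		return NULL;

	dumper = pcap_dump_open(p, path);
	if (dumper == NULL)
		fprintf(stderr, "could not open %s for writing\n", path);

	/* Close the handle in both outcomes, mirroring the fix above. */
	pcap_close(p);
	return dumper;
}

int main(void)
{
	pcap_dumper_t *d = demo_open_dumper("/tmp/demo.pcap");

	if (d != NULL)
		pcap_dump_close(d);
	return d != NULL ? 0 : 1;
}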
diff --git a/drivers/net/qede/Makefile b/drivers/net/qede/Makefile
index f03441d9..ccbffa45 100644
--- a/drivers/net/qede/Makefile
+++ b/drivers/net/qede/Makefile
@@ -13,6 +13,9 @@ LIB = librte_pmd_qede.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
EXPORT_MAP := rte_pmd_qede_version.map
@@ -69,8 +72,9 @@ CFLAGS_BASE_DRIVER += -Wno-sometimes-uninitialized
ifeq ($(shell clang -Wno-pointer-bool-conversion -Werror -E - < /dev/null > /dev/null 2>&1; echo $$?),0)
CFLAGS_BASE_DRIVER += -Wno-pointer-bool-conversion
endif
-else
-CFLAGS_BASE_DRIVER += -wd188 #188: enumerated type mixed with another type
+else #ICC
+CFLAGS_BASE_DRIVER += -wd188 #188: enumerated type mixed with another type
+CFLAGS_qede_ethdev.o += -wd279 #279: controlling expression is constant
endif
#
@@ -78,7 +82,7 @@ endif
# to disable warnings in them
#
#
-BASE_DRIVER_OBJS=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))
+BASE_DRIVER_OBJS=$(sort $(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c))))
$(foreach obj, $(BASE_DRIVER_OBJS), $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
VPATH += $(SRCDIR)/base
diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 2603a8b3..fe42f325 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -144,12 +144,12 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
*phys = 0;
return OSAL_NULL;
}
- *phys = mz->phys_addr;
+ *phys = mz->iova;
ecore_mz_mapping[ecore_mz_count++] = mz;
DP_VERBOSE(p_dev, ECORE_MSG_SP,
"Allocated dma memory size=%zu phys=0x%lx"
" virt=%p core=%d\n",
- mz->len, (unsigned long)mz->phys_addr, mz->addr, core_id);
+ mz->len, (unsigned long)mz->iova, mz->addr, core_id);
return mz->addr;
}
@@ -182,12 +182,12 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
*phys = 0;
return OSAL_NULL;
}
- *phys = mz->phys_addr;
+ *phys = mz->iova;
ecore_mz_mapping[ecore_mz_count++] = mz;
DP_VERBOSE(p_dev, ECORE_MSG_SP,
"Allocated aligned dma memory size=%zu phys=0x%lx"
" virt=%p core=%d\n",
- mz->len, (unsigned long)mz->phys_addr, mz->addr, core_id);
+ mz->len, (unsigned long)mz->iova, mz->addr, core_id);
return mz->addr;
}
@@ -196,7 +196,7 @@ void osal_dma_free_mem(struct ecore_dev *p_dev, dma_addr_t phys)
uint16_t j;
for (j = 0 ; j < ecore_mz_count; j++) {
- if (phys == ecore_mz_mapping[j]->phys_addr) {
+ if (phys == ecore_mz_mapping[j]->iova) {
DP_VERBOSE(p_dev, ECORE_MSG_SP,
"Free memzone %s\n", ecore_mz_mapping[j]->name);
rte_memzone_free(ecore_mz_mapping[j]);
@@ -292,3 +292,15 @@ qede_hw_err_notify(struct ecore_hwfn *p_hwfn, enum ecore_hw_err_type err_type)
DP_ERR(p_hwfn, "HW error occurred [%s]\n", err_str);
ecore_int_attn_clr_enable(p_hwfn->p_dev, true);
}
+
+u32 qede_crc32(u32 crc, u8 *ptr, u32 length)
+{
+ int i;
+
+ while (length--) {
+ crc ^= *ptr++;
+ for (i = 0; i < 8; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
+ }
+ return crc;
+}
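qede_crc32() gives the previously stubbed OSAL_CRC32() a real implementation: a bitwise, LSB-first CRC-32 over the reflected polynomial 0xEDB88320 (note that (crc & 1) is evaluated on the value before the shift is assigned, so this is the textbook update). A standalone check of the routine; the 0xFFFFFFFF seed, the final inversion and the expected value for "123456789" come from the common CRC-32 convention rather than from the patch itself:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint32_t u32;
typedef uint8_t u8;

/* Same bitwise update as the qede_crc32() added above. */
static u32 demo_crc32(u32 crc, const u8 *ptr, u32 length)
{
	int i;

	while (length--) {
		crc ^= *ptr++;
		for (i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
	}
	return crc;
}

int main(void)
{
	const char *s = "123456789";
	/* Conventional seed and final inversion; should print cbf43926. */
	u32 crc = ~demo_crc32(0xffffffff, (const u8 *)s, strlen(s));

	printf("%08x\n", crc);
	return 0;
}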
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 3acf8f7c..52c2f0ec 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -23,6 +23,7 @@
/* Forward declaration */
struct ecore_dev;
struct ecore_hwfn;
+struct ecore_ptt;
struct ecore_vf_acquire_sw_info;
struct vf_pf_resc_request;
enum ecore_mcp_protocol_type;
@@ -45,6 +46,8 @@ void qed_link_update(struct ecore_hwfn *hwfn);
#define OSAL_WARN(arg1, arg2, arg3, ...) (0)
+#define UNUSED(x) (void)(x)
+
/* Memory Types */
typedef uint8_t u8;
typedef uint16_t u16;
@@ -60,7 +63,7 @@ typedef u32 OSAL_BE32;
#define osal_uintptr_t uintptr_t
-typedef phys_addr_t dma_addr_t;
+typedef rte_iova_t dma_addr_t;
typedef rte_spinlock_t osal_spinlock_t;
@@ -147,6 +150,9 @@ void osal_dma_free_mem(struct ecore_dev *edev, dma_addr_t phys);
((u8 *)(uintptr_t)(_p_hwfn->doorbells) + \
(_db_addr)), (u32)_val)
+#define DIRECT_REG_WR64(hwfn, addr, value) nothing
+#define DIRECT_REG_RD64(hwfn, addr) 0
+
/* Mutexes */
typedef pthread_mutex_t osal_mutex_t;
@@ -161,7 +167,12 @@ typedef pthread_mutex_t osal_mutex_t;
#define OSAL_SPIN_LOCK_INIT(lock) rte_spinlock_init(lock)
#define OSAL_SPIN_LOCK(lock) rte_spinlock_lock(lock)
#define OSAL_SPIN_UNLOCK(lock) rte_spinlock_unlock(lock)
-#define OSAL_SPIN_LOCK_IRQSAVE(lock, flags) nothing
+#define OSAL_SPIN_LOCK_IRQSAVE(lock, flags) \
+ do { \
+ UNUSED(lock); \
+ flags = 0; \
+ UNUSED(flags); \
+ } while (0)
#define OSAL_SPIN_UNLOCK_IRQSAVE(lock, flags) nothing
#define OSAL_SPIN_LOCK_ALLOC(hwfn, lock) nothing
#define OSAL_SPIN_LOCK_DEALLOC(lock) nothing
@@ -328,6 +339,7 @@ u32 qede_find_first_zero_bit(unsigned long *, u32);
#define OSAL_BITMAP_WEIGHT(bitmap, count) 0
#define OSAL_LINK_UPDATE(hwfn) qed_link_update(hwfn)
+#define OSAL_TRANSCEIVER_UPDATE(hwfn) nothing
#define OSAL_DCBX_AEN(hwfn, mib_type) nothing
/* SR-IOV channel */
@@ -344,8 +356,9 @@ u32 qede_find_first_zero_bit(unsigned long *, u32);
#define OSAL_IOV_VF_VPORT_UPDATE(hwfn, vfid, p_params, p_mask) 0
#define OSAL_VF_UPDATE_ACQUIRE_RESC_RESP(_dev_p, _resc_resp) 0
#define OSAL_IOV_GET_OS_TYPE() 0
-#define OSAL_IOV_VF_MSG_TYPE(hwfn, vfid, vf_msg_type) 0
-#define OSAL_IOV_PF_RESP_TYPE(hwfn, vfid, pf_resp_type) 0
+#define OSAL_IOV_VF_MSG_TYPE(hwfn, vfid, vf_msg_type) nothing
+#define OSAL_IOV_PF_RESP_TYPE(hwfn, vfid, pf_resp_type) nothing
+#define OSAL_IOV_VF_VPORT_STOP(hwfn, vf) nothing
u32 qede_unzip_data(struct ecore_hwfn *p_hwfn, u32 input_len,
u8 *input_buf, u32 max_size, u8 *unzip_buf);
@@ -365,7 +378,7 @@ void qede_hw_err_notify(struct ecore_hwfn *p_hwfn,
qede_hw_err_notify(hwfn, err_type)
#define OSAL_NVM_IS_ACCESS_ENABLED(hwfn) (1)
-#define OSAL_NUM_ACTIVE_CPU() 0
+#define OSAL_NUM_CPUS() 0
/* Utility functions */
@@ -414,7 +427,9 @@ u32 qede_osal_log2(u32);
#define OSAL_REG_ADDR(_p_hwfn, _offset) \
(void *)((u8 *)(uintptr_t)(_p_hwfn->regview) + (_offset))
#define OSAL_PAGE_SIZE 4096
+#define OSAL_CACHE_LINE_SIZE RTE_CACHE_LINE_SIZE
#define OSAL_IOMEM volatile
+#define OSAL_UNUSED __attribute__((unused))
#define OSAL_UNLIKELY(x) __builtin_expect(!!(x), 0)
#define OSAL_MIN_T(type, __min1, __min2) \
((type)(__min1) < (type)(__min2) ? (type)(__min1) : (type)(__min2))
@@ -427,10 +442,17 @@ void qede_get_mcp_proto_stats(struct ecore_dev *, enum ecore_mcp_protocol_type,
qede_get_mcp_proto_stats(dev, type, stats)
#define OSAL_SLOWPATH_IRQ_REQ(p_hwfn) (0)
-#define OSAL_CRC32(crc, buf, length) 0
+
+u32 qede_crc32(u32 crc, u8 *ptr, u32 length);
+#define OSAL_CRC32(crc, buf, length) qede_crc32(crc, buf, length)
#define OSAL_CRC8_POPULATE(table, polynomial) nothing
#define OSAL_CRC8(table, pdata, nbytes, crc) 0
-#define OSAL_MFW_TLV_REQ(p_hwfn) (0)
+#define OSAL_MFW_TLV_REQ(p_hwfn) nothing
#define OSAL_MFW_FILL_TLV_DATA(type, buf, data) (0)
+#define OSAL_MFW_CMD_PREEMPT(p_hwfn) nothing
#define OSAL_PF_VALIDATE_MODIFY_TUNN_CONFIG(p_hwfn, mask, b_update, tunn) 0
+
+#define OSAL_DIV_S64(a, b) ((a) / (b))
+#define OSAL_LLDP_RX_TLVS(p_hwfn, tlv_buf, tlv_size) nothing
+
#endif /* __BCM_OSAL_H */
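These OSAL changes track DPDK 17.11's IOVA rework: dma_addr_t becomes rte_iova_t, and the coherent allocators in bcm_osal.c above now hand back mz->iova instead of mz->phys_addr. A sketch of that allocation pattern, assuming rte_eal_init() has already run; the zone name, size and alignment are placeholders:

#include <stdio.h>
#include <rte_memory.h>
#include <rte_memzone.h>

/* Reserve DMA-able memory and return both its virtual and bus addresses. */
static void *
demo_dma_alloc(const char *name, size_t size, rte_iova_t *iova)
{
	const struct rte_memzone *mz;

	mz = rte_memzone_reserve_aligned(name, size, SOCKET_ID_ANY,
					 0, RTE_CACHE_LINE_SIZE);
	if (mz == NULL) {
		*iova = 0;
		return NULL;
	}

	*iova = mz->iova;	/* was mz->phys_addr before the IOVA rework */
	return mz->addr;
}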
diff --git a/drivers/net/qede/base/common_hsi.h b/drivers/net/qede/base/common_hsi.h
index bfe50e1f..9a6059ac 100644
--- a/drivers/net/qede/base/common_hsi.h
+++ b/drivers/net/qede/base/common_hsi.h
@@ -97,8 +97,8 @@
#define FW_MAJOR_VERSION 8
-#define FW_MINOR_VERSION 20
-#define FW_REVISION_VERSION 0
+#define FW_MINOR_VERSION 30
+#define FW_REVISION_VERSION 12
#define FW_ENGINEERING_VERSION 0
/***********************/
@@ -106,73 +106,70 @@
/***********************/
/* PCI functions */
-#define MAX_NUM_PORTS_K2 (4)
-#define MAX_NUM_PORTS_BB (2)
-#define MAX_NUM_PORTS (MAX_NUM_PORTS_K2)
-
-#define MAX_NUM_PFS_K2 (16)
-#define MAX_NUM_PFS_BB (8)
-#define MAX_NUM_PFS (MAX_NUM_PFS_K2)
-#define MAX_NUM_OF_PFS_IN_CHIP (16) /* On both engines */
-
-#define MAX_NUM_VFS_BB (120)
-#define MAX_NUM_VFS_K2 (192)
-#define E4_MAX_NUM_VFS (MAX_NUM_VFS_K2)
-#define COMMON_MAX_NUM_VFS (240)
-
-#define MAX_NUM_FUNCTIONS_BB (MAX_NUM_PFS_BB + MAX_NUM_VFS_BB)
-#define MAX_NUM_FUNCTIONS_K2 (MAX_NUM_PFS_K2 + MAX_NUM_VFS_K2)
-#define MAX_NUM_FUNCTIONS (MAX_NUM_PFS + E4_MAX_NUM_VFS)
+#define MAX_NUM_PORTS_BB (2)
+#define MAX_NUM_PORTS_K2 (4)
+#define MAX_NUM_PORTS_E5 (4)
+#define MAX_NUM_PORTS (MAX_NUM_PORTS_E5)
+
+#define MAX_NUM_PFS_BB (8)
+#define MAX_NUM_PFS_K2 (16)
+#define MAX_NUM_PFS_E5 (16)
+#define MAX_NUM_PFS (MAX_NUM_PFS_E5)
+#define MAX_NUM_OF_PFS_IN_CHIP (16) /* On both engines */
+
+#define MAX_NUM_VFS_BB (120)
+#define MAX_NUM_VFS_K2 (192)
+#define MAX_NUM_VFS_E4 (MAX_NUM_VFS_K2)
+#define MAX_NUM_VFS_E5 (240)
+#define COMMON_MAX_NUM_VFS (MAX_NUM_VFS_E5)
+
+#define MAX_NUM_FUNCTIONS_BB (MAX_NUM_PFS_BB + MAX_NUM_VFS_BB)
+#define MAX_NUM_FUNCTIONS_K2 (MAX_NUM_PFS_K2 + MAX_NUM_VFS_K2)
+#define MAX_NUM_FUNCTIONS (MAX_NUM_PFS + MAX_NUM_VFS_E4)
/* in both BB and K2, the VF number starts from 16. so for arrays containing all
* possible PFs and VFs - we need a constant for this size
*/
-#define MAX_FUNCTION_NUMBER_BB (MAX_NUM_PFS + MAX_NUM_VFS_BB)
-#define MAX_FUNCTION_NUMBER_K2 (MAX_NUM_PFS + MAX_NUM_VFS_K2)
-#define MAX_FUNCTION_NUMBER (MAX_NUM_PFS + E4_MAX_NUM_VFS)
-
-#define MAX_NUM_VPORTS_K2 (208)
-#define MAX_NUM_VPORTS_BB (160)
-#define MAX_NUM_VPORTS (MAX_NUM_VPORTS_K2)
+#define MAX_FUNCTION_NUMBER_BB (MAX_NUM_PFS + MAX_NUM_VFS_BB)
+#define MAX_FUNCTION_NUMBER_K2 (MAX_NUM_PFS + MAX_NUM_VFS_K2)
+#define MAX_FUNCTION_NUMBER_E4 (MAX_NUM_PFS + MAX_NUM_VFS_E4)
+#define MAX_FUNCTION_NUMBER_E5 (MAX_NUM_PFS + MAX_NUM_VFS_E5)
+#define COMMON_MAX_FUNCTION_NUMBER (MAX_NUM_PFS + MAX_NUM_VFS_E5)
+
+#define MAX_NUM_VPORTS_K2 (208)
+#define MAX_NUM_VPORTS_BB (160)
+#define MAX_NUM_VPORTS_E4 (MAX_NUM_VPORTS_K2)
+#define MAX_NUM_VPORTS_E5 (256)
+#define COMMON_MAX_NUM_VPORTS (MAX_NUM_VPORTS_E5)
-#define MAX_NUM_L2_QUEUES_K2 (320)
#define MAX_NUM_L2_QUEUES_BB (256)
-#define MAX_NUM_L2_QUEUES (MAX_NUM_L2_QUEUES_K2)
+#define MAX_NUM_L2_QUEUES_K2 (320)
+#define MAX_NUM_L2_QUEUES_E5 (320) /* TODO_E5_VITALY - fix to 512 */
+#define MAX_NUM_L2_QUEUES (MAX_NUM_L2_QUEUES_E5)
/* Traffic classes in network-facing blocks (PBF, BTB, NIG, BRB, PRS and QM) */
-/* 4-Port K2. */
-#define NUM_PHYS_TCS_4PORT_K2 (4)
-#define NUM_OF_PHYS_TCS (8)
-
-#define NUM_TCS_4PORT_K2 (NUM_PHYS_TCS_4PORT_K2 + 1)
-#define NUM_OF_TCS (NUM_OF_PHYS_TCS + 1)
-
-#define LB_TC (NUM_OF_PHYS_TCS)
-
-/* Num of possible traffic priority values */
-#define NUM_OF_PRIO (8)
-
-#define MAX_NUM_VOQS_K2 (NUM_TCS_4PORT_K2 * MAX_NUM_PORTS_K2)
-#define MAX_NUM_VOQS_BB (NUM_OF_TCS * MAX_NUM_PORTS_BB)
-#define MAX_NUM_VOQS (MAX_NUM_VOQS_K2)
-#define MAX_PHYS_VOQS (NUM_OF_PHYS_TCS * MAX_NUM_PORTS_BB)
+#define NUM_PHYS_TCS_4PORT_K2 4
+#define NUM_PHYS_TCS_4PORT_TX_E5 6
+#define NUM_PHYS_TCS_4PORT_RX_E5 4
+#define NUM_OF_PHYS_TCS 8
+#define PURE_LB_TC NUM_OF_PHYS_TCS
+#define NUM_TCS_4PORT_K2 (NUM_PHYS_TCS_4PORT_K2 + 1)
+#define NUM_TCS_4PORT_TX_E5 (NUM_PHYS_TCS_4PORT_TX_E5 + 1)
+#define NUM_TCS_4PORT_RX_E5 (NUM_PHYS_TCS_4PORT_RX_E5 + 1)
+#define NUM_OF_TCS (NUM_OF_PHYS_TCS + 1)
/* CIDs */
-#define E4_NUM_OF_CONNECTION_TYPES (8)
-#define NUM_OF_TASK_TYPES (8)
-#define NUM_OF_LCIDS (320)
-#define NUM_OF_LTIDS (320)
-
-/* Clock values */
-#define MASTER_CLK_FREQ_E4 (375e6)
-#define STORM_CLK_FREQ_E4 (1000e6)
-#define CLK25M_CLK_FREQ_E4 (25e6)
+#define NUM_OF_CONNECTION_TYPES_E4 (8)
+#define NUM_OF_CONNECTION_TYPES_E5 (16)
+#define NUM_OF_TASK_TYPES (8)
+#define NUM_OF_LCIDS (320)
+#define NUM_OF_LTIDS (320)
/* Global PXP windows (GTT) */
-#define NUM_OF_GTT 19
-#define GTT_DWORD_SIZE_BITS 10
-#define GTT_BYTE_SIZE_BITS (GTT_DWORD_SIZE_BITS + 2)
-#define GTT_DWORD_SIZE (1 << GTT_DWORD_SIZE_BITS)
+#define NUM_OF_GTT 19
+#define GTT_DWORD_SIZE_BITS 10
+#define GTT_BYTE_SIZE_BITS (GTT_DWORD_SIZE_BITS + 2)
+#define GTT_DWORD_SIZE (1 << GTT_DWORD_SIZE_BITS)
/* Tools Version */
#define TOOLS_VERSION 10
@@ -417,49 +414,51 @@
#define CAU_FSM_ETH_TX 1
/* Number of Protocol Indices per Status Block */
-#define PIS_PER_SB 12
+#define PIS_PER_SB_E4 12
+#define PIS_PER_SB_E5 8
+#define MAX_PIS_PER_SB_E4 OSAL_MAX_T(PIS_PER_SB_E4, PIS_PER_SB_E5)
/* fsm is stopped or not valid for this sb */
-#define CAU_HC_STOPPED_STATE 3
+#define CAU_HC_STOPPED_STATE 3
/* fsm is working without interrupt coalescing for this sb*/
-#define CAU_HC_DISABLE_STATE 4
+#define CAU_HC_DISABLE_STATE 4
/* fsm is working with interrupt coalescing for this sb*/
-#define CAU_HC_ENABLE_STATE 0
+#define CAU_HC_ENABLE_STATE 0
/*****************/
/* IGU CONSTANTS */
/*****************/
-#define MAX_SB_PER_PATH_K2 (368)
-#define MAX_SB_PER_PATH_BB (288)
-#define MAX_TOT_SB_PER_PATH \
- MAX_SB_PER_PATH_K2
+#define MAX_SB_PER_PATH_K2 (368)
+#define MAX_SB_PER_PATH_BB (288)
+#define MAX_SB_PER_PATH_E5 (512)
+#define MAX_TOT_SB_PER_PATH MAX_SB_PER_PATH_E5
-#define MAX_SB_PER_PF_MIMD 129
-#define MAX_SB_PER_PF_SIMD 64
-#define MAX_SB_PER_VF 64
+#define MAX_SB_PER_PF_MIMD 129
+#define MAX_SB_PER_PF_SIMD 64
+#define MAX_SB_PER_VF 64
/* Memory addresses on the BAR for the IGU Sub Block */
-#define IGU_MEM_BASE 0x0000
+#define IGU_MEM_BASE 0x0000
-#define IGU_MEM_MSIX_BASE 0x0000
-#define IGU_MEM_MSIX_UPPER 0x0101
-#define IGU_MEM_MSIX_RESERVED_UPPER 0x01ff
+#define IGU_MEM_MSIX_BASE 0x0000
+#define IGU_MEM_MSIX_UPPER 0x0101
+#define IGU_MEM_MSIX_RESERVED_UPPER 0x01ff
-#define IGU_MEM_PBA_MSIX_BASE 0x0200
-#define IGU_MEM_PBA_MSIX_UPPER 0x0202
-#define IGU_MEM_PBA_MSIX_RESERVED_UPPER 0x03ff
+#define IGU_MEM_PBA_MSIX_BASE 0x0200
+#define IGU_MEM_PBA_MSIX_UPPER 0x0202
+#define IGU_MEM_PBA_MSIX_RESERVED_UPPER 0x03ff
-#define IGU_CMD_INT_ACK_BASE 0x0400
-#define IGU_CMD_INT_ACK_UPPER (IGU_CMD_INT_ACK_BASE + \
- MAX_TOT_SB_PER_PATH - \
- 1)
-#define IGU_CMD_INT_ACK_RESERVED_UPPER 0x05ff
+#define IGU_CMD_INT_ACK_BASE 0x0400
+#define IGU_CMD_INT_ACK_UPPER (IGU_CMD_INT_ACK_BASE + \
+ MAX_TOT_SB_PER_PATH - \
+ 1)
+#define IGU_CMD_INT_ACK_RESERVED_UPPER 0x05ff
-#define IGU_CMD_ATTN_BIT_UPD_UPPER 0x05f0
-#define IGU_CMD_ATTN_BIT_SET_UPPER 0x05f1
-#define IGU_CMD_ATTN_BIT_CLR_UPPER 0x05f2
+#define IGU_CMD_ATTN_BIT_UPD_UPPER 0x05f0
+#define IGU_CMD_ATTN_BIT_SET_UPPER 0x05f1
+#define IGU_CMD_ATTN_BIT_CLR_UPPER 0x05f2
#define IGU_REG_SISR_MDPC_WMASK_UPPER 0x05f3
#define IGU_REG_SISR_MDPC_WMASK_LSB_UPPER 0x05f4
@@ -467,8 +466,8 @@
#define IGU_REG_SISR_MDPC_WOMASK_UPPER 0x05f6
#define IGU_CMD_PROD_UPD_BASE 0x0600
-#define IGU_CMD_PROD_UPD_UPPER (IGU_CMD_PROD_UPD_BASE +\
- MAX_TOT_SB_PER_PATH - \
+#define IGU_CMD_PROD_UPD_UPPER (IGU_CMD_PROD_UPD_BASE + \
+ MAX_TOT_SB_PER_PATH - \
1)
#define IGU_CMD_PROD_UPD_RESERVED_UPPER 0x07ff
@@ -491,16 +490,16 @@
#define PXP_PER_PF_ENTRY_SIZE 8
#define PXP_NUM_GLOBAL_WINDOWS 243
#define PXP_GLOBAL_ENTRY_SIZE 4
-#define PXP_ADMIN_WINDOW_ALLOWED_LENGTH 4
+#define PXP_ADMIN_WINDOW_ALLOWED_LENGTH 4
#define PXP_PF_WINDOW_ADMIN_START 0
#define PXP_PF_WINDOW_ADMIN_LENGTH 0x1000
#define PXP_PF_WINDOW_ADMIN_END (PXP_PF_WINDOW_ADMIN_START + \
- PXP_PF_WINDOW_ADMIN_LENGTH - 1)
+ PXP_PF_WINDOW_ADMIN_LENGTH - 1)
#define PXP_PF_WINDOW_ADMIN_PER_PF_START 0
#define PXP_PF_WINDOW_ADMIN_PER_PF_LENGTH (PXP_NUM_PF_WINDOWS * \
PXP_PER_PF_ENTRY_SIZE)
-#define PXP_PF_WINDOW_ADMIN_PER_PF_END (PXP_PF_WINDOW_ADMIN_PER_PF_START + \
- PXP_PF_WINDOW_ADMIN_PER_PF_LENGTH - 1)
+#define PXP_PF_WINDOW_ADMIN_PER_PF_END (PXP_PF_WINDOW_ADMIN_PER_PF_START + \
+ PXP_PF_WINDOW_ADMIN_PER_PF_LENGTH - 1)
#define PXP_PF_WINDOW_ADMIN_GLOBAL_START 0x200
#define PXP_PF_WINDOW_ADMIN_GLOBAL_LENGTH (PXP_NUM_GLOBAL_WINDOWS * \
PXP_GLOBAL_ENTRY_SIZE)
@@ -575,19 +574,79 @@
#define PXP_BAR0_FIRST_INVALID_ADDRESS \
(PXP_BAR0_END_PSDM + 1)
-#define PXP_ILT_PAGE_SIZE_NUM_BITS_MIN 12
-#define PXP_ILT_BLOCK_FACTOR_MULTIPLIER 1024
-
-/* ILT Records */
+/* VF BAR */
+#define PXP_VF_BAR0 0
+
+#define PXP_VF_BAR0_START_IGU 0
+#define PXP_VF_BAR0_IGU_LENGTH 0x3000
+#define PXP_VF_BAR0_END_IGU \
+ (PXP_VF_BAR0_START_IGU + PXP_VF_BAR0_IGU_LENGTH - 1)
+
+#define PXP_VF_BAR0_START_DQ 0x3000
+#define PXP_VF_BAR0_DQ_LENGTH 0x200
+#define PXP_VF_BAR0_DQ_OPAQUE_OFFSET 0
+#define PXP_VF_BAR0_ME_OPAQUE_ADDRESS \
+ (PXP_VF_BAR0_START_DQ + PXP_VF_BAR0_DQ_OPAQUE_OFFSET)
+#define PXP_VF_BAR0_ME_CONCRETE_ADDRESS \
+ (PXP_VF_BAR0_ME_OPAQUE_ADDRESS + 4)
+#define PXP_VF_BAR0_END_DQ \
+ (PXP_VF_BAR0_START_DQ + PXP_VF_BAR0_DQ_LENGTH - 1)
+
+#define PXP_VF_BAR0_START_TSDM_ZONE_B 0x3200
+#define PXP_VF_BAR0_SDM_LENGTH_ZONE_B 0x200
+#define PXP_VF_BAR0_END_TSDM_ZONE_B \
+ (PXP_VF_BAR0_START_TSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
+
+#define PXP_VF_BAR0_START_MSDM_ZONE_B 0x3400
+#define PXP_VF_BAR0_END_MSDM_ZONE_B \
+ (PXP_VF_BAR0_START_MSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
+
+#define PXP_VF_BAR0_START_USDM_ZONE_B 0x3600
+#define PXP_VF_BAR0_END_USDM_ZONE_B \
+ (PXP_VF_BAR0_START_USDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
+
+#define PXP_VF_BAR0_START_XSDM_ZONE_B 0x3800
+#define PXP_VF_BAR0_END_XSDM_ZONE_B \
+ (PXP_VF_BAR0_START_XSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
+
+#define PXP_VF_BAR0_START_YSDM_ZONE_B 0x3a00
+#define PXP_VF_BAR0_END_YSDM_ZONE_B \
+ (PXP_VF_BAR0_START_YSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
+
+#define PXP_VF_BAR0_START_PSDM_ZONE_B 0x3c00
+#define PXP_VF_BAR0_END_PSDM_ZONE_B \
+ (PXP_VF_BAR0_START_PSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
+
+#define PXP_VF_BAR0_START_GRC 0x3E00
+#define PXP_VF_BAR0_GRC_LENGTH 0x200
+#define PXP_VF_BAR0_END_GRC \
+ (PXP_VF_BAR0_START_GRC + PXP_VF_BAR0_GRC_LENGTH - 1)
+
+#define PXP_VF_BAR0_START_SDM_ZONE_A 0x4000
+#define PXP_VF_BAR0_END_SDM_ZONE_A 0x10000
+
+#define PXP_VF_BAR0_START_IGU2 0x10000
+#define PXP_VF_BAR0_IGU2_LENGTH 0xD000
+#define PXP_VF_BAR0_END_IGU2 \
+ (PXP_VF_BAR0_START_IGU2 + PXP_VF_BAR0_IGU2_LENGTH - 1)
+
+#define PXP_VF_BAR0_GRC_WINDOW_LENGTH 32
+
+#define PXP_ILT_PAGE_SIZE_NUM_BITS_MIN 12
+#define PXP_ILT_BLOCK_FACTOR_MULTIPLIER 1024
+
+// ILT Records
#define PXP_NUM_ILT_RECORDS_BB 7600
#define PXP_NUM_ILT_RECORDS_K2 11000
-#define MAX_NUM_ILT_RECORDS MAX(PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2)
-
+#define MAX_NUM_ILT_RECORDS \
+ OSAL_MAX_T(PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2)
-/* Host Interface */
-#define PXP_QUEUES_ZONE_MAX_NUM 320
+#define PXP_NUM_ILT_RECORDS_E5 13664
+// Host Interface
+#define PXP_QUEUES_ZONE_MAX_NUM_E4 320
+#define PXP_QUEUES_ZONE_MAX_NUM_E5 512
/*****************/
@@ -635,7 +694,8 @@
/******************/
/* Number of PBF command queue lines. Each line is 32B. */
-#define PBF_MAX_CMD_LINES 3328
+#define PBF_MAX_CMD_LINES_E4 3328
+#define PBF_MAX_CMD_LINES_E5 5280
/* Number of BTB blocks. Each block is 256B. */
#define BTB_MAX_BLOCKS 1440
@@ -645,17 +705,6 @@
/*****************/
#define PRS_GFT_CAM_LINES_NO_MATCH 31
-/* Async data KCQ CQE */
-struct async_data {
- /* Context ID of the connection */
- __le32 cid;
- /* Task Id of the task (for error that happened on a a task) */
- __le16 itid;
- /* error code - relevant only if the opcode indicates its an error */
- u8 error_code;
- /* internal fw debug parameter */
- u8 fw_debug_param;
-};
/*
* Interrupt coalescing TimeSet
@@ -683,22 +732,29 @@ struct eth_rx_prod_data {
__le16 cqe_prod /* CQE producer. */;
};
-struct regpair {
- __le32 lo /* low word for reg-pair */;
- __le32 hi /* high word for reg-pair */;
+
+struct tcp_ulp_connect_done_params {
+ __le16 mss;
+ u8 snd_wnd_scale;
+ u8 flags;
+#define TCP_ULP_CONNECT_DONE_PARAMS_TS_EN_MASK 0x1
+#define TCP_ULP_CONNECT_DONE_PARAMS_TS_EN_SHIFT 0
+#define TCP_ULP_CONNECT_DONE_PARAMS_RESERVED_MASK 0x7F
+#define TCP_ULP_CONNECT_DONE_PARAMS_RESERVED_SHIFT 1
};
-/*
- * Event Ring VF-PF Channel data
- */
-struct vf_pf_channel_eqe_data {
- struct regpair msg_addr /* VF-PF message address */;
+struct iscsi_connect_done_results {
+ __le16 icid /* Context ID of the connection */;
+ __le16 conn_id /* Driver connection ID */;
+/* decided tcp params after connect done */
+ struct tcp_ulp_connect_done_params params;
};
+
struct iscsi_eqe_data {
- __le32 cid /* Context ID of the connection */;
- /* Task Id of the task (for error that happened on a a task) */;
- __le16 conn_id;
+ __le16 icid /* Context ID of the connection */;
+ __le16 conn_id /* Driver connection ID */;
+ __le16 reserved;
/* error code - relevant only if the opcode indicates its an error */
u8 error_code;
u8 error_pdu_opcode_reserved;
@@ -714,52 +770,10 @@ struct iscsi_eqe_data {
#define ISCSI_EQE_DATA_RESERVED0_SHIFT 7
};
-/*
- * Event Ring malicious VF data
- */
-struct malicious_vf_eqe_data {
- u8 vfId /* Malicious VF ID */;
- u8 errId /* Malicious VF error */;
- __le16 reserved[3];
-};
/*
- * Event Ring initial cleanup data
+ * Multi function mode
*/
-struct initial_cleanup_eqe_data {
- u8 vfId /* VF ID */;
- u8 reserved[7];
-};
-
-/*
- * Event Data Union
- */
-union event_ring_data {
- u8 bytes[8] /* Byte Array */;
- struct vf_pf_channel_eqe_data vf_pf_channel /* VF-PF Channel data */;
- struct iscsi_eqe_data iscsi_info /* Dedicated fields to iscsi data */;
- struct regpair roceHandle /* Dedicated field for RDMA data */;
- struct malicious_vf_eqe_data malicious_vf /* Malicious VF data */;
- struct initial_cleanup_eqe_data vf_init_cleanup
- /* VF Initial Cleanup data */;
-};
-/* Event Ring Entry */
-struct event_ring_entry {
- u8 protocol_id /* Event Protocol ID */;
- u8 opcode /* Event Opcode */;
- __le16 reserved0 /* Reserved */;
- __le16 echo /* Echo value from ramrod data on the host */;
- u8 fw_return_code /* FW return code for SP ramrods */;
- u8 flags;
-/* 0: synchronous EQE - a completion of SP message. 1: asynchronous EQE */
-#define EVENT_RING_ENTRY_ASYNC_MASK 0x1
-#define EVENT_RING_ENTRY_ASYNC_SHIFT 0
-#define EVENT_RING_ENTRY_RESERVED1_MASK 0x7F
-#define EVENT_RING_ENTRY_RESERVED1_SHIFT 1
- union event_ring_data data;
-};
-
-/* Multi function mode */
enum mf_mode {
ERROR_MODE /* Unsupported mode */,
MF_OVLAN /* Multi function based on outer VLAN */,
@@ -783,6 +797,12 @@ enum protocol_type {
};
+struct regpair {
+ __le32 lo /* low word for reg-pair */;
+ __le32 hi /* high word for reg-pair */;
+};
+
+
/*
* Ustorm Queue Zone
@@ -852,6 +872,18 @@ struct cau_sb_entry {
#define CAU_SB_ENTRY_TPH_SHIFT 31
};
+
+/*
+ * Igu cleanup bit values to distinguish between clean or producer consumer
+ * update.
+ */
+enum command_type_bit {
+ IGU_COMMAND_TYPE_NOP = 0,
+ IGU_COMMAND_TYPE_SET = 1,
+ MAX_COMMAND_TYPE_BIT
+};
+
+
/* core doorbell data */
struct core_db_data {
u8 params;
@@ -1008,7 +1040,7 @@ struct db_rdma_dpm_params {
#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT 28
#define DB_RDMA_DPM_PARAMS_S_FLG_MASK 0x1 /* RoCE S flag */
#define DB_RDMA_DPM_PARAMS_S_FLG_SHIFT 29
-#define DB_RDMA_DPM_PARAMS_RESERVED1_MASK 0x3
+#define DB_RDMA_DPM_PARAMS_RESERVED1_MASK 0x1
#define DB_RDMA_DPM_PARAMS_RESERVED1_SHIFT 30
/* Connection type is iWARP */
#define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_MASK 0x1
@@ -1072,9 +1104,9 @@ enum igu_seg_access {
* to the last-ethertype)
*/
enum l3_type {
- e_l3Type_unknown,
- e_l3Type_ipv4,
- e_l3Type_ipv6,
+ e_l3_type_unknown,
+ e_l3_type_ipv4,
+ e_l3_type_ipv6,
MAX_L3_TYPE
};
@@ -1085,9 +1117,9 @@ enum l3_type {
* first fragment, the protocol-type should be set to none.
*/
enum l4_protocol {
- e_l4Protocol_none,
- e_l4Protocol_tcp,
- e_l4Protocol_udp,
+ e_l4_protocol_none,
+ e_l4_protocol_tcp,
+ e_l4_protocol_udp,
MAX_L4_PROTOCOL
};
@@ -1311,260 +1343,230 @@ struct pxp_vf_zone_a_permission {
* Rdif context
*/
struct rdif_task_context {
- __le32 initialRefTag;
- __le16 appTagValue;
- __le16 appTagMask;
+ __le32 initial_ref_tag;
+ __le16 app_tag_value;
+ __le16 app_tag_mask;
u8 flags0;
-#define RDIF_TASK_CONTEXT_IGNOREAPPTAG_MASK 0x1
-#define RDIF_TASK_CONTEXT_IGNOREAPPTAG_SHIFT 0
-#define RDIF_TASK_CONTEXT_INITIALREFTAGVALID_MASK 0x1
-#define RDIF_TASK_CONTEXT_INITIALREFTAGVALID_SHIFT 1
+#define RDIF_TASK_CONTEXT_IGNORE_APP_TAG_MASK 0x1
+#define RDIF_TASK_CONTEXT_IGNORE_APP_TAG_SHIFT 0
+#define RDIF_TASK_CONTEXT_INITIAL_REF_TAG_VALID_MASK 0x1
+#define RDIF_TASK_CONTEXT_INITIAL_REF_TAG_VALID_SHIFT 1
/* 0 = IP checksum, 1 = CRC */
-#define RDIF_TASK_CONTEXT_HOSTGUARDTYPE_MASK 0x1
-#define RDIF_TASK_CONTEXT_HOSTGUARDTYPE_SHIFT 2
-#define RDIF_TASK_CONTEXT_SETERRORWITHEOP_MASK 0x1
-#define RDIF_TASK_CONTEXT_SETERRORWITHEOP_SHIFT 3
+#define RDIF_TASK_CONTEXT_HOST_GUARD_TYPE_MASK 0x1
+#define RDIF_TASK_CONTEXT_HOST_GUARD_TYPE_SHIFT 2
+#define RDIF_TASK_CONTEXT_SET_ERROR_WITH_EOP_MASK 0x1
+#define RDIF_TASK_CONTEXT_SET_ERROR_WITH_EOP_SHIFT 3
/* 1/2/3 - Protection Type */
-#define RDIF_TASK_CONTEXT_PROTECTIONTYPE_MASK 0x3
-#define RDIF_TASK_CONTEXT_PROTECTIONTYPE_SHIFT 4
+#define RDIF_TASK_CONTEXT_PROTECTION_TYPE_MASK 0x3
+#define RDIF_TASK_CONTEXT_PROTECTION_TYPE_SHIFT 4
/* 0=0x0000, 1=0xffff */
-#define RDIF_TASK_CONTEXT_CRC_SEED_MASK 0x1
-#define RDIF_TASK_CONTEXT_CRC_SEED_SHIFT 6
+#define RDIF_TASK_CONTEXT_CRC_SEED_MASK 0x1
+#define RDIF_TASK_CONTEXT_CRC_SEED_SHIFT 6
/* Keep reference tag constant */
-#define RDIF_TASK_CONTEXT_KEEPREFTAGCONST_MASK 0x1
-#define RDIF_TASK_CONTEXT_KEEPREFTAGCONST_SHIFT 7
- u8 partialDifData[7];
- __le16 partialCrcValue;
- __le16 partialChecksumValue;
- __le32 offsetInIO;
+#define RDIF_TASK_CONTEXT_KEEP_REF_TAG_CONST_MASK 0x1
+#define RDIF_TASK_CONTEXT_KEEP_REF_TAG_CONST_SHIFT 7
+ u8 partial_dif_data[7];
+ __le16 partial_crc_value;
+ __le16 partial_checksum_value;
+ __le32 offset_in_io;
__le16 flags1;
-#define RDIF_TASK_CONTEXT_VALIDATEGUARD_MASK 0x1
-#define RDIF_TASK_CONTEXT_VALIDATEGUARD_SHIFT 0
-#define RDIF_TASK_CONTEXT_VALIDATEAPPTAG_MASK 0x1
-#define RDIF_TASK_CONTEXT_VALIDATEAPPTAG_SHIFT 1
-#define RDIF_TASK_CONTEXT_VALIDATEREFTAG_MASK 0x1
-#define RDIF_TASK_CONTEXT_VALIDATEREFTAG_SHIFT 2
-#define RDIF_TASK_CONTEXT_FORWARDGUARD_MASK 0x1
-#define RDIF_TASK_CONTEXT_FORWARDGUARD_SHIFT 3
-#define RDIF_TASK_CONTEXT_FORWARDAPPTAG_MASK 0x1
-#define RDIF_TASK_CONTEXT_FORWARDAPPTAG_SHIFT 4
-#define RDIF_TASK_CONTEXT_FORWARDREFTAG_MASK 0x1
-#define RDIF_TASK_CONTEXT_FORWARDREFTAG_SHIFT 5
+#define RDIF_TASK_CONTEXT_VALIDATE_GUARD_MASK 0x1
+#define RDIF_TASK_CONTEXT_VALIDATE_GUARD_SHIFT 0
+#define RDIF_TASK_CONTEXT_VALIDATE_APP_TAG_MASK 0x1
+#define RDIF_TASK_CONTEXT_VALIDATE_APP_TAG_SHIFT 1
+#define RDIF_TASK_CONTEXT_VALIDATE_REF_TAG_MASK 0x1
+#define RDIF_TASK_CONTEXT_VALIDATE_REF_TAG_SHIFT 2
+#define RDIF_TASK_CONTEXT_FORWARD_GUARD_MASK 0x1
+#define RDIF_TASK_CONTEXT_FORWARD_GUARD_SHIFT 3
+#define RDIF_TASK_CONTEXT_FORWARD_APP_TAG_MASK 0x1
+#define RDIF_TASK_CONTEXT_FORWARD_APP_TAG_SHIFT 4
+#define RDIF_TASK_CONTEXT_FORWARD_REF_TAG_MASK 0x1
+#define RDIF_TASK_CONTEXT_FORWARD_REF_TAG_SHIFT 5
/* 0=512B, 1=1KB, 2=2KB, 3=4KB, 4=8KB */
-#define RDIF_TASK_CONTEXT_INTERVALSIZE_MASK 0x7
-#define RDIF_TASK_CONTEXT_INTERVALSIZE_SHIFT 6
+#define RDIF_TASK_CONTEXT_INTERVAL_SIZE_MASK 0x7
+#define RDIF_TASK_CONTEXT_INTERVAL_SIZE_SHIFT 6
/* 0=None, 1=DIF, 2=DIX */
-#define RDIF_TASK_CONTEXT_HOSTINTERFACE_MASK 0x3
-#define RDIF_TASK_CONTEXT_HOSTINTERFACE_SHIFT 9
+#define RDIF_TASK_CONTEXT_HOST_INTERFACE_MASK 0x3
+#define RDIF_TASK_CONTEXT_HOST_INTERFACE_SHIFT 9
/* DIF tag right at the beginning of DIF interval */
-#define RDIF_TASK_CONTEXT_DIFBEFOREDATA_MASK 0x1
-#define RDIF_TASK_CONTEXT_DIFBEFOREDATA_SHIFT 11
-#define RDIF_TASK_CONTEXT_RESERVED0_MASK 0x1
-#define RDIF_TASK_CONTEXT_RESERVED0_SHIFT 12
+#define RDIF_TASK_CONTEXT_DIF_BEFORE_DATA_MASK 0x1
+#define RDIF_TASK_CONTEXT_DIF_BEFORE_DATA_SHIFT 11
+#define RDIF_TASK_CONTEXT_RESERVED0_MASK 0x1
+#define RDIF_TASK_CONTEXT_RESERVED0_SHIFT 12
/* 0=None, 1=DIF */
-#define RDIF_TASK_CONTEXT_NETWORKINTERFACE_MASK 0x1
-#define RDIF_TASK_CONTEXT_NETWORKINTERFACE_SHIFT 13
+#define RDIF_TASK_CONTEXT_NETWORK_INTERFACE_MASK 0x1
+#define RDIF_TASK_CONTEXT_NETWORK_INTERFACE_SHIFT 13
/* Forward application tag with mask */
-#define RDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_MASK 0x1
-#define RDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_SHIFT 14
+#define RDIF_TASK_CONTEXT_FORWARD_APP_TAG_WITH_MASK_MASK 0x1
+#define RDIF_TASK_CONTEXT_FORWARD_APP_TAG_WITH_MASK_SHIFT 14
/* Forward reference tag with mask */
-#define RDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_MASK 0x1
-#define RDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_SHIFT 15
+#define RDIF_TASK_CONTEXT_FORWARD_REF_TAG_WITH_MASK_MASK 0x1
+#define RDIF_TASK_CONTEXT_FORWARD_REF_TAG_WITH_MASK_SHIFT 15
__le16 state;
-#define RDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFT_MASK 0xF
-#define RDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFT_SHIFT 0
-#define RDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFT_MASK 0xF
-#define RDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFT_SHIFT 4
-#define RDIF_TASK_CONTEXT_ERRORINIO_MASK 0x1
-#define RDIF_TASK_CONTEXT_ERRORINIO_SHIFT 8
-#define RDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_MASK 0x1
-#define RDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_SHIFT 9
+#define RDIF_TASK_CONTEXT_RECEIVED_DIF_BYTES_LEFT_MASK 0xF
+#define RDIF_TASK_CONTEXT_RECEIVED_DIF_BYTES_LEFT_SHIFT 0
+#define RDIF_TASK_CONTEXT_TRANSMITED_DIF_BYTES_LEFT_MASK 0xF
+#define RDIF_TASK_CONTEXT_TRANSMITED_DIF_BYTES_LEFT_SHIFT 4
+#define RDIF_TASK_CONTEXT_ERROR_IN_IO_MASK 0x1
+#define RDIF_TASK_CONTEXT_ERROR_IN_IO_SHIFT 8
+#define RDIF_TASK_CONTEXT_CHECKSUM_OVERFLOW_MASK 0x1
+#define RDIF_TASK_CONTEXT_CHECKSUM_OVERFLOW_SHIFT 9
/* mask for refernce tag handling */
-#define RDIF_TASK_CONTEXT_REFTAGMASK_MASK 0xF
-#define RDIF_TASK_CONTEXT_REFTAGMASK_SHIFT 10
-#define RDIF_TASK_CONTEXT_RESERVED1_MASK 0x3
-#define RDIF_TASK_CONTEXT_RESERVED1_SHIFT 14
+#define RDIF_TASK_CONTEXT_REF_TAG_MASK_MASK 0xF
+#define RDIF_TASK_CONTEXT_REF_TAG_MASK_SHIFT 10
+#define RDIF_TASK_CONTEXT_RESERVED1_MASK 0x3
+#define RDIF_TASK_CONTEXT_RESERVED1_SHIFT 14
__le32 reserved2;
};
-/* RSS hash type */
+/*
+ * RSS hash type
+ */
enum rss_hash_type {
- RSS_HASH_TYPE_DEFAULT = 0,
- RSS_HASH_TYPE_IPV4 = 1,
- RSS_HASH_TYPE_TCP_IPV4 = 2,
- RSS_HASH_TYPE_IPV6 = 3,
- RSS_HASH_TYPE_TCP_IPV6 = 4,
- RSS_HASH_TYPE_UDP_IPV4 = 5,
- RSS_HASH_TYPE_UDP_IPV6 = 6,
+ RSS_HASH_TYPE_DEFAULT = 0,
+ RSS_HASH_TYPE_IPV4 = 1,
+ RSS_HASH_TYPE_TCP_IPV4 = 2,
+ RSS_HASH_TYPE_IPV6 = 3,
+ RSS_HASH_TYPE_TCP_IPV6 = 4,
+ RSS_HASH_TYPE_UDP_IPV4 = 5,
+ RSS_HASH_TYPE_UDP_IPV6 = 6,
MAX_RSS_HASH_TYPE
};
-/* status block structure */
-struct status_block {
- __le16 pi_array[PIS_PER_SB];
- __le32 sb_num;
-#define STATUS_BLOCK_SB_NUM_MASK 0x1FF
-#define STATUS_BLOCK_SB_NUM_SHIFT 0
-#define STATUS_BLOCK_ZERO_PAD_MASK 0x7F
-#define STATUS_BLOCK_ZERO_PAD_SHIFT 9
-#define STATUS_BLOCK_ZERO_PAD2_MASK 0xFFFF
-#define STATUS_BLOCK_ZERO_PAD2_SHIFT 16
+/*
+ * status block structure
+ */
+struct status_block_e4 {
+ __le16 pi_array[PIS_PER_SB_E4];
+ __le32 sb_num;
+#define STATUS_BLOCK_E4_SB_NUM_MASK 0x1FF
+#define STATUS_BLOCK_E4_SB_NUM_SHIFT 0
+#define STATUS_BLOCK_E4_ZERO_PAD_MASK 0x7F
+#define STATUS_BLOCK_E4_ZERO_PAD_SHIFT 9
+#define STATUS_BLOCK_E4_ZERO_PAD2_MASK 0xFFFF
+#define STATUS_BLOCK_E4_ZERO_PAD2_SHIFT 16
__le32 prod_index;
-#define STATUS_BLOCK_PROD_INDEX_MASK 0xFFFFFF
-#define STATUS_BLOCK_PROD_INDEX_SHIFT 0
-#define STATUS_BLOCK_ZERO_PAD3_MASK 0xFF
-#define STATUS_BLOCK_ZERO_PAD3_SHIFT 24
+#define STATUS_BLOCK_E4_PROD_INDEX_MASK 0xFFFFFF
+#define STATUS_BLOCK_E4_PROD_INDEX_SHIFT 0
+#define STATUS_BLOCK_E4_ZERO_PAD3_MASK 0xFF
+#define STATUS_BLOCK_E4_ZERO_PAD3_SHIFT 24
};
-/* VF BAR */
-#define PXP_VF_BAR0 0
-
-#define PXP_VF_BAR0_START_GRC 0x3E00
-#define PXP_VF_BAR0_GRC_LENGTH 0x200
-#define PXP_VF_BAR0_END_GRC \
-(PXP_VF_BAR0_START_GRC + PXP_VF_BAR0_GRC_LENGTH - 1)
-
-#define PXP_VF_BAR0_START_IGU 0
-#define PXP_VF_BAR0_IGU_LENGTH 0x3000
-#define PXP_VF_BAR0_END_IGU \
-(PXP_VF_BAR0_START_IGU + PXP_VF_BAR0_IGU_LENGTH - 1)
-
-#define PXP_VF_BAR0_START_DQ 0x3000
-#define PXP_VF_BAR0_DQ_LENGTH 0x200
-#define PXP_VF_BAR0_DQ_OPAQUE_OFFSET 0
-#define PXP_VF_BAR0_ME_OPAQUE_ADDRESS \
-(PXP_VF_BAR0_START_DQ + PXP_VF_BAR0_DQ_OPAQUE_OFFSET)
-#define PXP_VF_BAR0_ME_CONCRETE_ADDRESS \
-(PXP_VF_BAR0_ME_OPAQUE_ADDRESS + 4)
-#define PXP_VF_BAR0_END_DQ \
-(PXP_VF_BAR0_START_DQ + PXP_VF_BAR0_DQ_LENGTH - 1)
-
-#define PXP_VF_BAR0_START_TSDM_ZONE_B 0x3200
-#define PXP_VF_BAR0_SDM_LENGTH_ZONE_B 0x200
-#define PXP_VF_BAR0_END_TSDM_ZONE_B \
-(PXP_VF_BAR0_START_TSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
-
-#define PXP_VF_BAR0_START_MSDM_ZONE_B 0x3400
-#define PXP_VF_BAR0_END_MSDM_ZONE_B \
-(PXP_VF_BAR0_START_MSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
-
-#define PXP_VF_BAR0_START_USDM_ZONE_B 0x3600
-#define PXP_VF_BAR0_END_USDM_ZONE_B \
-(PXP_VF_BAR0_START_USDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
-
-#define PXP_VF_BAR0_START_XSDM_ZONE_B 0x3800
-#define PXP_VF_BAR0_END_XSDM_ZONE_B \
-(PXP_VF_BAR0_START_XSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
-
-#define PXP_VF_BAR0_START_YSDM_ZONE_B 0x3a00
-#define PXP_VF_BAR0_END_YSDM_ZONE_B \
-(PXP_VF_BAR0_START_YSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
-
-#define PXP_VF_BAR0_START_PSDM_ZONE_B 0x3c00
-#define PXP_VF_BAR0_END_PSDM_ZONE_B \
-(PXP_VF_BAR0_START_PSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1)
-
-#define PXP_VF_BAR0_START_SDM_ZONE_A 0x4000
-#define PXP_VF_BAR0_END_SDM_ZONE_A 0x10000
-
-#define PXP_VF_BAR0_GRC_WINDOW_LENGTH 32
+/*
+ * status block structure
+ */
+struct status_block_e5 {
+ __le16 pi_array[PIS_PER_SB_E5];
+ __le32 sb_num;
+#define STATUS_BLOCK_E5_SB_NUM_MASK 0x1FF
+#define STATUS_BLOCK_E5_SB_NUM_SHIFT 0
+#define STATUS_BLOCK_E5_ZERO_PAD_MASK 0x7F
+#define STATUS_BLOCK_E5_ZERO_PAD_SHIFT 9
+#define STATUS_BLOCK_E5_ZERO_PAD2_MASK 0xFFFF
+#define STATUS_BLOCK_E5_ZERO_PAD2_SHIFT 16
+ __le32 prod_index;
+#define STATUS_BLOCK_E5_PROD_INDEX_MASK 0xFFFFFF
+#define STATUS_BLOCK_E5_PROD_INDEX_SHIFT 0
+#define STATUS_BLOCK_E5_ZERO_PAD3_MASK 0xFF
+#define STATUS_BLOCK_E5_ZERO_PAD3_SHIFT 24
+};
+
/*
* Tdif context
*/
struct tdif_task_context {
- __le32 initialRefTag;
- __le16 appTagValue;
- __le16 appTagMask;
- __le16 partialCrcValueB;
- __le16 partialChecksumValueB;
+ __le32 initial_ref_tag;
+ __le16 app_tag_value;
+ __le16 app_tag_mask;
+ __le16 partial_crc_value_b;
+ __le16 partial_checksum_value_b;
__le16 stateB;
-#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTB_MASK 0xF
-#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTB_SHIFT 0
-#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTB_MASK 0xF
-#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTB_SHIFT 4
-#define TDIF_TASK_CONTEXT_ERRORINIOB_MASK 0x1
-#define TDIF_TASK_CONTEXT_ERRORINIOB_SHIFT 8
-#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_MASK 0x1
-#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_SHIFT 9
-#define TDIF_TASK_CONTEXT_RESERVED0_MASK 0x3F
-#define TDIF_TASK_CONTEXT_RESERVED0_SHIFT 10
+#define TDIF_TASK_CONTEXT_RECEIVED_DIF_BYTES_LEFT_B_MASK 0xF
+#define TDIF_TASK_CONTEXT_RECEIVED_DIF_BYTES_LEFT_B_SHIFT 0
+#define TDIF_TASK_CONTEXT_TRANSMITED_DIF_BYTES_LEFT_B_MASK 0xF
+#define TDIF_TASK_CONTEXT_TRANSMITED_DIF_BYTES_LEFT_B_SHIFT 4
+#define TDIF_TASK_CONTEXT_ERROR_IN_IO_B_MASK 0x1
+#define TDIF_TASK_CONTEXT_ERROR_IN_IO_B_SHIFT 8
+#define TDIF_TASK_CONTEXT_CHECKSUM_VERFLOW_MASK 0x1
+#define TDIF_TASK_CONTEXT_CHECKSUM_VERFLOW_SHIFT 9
+#define TDIF_TASK_CONTEXT_RESERVED0_MASK 0x3F
+#define TDIF_TASK_CONTEXT_RESERVED0_SHIFT 10
u8 reserved1;
u8 flags0;
-#define TDIF_TASK_CONTEXT_IGNOREAPPTAG_MASK 0x1
-#define TDIF_TASK_CONTEXT_IGNOREAPPTAG_SHIFT 0
-#define TDIF_TASK_CONTEXT_INITIALREFTAGVALID_MASK 0x1
-#define TDIF_TASK_CONTEXT_INITIALREFTAGVALID_SHIFT 1
+#define TDIF_TASK_CONTEXT_IGNORE_APP_TAG_MASK 0x1
+#define TDIF_TASK_CONTEXT_IGNORE_APP_TAG_SHIFT 0
+#define TDIF_TASK_CONTEXT_INITIAL_REF_TAG_VALID_MASK 0x1
+#define TDIF_TASK_CONTEXT_INITIAL_REF_TAG_VALID_SHIFT 1
/* 0 = IP checksum, 1 = CRC */
-#define TDIF_TASK_CONTEXT_HOSTGUARDTYPE_MASK 0x1
-#define TDIF_TASK_CONTEXT_HOSTGUARDTYPE_SHIFT 2
-#define TDIF_TASK_CONTEXT_SETERRORWITHEOP_MASK 0x1
-#define TDIF_TASK_CONTEXT_SETERRORWITHEOP_SHIFT 3
+#define TDIF_TASK_CONTEXT_HOST_GUARD_TYPE_MASK 0x1
+#define TDIF_TASK_CONTEXT_HOST_GUARD_TYPE_SHIFT 2
+#define TDIF_TASK_CONTEXT_SET_ERROR_WITH_EOP_MASK 0x1
+#define TDIF_TASK_CONTEXT_SET_ERROR_WITH_EOP_SHIFT 3
/* 1/2/3 - Protection Type */
-#define TDIF_TASK_CONTEXT_PROTECTIONTYPE_MASK 0x3
-#define TDIF_TASK_CONTEXT_PROTECTIONTYPE_SHIFT 4
+#define TDIF_TASK_CONTEXT_PROTECTION_TYPE_MASK 0x3
+#define TDIF_TASK_CONTEXT_PROTECTION_TYPE_SHIFT 4
/* 0=0x0000, 1=0xffff */
-#define TDIF_TASK_CONTEXT_CRC_SEED_MASK 0x1
-#define TDIF_TASK_CONTEXT_CRC_SEED_SHIFT 6
-#define TDIF_TASK_CONTEXT_RESERVED2_MASK 0x1
-#define TDIF_TASK_CONTEXT_RESERVED2_SHIFT 7
+#define TDIF_TASK_CONTEXT_CRC_SEED_MASK 0x1
+#define TDIF_TASK_CONTEXT_CRC_SEED_SHIFT 6
+#define TDIF_TASK_CONTEXT_RESERVED2_MASK 0x1
+#define TDIF_TASK_CONTEXT_RESERVED2_SHIFT 7
__le32 flags1;
-#define TDIF_TASK_CONTEXT_VALIDATEGUARD_MASK 0x1
-#define TDIF_TASK_CONTEXT_VALIDATEGUARD_SHIFT 0
-#define TDIF_TASK_CONTEXT_VALIDATEAPPTAG_MASK 0x1
-#define TDIF_TASK_CONTEXT_VALIDATEAPPTAG_SHIFT 1
-#define TDIF_TASK_CONTEXT_VALIDATEREFTAG_MASK 0x1
-#define TDIF_TASK_CONTEXT_VALIDATEREFTAG_SHIFT 2
-#define TDIF_TASK_CONTEXT_FORWARDGUARD_MASK 0x1
-#define TDIF_TASK_CONTEXT_FORWARDGUARD_SHIFT 3
-#define TDIF_TASK_CONTEXT_FORWARDAPPTAG_MASK 0x1
-#define TDIF_TASK_CONTEXT_FORWARDAPPTAG_SHIFT 4
-#define TDIF_TASK_CONTEXT_FORWARDREFTAG_MASK 0x1
-#define TDIF_TASK_CONTEXT_FORWARDREFTAG_SHIFT 5
+#define TDIF_TASK_CONTEXT_VALIDATE_GUARD_MASK 0x1
+#define TDIF_TASK_CONTEXT_VALIDATE_GUARD_SHIFT 0
+#define TDIF_TASK_CONTEXT_VALIDATE_APP_TAG_MASK 0x1
+#define TDIF_TASK_CONTEXT_VALIDATE_APP_TAG_SHIFT 1
+#define TDIF_TASK_CONTEXT_VALIDATE_REF_TAG_MASK 0x1
+#define TDIF_TASK_CONTEXT_VALIDATE_REF_TAG_SHIFT 2
+#define TDIF_TASK_CONTEXT_FORWARD_GUARD_MASK 0x1
+#define TDIF_TASK_CONTEXT_FORWARD_GUARD_SHIFT 3
+#define TDIF_TASK_CONTEXT_FORWARD_APP_TAG_MASK 0x1
+#define TDIF_TASK_CONTEXT_FORWARD_APP_TAG_SHIFT 4
+#define TDIF_TASK_CONTEXT_FORWARD_REF_TAG_MASK 0x1
+#define TDIF_TASK_CONTEXT_FORWARD_REF_TAG_SHIFT 5
/* 0=512B, 1=1KB, 2=2KB, 3=4KB, 4=8KB */
-#define TDIF_TASK_CONTEXT_INTERVALSIZE_MASK 0x7
-#define TDIF_TASK_CONTEXT_INTERVALSIZE_SHIFT 6
+#define TDIF_TASK_CONTEXT_INTERVAL_SIZE_MASK 0x7
+#define TDIF_TASK_CONTEXT_INTERVAL_SIZE_SHIFT 6
/* 0=None, 1=DIF, 2=DIX */
-#define TDIF_TASK_CONTEXT_HOSTINTERFACE_MASK 0x3
-#define TDIF_TASK_CONTEXT_HOSTINTERFACE_SHIFT 9
+#define TDIF_TASK_CONTEXT_HOST_INTERFACE_MASK 0x3
+#define TDIF_TASK_CONTEXT_HOST_INTERFACE_SHIFT 9
/* DIF tag right at the beginning of DIF interval */
-#define TDIF_TASK_CONTEXT_DIFBEFOREDATA_MASK 0x1
-#define TDIF_TASK_CONTEXT_DIFBEFOREDATA_SHIFT 11
-/* reserved */
-#define TDIF_TASK_CONTEXT_RESERVED3_MASK 0x1
-#define TDIF_TASK_CONTEXT_RESERVED3_SHIFT 12
+#define TDIF_TASK_CONTEXT_DIF_BEFORE_DATA_MASK 0x1
+#define TDIF_TASK_CONTEXT_DIF_BEFORE_DATA_SHIFT 11
+#define TDIF_TASK_CONTEXT_RESERVED3_MASK 0x1 /* reserved */
+#define TDIF_TASK_CONTEXT_RESERVED3_SHIFT 12
/* 0=None, 1=DIF */
-#define TDIF_TASK_CONTEXT_NETWORKINTERFACE_MASK 0x1
-#define TDIF_TASK_CONTEXT_NETWORKINTERFACE_SHIFT 13
-#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTA_MASK 0xF
-#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTA_SHIFT 14
-#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTA_MASK 0xF
-#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTA_SHIFT 18
-#define TDIF_TASK_CONTEXT_ERRORINIOA_MASK 0x1
-#define TDIF_TASK_CONTEXT_ERRORINIOA_SHIFT 22
-#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOWA_MASK 0x1
-#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOWA_SHIFT 23
+#define TDIF_TASK_CONTEXT_NETWORK_INTERFACE_MASK 0x1
+#define TDIF_TASK_CONTEXT_NETWORK_INTERFACE_SHIFT 13
+#define TDIF_TASK_CONTEXT_RECEIVED_DIF_BYTES_LEFT_A_MASK 0xF
+#define TDIF_TASK_CONTEXT_RECEIVED_DIF_BYTES_LEFT_A_SHIFT 14
+#define TDIF_TASK_CONTEXT_TRANSMITED_DIF_BYTES_LEFT_A_MASK 0xF
+#define TDIF_TASK_CONTEXT_TRANSMITED_DIF_BYTES_LEFT_A_SHIFT 18
+#define TDIF_TASK_CONTEXT_ERROR_IN_IO_A_MASK 0x1
+#define TDIF_TASK_CONTEXT_ERROR_IN_IO_A_SHIFT 22
+#define TDIF_TASK_CONTEXT_CHECKSUM_OVERFLOW_A_MASK 0x1
+#define TDIF_TASK_CONTEXT_CHECKSUM_OVERFLOW_A_SHIFT 23
/* mask for refernce tag handling */
-#define TDIF_TASK_CONTEXT_REFTAGMASK_MASK 0xF
-#define TDIF_TASK_CONTEXT_REFTAGMASK_SHIFT 24
+#define TDIF_TASK_CONTEXT_REF_TAG_MASK_MASK 0xF
+#define TDIF_TASK_CONTEXT_REF_TAG_MASK_SHIFT 24
/* Forward application tag with mask */
-#define TDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_MASK 0x1
-#define TDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_SHIFT 28
+#define TDIF_TASK_CONTEXT_FORWARD_APP_TAG_WITH_MASK_MASK 0x1
+#define TDIF_TASK_CONTEXT_FORWARD_APP_TAG_WITH_MASK_SHIFT 28
/* Forward reference tag with mask */
-#define TDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_MASK 0x1
-#define TDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_SHIFT 29
+#define TDIF_TASK_CONTEXT_FORWARD_REF_TAG_WITH_MASK_MASK 0x1
+#define TDIF_TASK_CONTEXT_FORWARD_REF_TAG_WITH_MASK_SHIFT 29
/* Keep reference tag constant */
-#define TDIF_TASK_CONTEXT_KEEPREFTAGCONST_MASK 0x1
-#define TDIF_TASK_CONTEXT_KEEPREFTAGCONST_SHIFT 30
-#define TDIF_TASK_CONTEXT_RESERVED4_MASK 0x1
-#define TDIF_TASK_CONTEXT_RESERVED4_SHIFT 31
- __le32 offsetInIOB;
- __le16 partialCrcValueA;
- __le16 partialChecksumValueA;
- __le32 offsetInIOA;
- u8 partialDifDataA[8];
- u8 partialDifDataB[8];
+#define TDIF_TASK_CONTEXT_KEEP_REF_TAG_CONST_MASK 0x1
+#define TDIF_TASK_CONTEXT_KEEP_REF_TAG_CONST_SHIFT 30
+#define TDIF_TASK_CONTEXT_RESERVED4_MASK 0x1
+#define TDIF_TASK_CONTEXT_RESERVED4_SHIFT 31
+ __le32 offset_in_io_b;
+ __le16 partial_crc_value_a;
+ __le16 partial_checksum_value_a;
+ __le32 offset_in_io_a;
+ u8 partial_dif_data_a[8];
+ u8 partial_dif_data_b[8];
};
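The regenerated HSI definitions keep the paired _MASK/_SHIFT convention (for example STATUS_BLOCK_E4_PROD_INDEX_MASK and _SHIFT), which the driver reads through GET_FIELD()-style helpers. A minimal sketch of that access pattern; the DEMO_* names and the sample register value are made up for illustration:

#include <stdint.h>
#include <stdio.h>

/* Pair convention used throughout the HSI headers. */
#define DEMO_PROD_INDEX_MASK	0xFFFFFF
#define DEMO_PROD_INDEX_SHIFT	0
#define DEMO_ZERO_PAD3_MASK	0xFF
#define DEMO_ZERO_PAD3_SHIFT	24

/* Same shape as the driver's GET_FIELD(value, name) helper. */
#define DEMO_GET_FIELD(value, name) \
	(((value) >> (name##_SHIFT)) & (name##_MASK))

int main(void)
{
	uint32_t prod_index_word = 0xAB001234;	/* made-up sample value */

	printf("prod index = 0x%x, pad = 0x%x\n",
	       (unsigned int)DEMO_GET_FIELD(prod_index_word, DEMO_PROD_INDEX),
	       (unsigned int)DEMO_GET_FIELD(prod_index_word, DEMO_ZERO_PAD3));
	return 0;
}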
diff --git a/drivers/net/qede/base/ecore.h b/drivers/net/qede/base/ecore.h
index 0d68a9bc..ce5f3a90 100644
--- a/drivers/net/qede/base/ecore.h
+++ b/drivers/net/qede/base/ecore.h
@@ -29,9 +29,9 @@
#include "mcp_public.h"
#define ECORE_MAJOR_VERSION 8
-#define ECORE_MINOR_VERSION 18
-#define ECORE_REVISION_VERSION 7
-#define ECORE_ENGINEERING_VERSION 1
+#define ECORE_MINOR_VERSION 30
+#define ECORE_REVISION_VERSION 8
+#define ECORE_ENGINEERING_VERSION 0
#define ECORE_VERSION \
((ECORE_MAJOR_VERSION << 24) | (ECORE_MINOR_VERSION << 16) | \
@@ -50,6 +50,7 @@
#define FCOE_BDQ_ID(_port_id) (_port_id + 2)
/* Constants */
#define ECORE_WID_SIZE (1024)
+#define ECORE_MIN_WIDS (4)
/* Configurable */
#define ECORE_PF_DEMS_SIZE (4)
@@ -66,6 +67,7 @@ enum ecore_nvm_cmd {
ECORE_NVM_READ_NVRAM = DRV_MSG_CODE_NVM_READ_NVRAM,
ECORE_NVM_WRITE_NVRAM = DRV_MSG_CODE_NVM_WRITE_NVRAM,
ECORE_NVM_DEL_FILE = DRV_MSG_CODE_NVM_DEL_FILE,
+ ECORE_EXT_PHY_FW_UPGRADE = DRV_MSG_CODE_EXT_PHY_FW_UPGRADE,
ECORE_NVM_SET_SECURE_MODE = DRV_MSG_CODE_SET_SECURE_MODE,
ECORE_PHY_RAW_READ = DRV_MSG_CODE_PHY_RAW_READ,
ECORE_PHY_RAW_WRITE = DRV_MSG_CODE_PHY_RAW_WRITE,
@@ -97,16 +99,16 @@ do { \
#define GET_FIELD(value, name) \
(((value) >> (name##_SHIFT)) & name##_MASK)
-#endif
-#define ECORE_MFW_GET_FIELD(name, field) \
- (((name) & (field ## _MASK)) >> (field ## _SHIFT))
+#define GET_MFW_FIELD(name, field) \
+ (((name) & (field ## _MASK)) >> (field ## _OFFSET))
-#define ECORE_MFW_SET_FIELD(name, field, value) \
+#define SET_MFW_FIELD(name, field, value) \
do { \
- (name) &= ~(field ## _MASK); \
- (name) |= (((value) << (field ## _SHIFT)) & (field ## _MASK)); \
+ (name) &= ~((field ## _MASK)); \
+ (name) |= (((value) << (field ## _OFFSET)) & (field ## _MASK)); \
} while (0)
+#endif
static OSAL_INLINE u32 DB_ADDR(u32 cid, u32 DEMS)
{
@@ -279,7 +281,6 @@ struct ecore_qm_iids {
* is received from MFW.
*/
enum ecore_resources {
- ECORE_SB,
ECORE_L2_QUEUE,
ECORE_VPORT,
ECORE_RSS_ENG,
@@ -293,7 +294,13 @@ enum ecore_resources {
ECORE_CMDQS_CQS,
ECORE_RDMA_STATS_QUEUE,
ECORE_BDQ,
- ECORE_MAX_RESC, /* must be last */
+
+ /* This is needed only internally for matching against the IGU.
+ * In case of legacy MFW, would be set to `0'.
+ */
+ ECORE_SB,
+
+ ECORE_MAX_RESC,
};
/* Features that require resources, given as input to the resource management
@@ -345,22 +352,32 @@ enum ecore_hw_err_type {
};
#endif
+enum ecore_db_rec_exec {
+ DB_REC_DRY_RUN,
+ DB_REC_REAL_DEAL,
+ DB_REC_ONCE,
+};
+
struct ecore_hw_info {
/* PCI personality */
enum ecore_pci_personality personality;
-#define ECORE_IS_RDMA_PERSONALITY(dev) \
- ((dev)->hw_info.personality == ECORE_PCI_ETH_ROCE || \
+#define ECORE_IS_RDMA_PERSONALITY(dev) \
+ ((dev)->hw_info.personality == ECORE_PCI_ETH_ROCE || \
(dev)->hw_info.personality == ECORE_PCI_ETH_IWARP || \
(dev)->hw_info.personality == ECORE_PCI_ETH_RDMA)
-#define ECORE_IS_ROCE_PERSONALITY(dev) \
+#define ECORE_IS_ROCE_PERSONALITY(dev) \
((dev)->hw_info.personality == ECORE_PCI_ETH_ROCE || \
(dev)->hw_info.personality == ECORE_PCI_ETH_RDMA)
-#define ECORE_IS_IWARP_PERSONALITY(dev) \
+#define ECORE_IS_IWARP_PERSONALITY(dev) \
((dev)->hw_info.personality == ECORE_PCI_ETH_IWARP || \
(dev)->hw_info.personality == ECORE_PCI_ETH_RDMA)
-#define ECORE_IS_L2_PERSONALITY(dev) \
+#define ECORE_IS_L2_PERSONALITY(dev) \
((dev)->hw_info.personality == ECORE_PCI_ETH || \
ECORE_IS_RDMA_PERSONALITY(dev))
+#define ECORE_IS_FCOE_PERSONALITY(dev) \
+ ((dev)->hw_info.personality == ECORE_PCI_FCOE)
+#define ECORE_IS_ISCSI_PERSONALITY(dev) \
+ ((dev)->hw_info.personality == ECORE_PCI_ISCSI)
/* Resource Allocation scheme results */
u32 resc_start[ECORE_MAX_RESC];
@@ -473,6 +490,12 @@ struct ecore_qm_info {
u8 num_pf_rls;
};
+struct ecore_db_recovery_info {
+ osal_list_t list;
+ osal_spinlock_t lock;
+ u32 db_recovery_counter;
+};
+
struct storm_stats {
u32 address;
u32 len;
@@ -488,14 +511,60 @@ struct ecore_fw_data {
u32 init_ops_size;
};
+enum ecore_mf_mode_bit {
+ /* Supports PF-classification based on tag */
+ ECORE_MF_OVLAN_CLSS,
+
+ /* Supports PF-classification based on MAC */
+ ECORE_MF_LLH_MAC_CLSS,
+
+ /* Supports PF-classification based on protocol type */
+ ECORE_MF_LLH_PROTO_CLSS,
+
+ /* Requires a default PF to be set */
+ ECORE_MF_NEED_DEF_PF,
+
+ /* Allow LL2 to multicast/broadcast */
+ ECORE_MF_LL2_NON_UNICAST,
+
+ /* Allow Cross-PF [& child VFs] Tx-switching */
+ ECORE_MF_INTER_PF_SWITCH,
+
+ /* TODO - if we ever re-utilize any of this logic, we can rename */
+ ECORE_MF_UFP_SPECIFIC,
+
+ ECORE_MF_DISABLE_ARFS,
+};
+
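The new mf_bits bitmap is meant to be consulted with bit tests rather than by comparing mf_mode. A minimal sketch, mirroring the OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS, ...) call that appears later in this patch (OSAL_TEST_BIT is assumed to behave like a plain test_bit):

	static bool example_arfs_allowed(struct ecore_hwfn *p_hwfn)
	{
		/* aRFS is only configured when the MFW did not disable it */
		return !OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS,
				      &p_hwfn->p_dev->mf_bits);
	}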
+enum ecore_ufp_mode {
+ ECORE_UFP_MODE_ETS,
+ ECORE_UFP_MODE_VNIC_BW,
+};
+
+enum ecore_ufp_pri_type {
+ ECORE_UFP_PRI_OS,
+ ECORE_UFP_PRI_VNIC
+};
+
+struct ecore_ufp_info {
+ enum ecore_ufp_pri_type pri_type;
+ enum ecore_ufp_mode mode;
+ u8 tc;
+};
+
+enum BAR_ID {
+ BAR_ID_0, /* used for GRC */
+ BAR_ID_1 /* Used for doorbells */
+};
+
struct ecore_hwfn {
struct ecore_dev *p_dev;
u8 my_id; /* ID inside the PF */
#define IS_LEAD_HWFN(edev) (!((edev)->my_id))
u8 rel_pf_id; /* Relative to engine*/
u8 abs_pf_id;
- #define ECORE_PATH_ID(_p_hwfn) \
- (ECORE_IS_K2((_p_hwfn)->p_dev) ? 0 : ((_p_hwfn)->abs_pf_id & 1))
+#define ECORE_PATH_ID(_p_hwfn) \
+ (ECORE_IS_BB((_p_hwfn)->p_dev) ? ((_p_hwfn)->abs_pf_id & 1) : 0)
u8 port_id;
bool b_active;
@@ -556,10 +625,6 @@ struct ecore_hwfn {
bool b_rdma_enabled_in_prs;
u32 rdma_prs_search_reg;
- /* Array of sb_info of all status blocks */
- struct ecore_sb_info *sbs_info[MAX_SB_PER_PF_MIMD];
- u16 num_sbs;
-
struct ecore_cxt_mngr *p_cxt_mngr;
/* Flag indicating whether interrupts are enabled or not*/
@@ -573,6 +638,7 @@ struct ecore_hwfn {
struct ecore_pf_iov *pf_iov_info;
struct ecore_mcp_info *mcp_info;
struct ecore_dcbx_info *p_dcbx_info;
+ struct ecore_ufp_info ufp_info;
struct ecore_dmae_info dmae_info;
@@ -603,17 +669,19 @@ struct ecore_hwfn {
/* L2-related */
struct ecore_l2_info *p_l2_info;
+ /* Mechanism for recovering from doorbell drop */
+ struct ecore_db_recovery_info db_recovery_info;
+
/* @DPDK */
struct ecore_ptt *p_arfs_ptt;
};
-#ifndef __EXTRACT__LINUX__
enum ecore_mf_mode {
ECORE_MF_DEFAULT,
ECORE_MF_OVLAN,
ECORE_MF_NPAR,
+ ECORE_MF_UFP,
};
-#endif
/* @DPDK */
struct ecore_dbg_feature {
@@ -632,15 +700,18 @@ enum qed_dbg_features {
DBG_FEATURE_NUM
};
+enum ecore_dev_type {
+ ECORE_DEV_TYPE_BB,
+ ECORE_DEV_TYPE_AH,
+};
+
struct ecore_dev {
u32 dp_module;
u8 dp_level;
char name[NAME_SIZE];
void *dp_ctx;
- u8 type;
-#define ECORE_DEV_TYPE_BB (0 << 0)
-#define ECORE_DEV_TYPE_AH (1 << 0)
+ enum ecore_dev_type type;
/* Translate type/revision combo into the proper conditions */
#define ECORE_IS_BB(dev) ((dev)->type == ECORE_DEV_TYPE_BB)
#define ECORE_IS_BB_A0(dev) (ECORE_IS_BB(dev) && CHIP_REV_IS_A0(dev))
@@ -653,66 +724,68 @@ struct ecore_dev {
#define ECORE_IS_AH(dev) ((dev)->type == ECORE_DEV_TYPE_AH)
#define ECORE_IS_K2(dev) ECORE_IS_AH(dev)
+ u16 vendor_id;
+ u16 device_id;
#define ECORE_DEV_ID_MASK 0xff00
#define ECORE_DEV_ID_MASK_BB 0x1600
#define ECORE_DEV_ID_MASK_AH 0x8000
- u16 vendor_id;
- u16 device_id;
-
u16 chip_num;
- #define CHIP_NUM_MASK 0xffff
- #define CHIP_NUM_SHIFT 16
+#define CHIP_NUM_MASK 0xffff
+#define CHIP_NUM_SHIFT 0
- u16 chip_rev;
- #define CHIP_REV_MASK 0xf
- #define CHIP_REV_SHIFT 12
+ u8 chip_rev;
+#define CHIP_REV_MASK 0xf
+#define CHIP_REV_SHIFT 0
#ifndef ASIC_ONLY
- #define CHIP_REV_IS_TEDIBEAR(_p_dev) ((_p_dev)->chip_rev == 0x5)
- #define CHIP_REV_IS_EMUL_A0(_p_dev) ((_p_dev)->chip_rev == 0xe)
- #define CHIP_REV_IS_EMUL_B0(_p_dev) ((_p_dev)->chip_rev == 0xc)
- #define CHIP_REV_IS_EMUL(_p_dev) (CHIP_REV_IS_EMUL_A0(_p_dev) || \
- CHIP_REV_IS_EMUL_B0(_p_dev))
- #define CHIP_REV_IS_FPGA_A0(_p_dev) ((_p_dev)->chip_rev == 0xf)
- #define CHIP_REV_IS_FPGA_B0(_p_dev) ((_p_dev)->chip_rev == 0xd)
- #define CHIP_REV_IS_FPGA(_p_dev) (CHIP_REV_IS_FPGA_A0(_p_dev) || \
- CHIP_REV_IS_FPGA_B0(_p_dev))
- #define CHIP_REV_IS_SLOW(_p_dev) \
- (CHIP_REV_IS_EMUL(_p_dev) || CHIP_REV_IS_FPGA(_p_dev))
- #define CHIP_REV_IS_A0(_p_dev) \
- (CHIP_REV_IS_EMUL_A0(_p_dev) || \
- CHIP_REV_IS_FPGA_A0(_p_dev) || \
- !(_p_dev)->chip_rev)
- #define CHIP_REV_IS_B0(_p_dev) \
- (CHIP_REV_IS_EMUL_B0(_p_dev) || \
- CHIP_REV_IS_FPGA_B0(_p_dev) || \
- (_p_dev)->chip_rev == 1)
- #define CHIP_REV_IS_ASIC(_p_dev) !CHIP_REV_IS_SLOW(_p_dev)
+#define CHIP_REV_IS_TEDIBEAR(_p_dev) ((_p_dev)->chip_rev == 0x5)
+#define CHIP_REV_IS_EMUL_A0(_p_dev) ((_p_dev)->chip_rev == 0xe)
+#define CHIP_REV_IS_EMUL_B0(_p_dev) ((_p_dev)->chip_rev == 0xc)
+#define CHIP_REV_IS_EMUL(_p_dev) \
+ (CHIP_REV_IS_EMUL_A0(_p_dev) || CHIP_REV_IS_EMUL_B0(_p_dev))
+#define CHIP_REV_IS_FPGA_A0(_p_dev) ((_p_dev)->chip_rev == 0xf)
+#define CHIP_REV_IS_FPGA_B0(_p_dev) ((_p_dev)->chip_rev == 0xd)
+#define CHIP_REV_IS_FPGA(_p_dev) \
+ (CHIP_REV_IS_FPGA_A0(_p_dev) || CHIP_REV_IS_FPGA_B0(_p_dev))
+#define CHIP_REV_IS_SLOW(_p_dev) \
+ (CHIP_REV_IS_EMUL(_p_dev) || CHIP_REV_IS_FPGA(_p_dev))
+#define CHIP_REV_IS_A0(_p_dev) \
+ (CHIP_REV_IS_EMUL_A0(_p_dev) || CHIP_REV_IS_FPGA_A0(_p_dev) || \
+ (!(_p_dev)->chip_rev && !(_p_dev)->chip_metal))
+#define CHIP_REV_IS_B0(_p_dev) \
+ (CHIP_REV_IS_EMUL_B0(_p_dev) || CHIP_REV_IS_FPGA_B0(_p_dev) || \
+ ((_p_dev)->chip_rev == 1 && !(_p_dev)->chip_metal))
+#define CHIP_REV_IS_ASIC(_p_dev) !CHIP_REV_IS_SLOW(_p_dev)
#else
- #define CHIP_REV_IS_A0(_p_dev) (!(_p_dev)->chip_rev)
- #define CHIP_REV_IS_B0(_p_dev) ((_p_dev)->chip_rev == 1)
+#define CHIP_REV_IS_A0(_p_dev) \
+ (!(_p_dev)->chip_rev && !(_p_dev)->chip_metal)
+#define CHIP_REV_IS_B0(_p_dev) \
+ ((_p_dev)->chip_rev == 1 && !(_p_dev)->chip_metal)
#endif
- u16 chip_metal;
- #define CHIP_METAL_MASK 0xff
- #define CHIP_METAL_SHIFT 4
+ u8 chip_metal;
+#define CHIP_METAL_MASK 0xff
+#define CHIP_METAL_SHIFT 0
- u16 chip_bond_id;
- #define CHIP_BOND_ID_MASK 0xf
- #define CHIP_BOND_ID_SHIFT 0
+ u8 chip_bond_id;
+#define CHIP_BOND_ID_MASK 0xff
+#define CHIP_BOND_ID_SHIFT 0
u8 num_engines;
- u8 num_ports_in_engines;
+ u8 num_ports;
+ u8 num_ports_in_engine;
u8 num_funcs_in_port;
u8 path_id;
+
+ unsigned long mf_bits;
enum ecore_mf_mode mf_mode;
- #define IS_MF_DEFAULT(_p_hwfn) \
- (((_p_hwfn)->p_dev)->mf_mode == ECORE_MF_DEFAULT)
- #define IS_MF_SI(_p_hwfn) \
- (((_p_hwfn)->p_dev)->mf_mode == ECORE_MF_NPAR)
- #define IS_MF_SD(_p_hwfn) \
- (((_p_hwfn)->p_dev)->mf_mode == ECORE_MF_OVLAN)
+#define IS_MF_DEFAULT(_p_hwfn) \
+ (((_p_hwfn)->p_dev)->mf_mode == ECORE_MF_DEFAULT)
+#define IS_MF_SI(_p_hwfn) \
+ (((_p_hwfn)->p_dev)->mf_mode == ECORE_MF_NPAR)
+#define IS_MF_SD(_p_hwfn) \
+ (((_p_hwfn)->p_dev)->mf_mode == ECORE_MF_OVLAN)
int pcie_width;
int pcie_speed;
@@ -744,12 +817,14 @@ struct ecore_dev {
/* HW functions */
u8 num_hwfns;
struct ecore_hwfn hwfns[MAX_HWFNS_PER_DEVICE];
+#define ECORE_IS_CMT(dev) ((dev)->num_hwfns > 1)
/* SRIOV */
struct ecore_hw_sriov_info *p_iov_info;
#define IS_ECORE_SRIOV(p_dev) (!!(p_dev)->p_iov_info)
struct ecore_tunnel_info tunnel;
bool b_is_vf;
+ bool b_dont_override_vf_msix;
u32 drv_type;
@@ -800,6 +875,8 @@ struct ecore_dev {
#define NUM_OF_ENG_PFS(dev) (ECORE_IS_BB(dev) ? MAX_NUM_PFS_BB \
: MAX_NUM_PFS_K2)
+#define CRC8_TABLE_SIZE 256
+
/**
* @brief ecore_concrete_to_sw_fid - get the sw function id from
* the concrete value.
@@ -808,8 +885,7 @@ struct ecore_dev {
*
* @return OSAL_INLINE u8
*/
-static OSAL_INLINE u8
-ecore_concrete_to_sw_fid(__rte_unused struct ecore_dev *p_dev, u32 concrete_fid)
+static OSAL_INLINE u8 ecore_concrete_to_sw_fid(u32 concrete_fid)
{
u8 vfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_VFID);
u8 pfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_PFID);
@@ -824,11 +900,12 @@ ecore_concrete_to_sw_fid(__rte_unused struct ecore_dev *p_dev, u32 concrete_fid)
return sw_fid;
}
-#define PURE_LB_TC 8
#define PKT_LB_TC 9
+#define MAX_NUM_VOQS_E4 20
int ecore_configure_vport_wfq(struct ecore_dev *p_dev, u16 vp_id, u32 rate);
void ecore_configure_vp_wfq_on_link_change(struct ecore_dev *p_dev,
+ struct ecore_ptt *p_ptt,
u32 min_pf_rate);
int ecore_configure_pf_max_bandwidth(struct ecore_dev *p_dev, u8 max_bw);
@@ -854,6 +931,13 @@ u16 ecore_get_cm_pq_idx_mcos(struct ecore_hwfn *p_hwfn, u8 tc);
u16 ecore_get_cm_pq_idx_vf(struct ecore_hwfn *p_hwfn, u16 vf);
u16 ecore_get_cm_pq_idx_rl(struct ecore_hwfn *p_hwfn, u8 qpid);
+const char *ecore_hw_get_resc_name(enum ecore_resources res_id);
+
+/* doorbell recovery mechanism */
+void ecore_db_recovery_dp(struct ecore_hwfn *p_hwfn);
+void ecore_db_recovery_execute(struct ecore_hwfn *p_hwfn,
+ enum ecore_db_rec_exec);
+
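A hedged usage sketch of the new prototypes. The meaning of DB_REC_DRY_RUN versus DB_REC_REAL_DEAL is inferred from the enum names only, and when a driver should trigger either pass is outside this patch.

	static void example_db_recovery_flow(struct ecore_hwfn *p_hwfn)
	{
		ecore_db_recovery_dp(p_hwfn);		/* log registered entries */
		ecore_db_recovery_execute(p_hwfn, DB_REC_DRY_RUN);	/* assumed no side effects */
		ecore_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL);	/* re-issue doorbells */
	}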
/* amount of resources used in qm init */
u8 ecore_init_qm_get_num_tcs(struct ecore_hwfn *p_hwfn);
u16 ecore_init_qm_get_num_vfs(struct ecore_hwfn *p_hwfn);
@@ -863,6 +947,4 @@ u16 ecore_init_qm_get_num_pqs(struct ecore_hwfn *p_hwfn);
#define ECORE_LEADING_HWFN(dev) (&dev->hwfns[0])
-const char *ecore_hw_get_resc_name(enum ecore_resources res_id);
-
#endif /* __ECORE_H */
diff --git a/drivers/net/qede/base/ecore_cxt.c b/drivers/net/qede/base/ecore_cxt.c
index 688118bb..50bd66da 100644
--- a/drivers/net/qede/base/ecore_cxt.c
+++ b/drivers/net/qede/base/ecore_cxt.c
@@ -41,10 +41,7 @@
#define TM_ELEM_SIZE 4
/* ILT constants */
-/* If for some reason, HW P size is modified to be less than 32K,
- * special handling needs to be made for CDU initialization
- */
-#define ILT_DEFAULT_HW_P_SIZE 3
+#define ILT_DEFAULT_HW_P_SIZE 4
#define ILT_PAGE_IN_BYTES(hw_p_size) (1U << ((hw_p_size) + 12))
#define ILT_CFG_REG(cli, reg) PSWRQ2_REG_##cli##_##reg##_RT_OFFSET
@@ -59,8 +56,8 @@
/* connection context union */
union conn_context {
- struct core_conn_context core_ctx;
- struct eth_conn_context eth_ctx;
+ struct e4_core_conn_context core_ctx;
+ struct e4_eth_conn_context eth_ctx;
};
/* TYPE-0 task context - iSCSI, FCOE */
@@ -69,6 +66,7 @@ union type0_task_context {
/* TYPE-1 task context - ROCE */
union type1_task_context {
+ struct regpair reserved; /* @DPDK */
};
struct src_ent {
@@ -230,13 +228,6 @@ struct ecore_cxt_mngr {
/* TODO - VF arfs filters ? */
};
-/* check if resources/configuration is required according to protocol type */
-static OSAL_INLINE bool src_proto(struct ecore_hwfn *p_hwfn,
- enum protocol_type type)
-{
- return type == PROTOCOLID_TOE;
-}
-
static OSAL_INLINE bool tm_cid_proto(enum protocol_type type)
{
return type == PROTOCOLID_TOE;
@@ -270,16 +261,12 @@ struct ecore_src_iids {
u32 per_vf_cids;
};
-static OSAL_INLINE void ecore_cxt_src_iids(struct ecore_hwfn *p_hwfn,
- struct ecore_cxt_mngr *p_mngr,
- struct ecore_src_iids *iids)
+static void ecore_cxt_src_iids(struct ecore_cxt_mngr *p_mngr,
+ struct ecore_src_iids *iids)
{
u32 i;
for (i = 0; i < MAX_CONN_TYPES; i++) {
- if (!src_proto(p_hwfn, i))
- continue;
-
iids->pf_cids += p_mngr->conn_cfg[i].cid_count;
iids->per_vf_cids += p_mngr->conn_cfg[i].cids_per_vf;
}
@@ -297,8 +284,8 @@ struct ecore_tm_iids {
u32 per_vf_tids;
};
-static OSAL_INLINE void ecore_cxt_tm_iids(struct ecore_cxt_mngr *p_mngr,
- struct ecore_tm_iids *iids)
+static void ecore_cxt_tm_iids(struct ecore_cxt_mngr *p_mngr,
+ struct ecore_tm_iids *iids)
{
bool tm_vf_required = false;
bool tm_required = false;
@@ -397,6 +384,20 @@ static struct ecore_tid_seg *ecore_cxt_tid_seg_info(struct ecore_hwfn *p_hwfn,
return OSAL_NULL;
}
+static void ecore_cxt_set_srq_count(struct ecore_hwfn *p_hwfn, u32 num_srqs)
+{
+ struct ecore_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr;
+
+ p_mgr->srq_count = num_srqs;
+}
+
+u32 ecore_cxt_get_srq_count(struct ecore_hwfn *p_hwfn)
+{
+ struct ecore_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr;
+
+ return p_mgr->srq_count;
+}
+
/* set the iids (cid/tid) count per protocol */
static void ecore_cxt_set_proto_cid_count(struct ecore_hwfn *p_hwfn,
enum protocol_type type,
@@ -687,7 +688,7 @@ enum _ecore_status_t ecore_cxt_cfg_ilt_compute(struct ecore_hwfn *p_hwfn)
p_blk = &p_cli->pf_blks[0];
ecore_cxt_qm_iids(p_hwfn, &qm_iids);
- total = ecore_qm_pf_mem_size(p_hwfn->rel_pf_id, qm_iids.cids,
+ total = ecore_qm_pf_mem_size(qm_iids.cids,
qm_iids.vf_cids, qm_iids.tids,
p_hwfn->qm_info.num_pqs,
p_hwfn->qm_info.num_vf_pqs);
@@ -706,7 +707,7 @@ enum _ecore_status_t ecore_cxt_cfg_ilt_compute(struct ecore_hwfn *p_hwfn)
/* SRC */
p_cli = &p_mngr->clients[ILT_CLI_SRC];
- ecore_cxt_src_iids(p_hwfn, p_mngr, &src_iids);
+ ecore_cxt_src_iids(p_mngr, &src_iids);
/* Both the PF and VFs searcher connections are stored in the per PF
* database. Thus sum the PF searcher cids and all the VFs searcher
@@ -820,7 +821,7 @@ static enum _ecore_status_t ecore_cxt_src_t2_alloc(struct ecore_hwfn *p_hwfn)
if (!p_src->active)
return ECORE_SUCCESS;
- ecore_cxt_src_iids(p_hwfn, p_mngr, &src_iids);
+ ecore_cxt_src_iids(p_mngr, &src_iids);
conn_num = src_iids.pf_cids + src_iids.per_vf_cids * p_mngr->vf_count;
total_size = conn_num * sizeof(struct src_ent);
@@ -1156,7 +1157,7 @@ enum _ecore_status_t ecore_cxt_mngr_alloc(struct ecore_hwfn *p_hwfn)
clients[ILT_CLI_TSDM].last.reg = ILT_CFG_REG(TSDM, LAST_ILT);
clients[ILT_CLI_TSDM].p_size.reg = ILT_CFG_REG(TSDM, P_SIZE);
- /* default ILT page size for all clients is 32K */
+ /* default ILT page size for all clients is 64K */
for (i = 0; i < ILT_CLI_MAX; i++)
p_mngr->clients[i].p_size.val = ILT_DEFAULT_HW_P_SIZE;
@@ -1170,7 +1171,9 @@ enum _ecore_status_t ecore_cxt_mngr_alloc(struct ecore_hwfn *p_hwfn)
p_mngr->vf_count = p_hwfn->p_dev->p_iov_info->total_vfs;
/* Initialize the dynamic ILT allocation mutex */
+#ifdef CONFIG_ECORE_LOCK_ALLOC
OSAL_MUTEX_ALLOC(p_hwfn, &p_mngr->mutex);
+#endif
OSAL_MUTEX_INIT(&p_mngr->mutex);
	/* Set the cxt mngr pointer prior to further allocations */
@@ -1219,7 +1222,9 @@ void ecore_cxt_mngr_free(struct ecore_hwfn *p_hwfn)
ecore_cid_map_free(p_hwfn);
ecore_cxt_src_t2_free(p_hwfn);
ecore_ilt_shadow_free(p_hwfn);
+#ifdef CONFIG_ECORE_LOCK_ALLOC
OSAL_MUTEX_DEALLOC(&p_hwfn->p_cxt_mngr->mutex);
+#endif
OSAL_FREE(p_hwfn->p_dev, p_hwfn->p_cxt_mngr);
}
@@ -1422,29 +1427,32 @@ static void ecore_cdu_init_pf(struct ecore_hwfn *p_hwfn)
}
}
-void ecore_qm_init_pf(struct ecore_hwfn *p_hwfn)
+void ecore_qm_init_pf(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
{
struct ecore_qm_info *qm_info = &p_hwfn->qm_info;
+ struct ecore_mcp_link_state *p_link;
struct ecore_qm_iids iids;
OSAL_MEM_ZERO(&iids, sizeof(iids));
ecore_cxt_qm_iids(p_hwfn, &iids);
- ecore_qm_pf_rt_init(p_hwfn, p_hwfn->p_main_ptt, p_hwfn->port_id,
+ p_link = &ECORE_LEADING_HWFN(p_hwfn->p_dev)->mcp_info->link_output;
+
+ ecore_qm_pf_rt_init(p_hwfn, p_ptt, p_hwfn->port_id,
p_hwfn->rel_pf_id, qm_info->max_phys_tcs_per_port,
- p_hwfn->first_on_engine,
iids.cids, iids.vf_cids, iids.tids,
qm_info->start_pq,
qm_info->num_pqs - qm_info->num_vf_pqs,
qm_info->num_vf_pqs,
qm_info->start_vport,
qm_info->num_vports, qm_info->pf_wfq,
- qm_info->pf_rl, p_hwfn->qm_info.qm_pq_params,
+ qm_info->pf_rl, p_link->speed,
+ p_hwfn->qm_info.qm_pq_params,
p_hwfn->qm_info.qm_vport_params);
}
/* CM PF */
-void ecore_cm_init_pf(struct ecore_hwfn *p_hwfn)
+static void ecore_cm_init_pf(struct ecore_hwfn *p_hwfn)
{
STORE_RT_REG(p_hwfn, XCM_REG_CON_PHY_Q3_RT_OFFSET,
ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB));
@@ -1639,7 +1647,7 @@ static void ecore_src_init_pf(struct ecore_hwfn *p_hwfn)
struct ecore_src_iids src_iids;
OSAL_MEM_ZERO(&src_iids, sizeof(src_iids));
- ecore_cxt_src_iids(p_hwfn, p_mngr, &src_iids);
+ ecore_cxt_src_iids(p_mngr, &src_iids);
conn_num = src_iids.pf_cids + src_iids.per_vf_cids * p_mngr->vf_count;
if (!conn_num)
return;
@@ -1766,9 +1774,11 @@ static void ecore_tm_init_pf(struct ecore_hwfn *p_hwfn)
static void ecore_prs_init_pf(struct ecore_hwfn *p_hwfn)
{
struct ecore_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
- struct ecore_conn_type_cfg *p_fcoe = &p_mngr->conn_cfg[PROTOCOLID_FCOE];
+ struct ecore_conn_type_cfg *p_fcoe;
struct ecore_tid_seg *p_tid;
+ p_fcoe = &p_mngr->conn_cfg[PROTOCOLID_FCOE];
+
/* If FCoE is active set the MAX OX_ID (tid) in the Parser */
if (!p_fcoe->cid_count)
return;
@@ -1785,9 +1795,9 @@ void ecore_cxt_hw_init_common(struct ecore_hwfn *p_hwfn)
ecore_cdu_init_common(p_hwfn);
}
-void ecore_cxt_hw_init_pf(struct ecore_hwfn *p_hwfn)
+void ecore_cxt_hw_init_pf(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
{
- ecore_qm_init_pf(p_hwfn);
+ ecore_qm_init_pf(p_hwfn, p_ptt);
ecore_cm_init_pf(p_hwfn);
ecore_dq_init_pf(p_hwfn);
ecore_cdu_init_pf(p_hwfn);
@@ -1969,20 +1979,6 @@ enum _ecore_status_t ecore_cxt_get_cid_info(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
}
-static void ecore_cxt_set_srq_count(struct ecore_hwfn *p_hwfn, u32 num_srqs)
-{
- struct ecore_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr;
-
- p_mgr->srq_count = num_srqs;
-}
-
-u32 ecore_cxt_get_srq_count(struct ecore_hwfn *p_hwfn)
-{
- struct ecore_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr;
-
- return p_mgr->srq_count;
-}
-
enum _ecore_status_t ecore_cxt_set_pf_params(struct ecore_hwfn *p_hwfn)
{
/* Set the number of required CORE connections */
@@ -1993,19 +1989,24 @@ enum _ecore_status_t ecore_cxt_set_pf_params(struct ecore_hwfn *p_hwfn)
switch (p_hwfn->hw_info.personality) {
case ECORE_PCI_ETH:
{
- struct ecore_eth_pf_params *p_params =
+ u32 count = 0;
+
+ struct ecore_eth_pf_params *p_params =
&p_hwfn->pf_params.eth_pf_params;
- /* TODO - we probably want to add VF number to the PF
- * params;
- * As of now, allocates 16 * 2 per-VF [to retain regular
- * functionality].
- */
- ecore_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
- p_params->num_cons, 32);
- p_hwfn->p_cxt_mngr->arfs_count =
- p_params->num_arfs_filters;
- break;
+ if (!p_params->num_vf_cons)
+ p_params->num_vf_cons = ETH_PF_PARAMS_VF_CONS_DEFAULT;
+ ecore_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
+ p_params->num_cons,
+ p_params->num_vf_cons);
+
+ count = p_params->num_arfs_filters;
+
+ if (!OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS,
+ &p_hwfn->p_dev->mf_bits))
+ p_hwfn->p_cxt_mngr->arfs_count = count;
+
+ break;
}
default:
return ECORE_INVAL;
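Illustrative only: how a caller might size the Ethernet connection pools before this function runs. The field names come from the hunk above; the numbers are arbitrary, and leaving num_vf_cons at zero now falls back to ETH_PF_PARAMS_VF_CONS_DEFAULT.

	static void example_fill_eth_pf_params(struct ecore_hwfn *p_hwfn)
	{
		struct ecore_eth_pf_params *p_params =
			&p_hwfn->pf_params.eth_pf_params;

		p_params->num_cons = 64;		/* PF L2 connections */
		p_params->num_vf_cons = 0;		/* 0 => per-VF default */
		p_params->num_arfs_filters = 128;	/* ignored when MFW disables aRFS */
	}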
@@ -2220,34 +2221,3 @@ ecore_cxt_free_ilt_range(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
}
-
-enum _ecore_status_t ecore_cxt_free_proto_ilt(struct ecore_hwfn *p_hwfn,
- enum protocol_type proto)
-{
- enum _ecore_status_t rc;
- u32 cid;
-
- /* Free Connection CXT */
- rc = ecore_cxt_free_ilt_range(p_hwfn, ECORE_ELEM_CXT,
- ecore_cxt_get_proto_cid_start(p_hwfn,
- proto),
- ecore_cxt_get_proto_cid_count(p_hwfn,
- proto,
- &cid));
-
- if (rc)
- return rc;
-
- /* Free Task CXT */
- rc = ecore_cxt_free_ilt_range(p_hwfn, ECORE_ELEM_TASK, 0,
- ecore_cxt_get_proto_tid_count(p_hwfn,
- proto));
- if (rc)
- return rc;
-
- /* Free TSDM CXT */
- rc = ecore_cxt_free_ilt_range(p_hwfn, ECORE_ELEM_SRQ, 0,
- ecore_cxt_get_srq_count(p_hwfn));
-
- return rc;
-}
diff --git a/drivers/net/qede/base/ecore_cxt.h b/drivers/net/qede/base/ecore_cxt.h
index 6ff823a5..54761e4e 100644
--- a/drivers/net/qede/base/ecore_cxt.h
+++ b/drivers/net/qede/base/ecore_cxt.h
@@ -98,15 +98,17 @@ void ecore_cxt_hw_init_common(struct ecore_hwfn *p_hwfn);
 * @brief ecore_cxt_hw_init_pf - Initialize ILT and DQ, PF phase, per path.
*
* @param p_hwfn
+ * @param p_ptt
*/
-void ecore_cxt_hw_init_pf(struct ecore_hwfn *p_hwfn);
+void ecore_cxt_hw_init_pf(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt);
/**
 * @brief ecore_qm_init_pf - Initialize the QM PF phase, per path
*
* @param p_hwfn
+ * @param p_ptt
*/
-void ecore_qm_init_pf(struct ecore_hwfn *p_hwfn);
+void ecore_qm_init_pf(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt);
/**
* @brief Reconfigures QM pf on the fly
diff --git a/drivers/net/qede/base/ecore_dcbx.c b/drivers/net/qede/base/ecore_dcbx.c
index 4f1b0698..632297a7 100644
--- a/drivers/net/qede/base/ecore_dcbx.c
+++ b/drivers/net/qede/base/ecore_dcbx.c
@@ -28,13 +28,13 @@
static bool ecore_dcbx_app_ethtype(u32 app_info_bitmap)
{
- return !!(ECORE_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF) ==
+ return !!(GET_MFW_FIELD(app_info_bitmap, DCBX_APP_SF) ==
DCBX_APP_SF_ETHTYPE);
}
static bool ecore_dcbx_ieee_app_ethtype(u32 app_info_bitmap)
{
- u8 mfw_val = ECORE_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+ u8 mfw_val = GET_MFW_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
/* Old MFW */
if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
@@ -45,13 +45,13 @@ static bool ecore_dcbx_ieee_app_ethtype(u32 app_info_bitmap)
static bool ecore_dcbx_app_port(u32 app_info_bitmap)
{
- return !!(ECORE_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF) ==
+ return !!(GET_MFW_FIELD(app_info_bitmap, DCBX_APP_SF) ==
DCBX_APP_SF_PORT);
}
static bool ecore_dcbx_ieee_app_port(u32 app_info_bitmap, u8 type)
{
- u8 mfw_val = ECORE_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+ u8 mfw_val = GET_MFW_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
/* Old MFW */
if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
@@ -114,6 +114,21 @@ ecore_dcbx_dp_protocol(struct ecore_hwfn *p_hwfn,
}
}
+u8 ecore_dcbx_get_dscp_value(struct ecore_hwfn *p_hwfn, u8 pri)
+{
+ struct ecore_dcbx_dscp_params *dscp = &p_hwfn->p_dcbx_info->get.dscp;
+ u8 i;
+
+ if (!dscp->enabled)
+ return ECORE_DCBX_DSCP_DISABLED;
+
+ for (i = 0; i < ECORE_DCBX_DSCP_SIZE; i++)
+ if (pri == dscp->dscp_pri_map[i])
+ return i;
+
+ return ECORE_DCBX_DSCP_DISABLED;
+}
+
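A short, hedged usage sketch of the new helper: it returns the DSCP codepoint mapped to the given priority, or ECORE_DCBX_DSCP_DISABLED (defined later in this patch as 0xFF) when DSCP mapping is off; the fallback value below is purely illustrative.

	static u8 example_pri_to_dscp(struct ecore_hwfn *p_hwfn, u8 pri)
	{
		u8 dscp = ecore_dcbx_get_dscp_value(p_hwfn, pri);

		if (dscp == ECORE_DCBX_DSCP_DISABLED)
			return 0;	/* arbitrary fallback for the example */

		return dscp;
	}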
static void
ecore_dcbx_set_params(struct ecore_dcbx_results *p_data,
struct ecore_hwfn *p_hwfn,
@@ -121,29 +136,18 @@ ecore_dcbx_set_params(struct ecore_dcbx_results *p_data,
enum dcbx_protocol_type type,
enum ecore_pci_personality personality)
{
- struct ecore_dcbx_dscp_params *dscp = &p_hwfn->p_dcbx_info->get.dscp;
-
/* PF update ramrod data */
p_data->arr[type].enable = enable;
p_data->arr[type].priority = prio;
p_data->arr[type].tc = tc;
- p_data->arr[type].dscp_enable = dscp->enabled;
- if (p_data->arr[type].dscp_enable) {
- u8 i;
-
- for (i = 0; i < ECORE_DCBX_DSCP_SIZE; i++)
- if (prio == dscp->dscp_pri_map[i]) {
- p_data->arr[type].dscp_val = i;
- break;
- }
+ p_data->arr[type].dscp_val = ecore_dcbx_get_dscp_value(p_hwfn, prio);
+ if (p_data->arr[type].dscp_val == ECORE_DCBX_DSCP_DISABLED) {
+ p_data->arr[type].dscp_enable = false;
+ p_data->arr[type].dscp_val = 0;
+ } else {
+ p_data->arr[type].dscp_enable = true;
}
-
- if (enable && p_data->arr[type].dscp_enable)
- p_data->arr[type].update = UPDATE_DCB_DSCP;
- else if (enable)
- p_data->arr[type].update = UPDATE_DCB;
- else
- p_data->arr[type].update = DONT_UPDATE_DCB_DSCP;
+ p_data->arr[type].update = UPDATE_DCB_DSCP;
/* QM reconf data */
if (p_hwfn->hw_info.personality == personality)
@@ -159,7 +163,6 @@ ecore_dcbx_update_app_info(struct ecore_dcbx_results *p_data,
{
enum ecore_pci_personality personality;
enum dcbx_protocol_type id;
- const char *name; /* @DPDK */
int i;
for (i = 0; i < OSAL_ARRAY_SIZE(ecore_dcbx_app_update); i++) {
@@ -169,7 +172,6 @@ ecore_dcbx_update_app_info(struct ecore_dcbx_results *p_data,
continue;
personality = ecore_dcbx_app_update[i].personality;
- name = ecore_dcbx_app_update[i].name;
ecore_dcbx_set_params(p_data, p_hwfn, enable,
prio, tc, type, personality);
@@ -224,7 +226,7 @@ ecore_dcbx_get_app_protocol_type(struct ecore_hwfn *p_hwfn,
return true;
}
-/* Parse app TLV's to update TC information in hw_info structure for
+/* Parse app TLV's to update TC information in hw_info structure for
* reconfiguring QM. Get protocol specific data for PF update ramrod command.
*/
static enum _ecore_status_t
@@ -234,8 +236,8 @@ ecore_dcbx_process_tlv(struct ecore_hwfn *p_hwfn,
int count, u8 dcbx_version)
{
enum dcbx_protocol_type type;
+ bool enable, ieee, eth_tlv;
u8 tc, priority_map;
- bool enable, ieee;
u16 protocol_id;
u8 priority;
enum _ecore_status_t rc = ECORE_SUCCESS;
@@ -246,12 +248,12 @@ ecore_dcbx_process_tlv(struct ecore_hwfn *p_hwfn,
count, pri_tc_tbl, dcbx_version);
ieee = (dcbx_version == DCBX_CONFIG_VERSION_IEEE);
+ eth_tlv = false;
/* Parse APP TLV */
for (i = 0; i < count; i++) {
- protocol_id = ECORE_MFW_GET_FIELD(p_tbl[i].entry,
- DCBX_APP_PROTOCOL_ID);
- priority_map = ECORE_MFW_GET_FIELD(p_tbl[i].entry,
- DCBX_APP_PRI_MAP);
+ protocol_id = GET_MFW_FIELD(p_tbl[i].entry,
+ DCBX_APP_PROTOCOL_ID);
+ priority_map = GET_MFW_FIELD(p_tbl[i].entry, DCBX_APP_PRI_MAP);
DP_VERBOSE(p_hwfn, ECORE_MSG_DCB, "Id = 0x%x pri_map = %u\n",
protocol_id, priority_map);
rc = ecore_dcbx_get_app_priority(priority_map, &priority);
@@ -270,12 +272,23 @@ ecore_dcbx_process_tlv(struct ecore_hwfn *p_hwfn,
* indication, but we only got here if there was an
* app tlv for the protocol, so dcbx must be enabled.
*/
- enable = !(type == DCBX_PROTOCOL_ETH);
+ if (type == DCBX_PROTOCOL_ETH) {
+ enable = false;
+ eth_tlv = true;
+ } else {
+ enable = true;
+ }
ecore_dcbx_update_app_info(p_data, p_hwfn, enable,
priority, tc, type);
}
}
+
+ /* If Eth TLV is not detected, use UFP TC as default TC */
+ if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC,
+ &p_hwfn->p_dev->mf_bits) && !eth_tlv)
+ p_data->arr[DCBX_PROTOCOL_ETH].tc = p_hwfn->ufp_info.tc;
+
/* Update ramrod protocol data and hw_info fields
* with default info when corresponding APP TLV's are not detected.
* The enabled field has a different logic for ethernet as only for
@@ -303,17 +316,17 @@ static enum _ecore_status_t
ecore_dcbx_process_mib_info(struct ecore_hwfn *p_hwfn)
{
struct dcbx_app_priority_feature *p_app;
- enum _ecore_status_t rc = ECORE_SUCCESS;
- struct ecore_dcbx_results data = { 0 };
struct dcbx_app_priority_entry *p_tbl;
+ struct ecore_dcbx_results data = { 0 };
struct dcbx_ets_feature *p_ets;
struct ecore_hw_info *p_info;
u32 pri_tc_tbl, flags;
u8 dcbx_version;
int num_entries;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
flags = p_hwfn->p_dcbx_info->operational.flags;
- dcbx_version = ECORE_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION);
+ dcbx_version = GET_MFW_FIELD(flags, DCBX_CONFIG_VERSION);
p_app = &p_hwfn->p_dcbx_info->operational.features.app;
p_tbl = p_app->app_pri_tbl;
@@ -322,16 +335,15 @@ ecore_dcbx_process_mib_info(struct ecore_hwfn *p_hwfn)
pri_tc_tbl = p_ets->pri_tc_tbl[0];
p_info = &p_hwfn->hw_info;
- num_entries = ECORE_MFW_GET_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES);
+ num_entries = GET_MFW_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES);
rc = ecore_dcbx_process_tlv(p_hwfn, &data, p_tbl, pri_tc_tbl,
num_entries, dcbx_version);
if (rc != ECORE_SUCCESS)
return rc;
- p_info->num_active_tc = ECORE_MFW_GET_FIELD(p_ets->flags,
- DCBX_ETS_MAX_TCS);
- p_hwfn->qm_info.ooo_tc = ECORE_MFW_GET_FIELD(p_ets->flags, DCBX_OOO_TC);
+ p_info->num_active_tc = GET_MFW_FIELD(p_ets->flags, DCBX_ETS_MAX_TCS);
+ p_hwfn->qm_info.ooo_tc = GET_MFW_FIELD(p_ets->flags, DCBX_OOO_TC);
data.pf_id = p_hwfn->rel_pf_id;
data.dcbx_enabled = !!dcbx_version;
@@ -349,9 +361,9 @@ ecore_dcbx_copy_mib(struct ecore_hwfn *p_hwfn,
struct ecore_dcbx_mib_meta_data *p_data,
enum ecore_mib_read_type type)
{
- enum _ecore_status_t rc = ECORE_SUCCESS;
u32 prefix_seq_num, suffix_seq_num;
int read_count = 0;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
/* The data is considered to be valid only if both sequence numbers are
* the same.
@@ -362,6 +374,12 @@ ecore_dcbx_copy_mib(struct ecore_hwfn *p_hwfn,
p_data->addr, p_data->size);
prefix_seq_num = p_data->lldp_remote->prefix_seq_num;
suffix_seq_num = p_data->lldp_remote->suffix_seq_num;
+ } else if (type == ECORE_DCBX_LLDP_TLVS) {
+ ecore_memcpy_from(p_hwfn, p_ptt, p_data->lldp_tlvs,
+ p_data->addr, p_data->size);
+ prefix_seq_num = p_data->lldp_tlvs->prefix_seq_num;
+ suffix_seq_num = p_data->lldp_tlvs->suffix_seq_num;
+
} else {
ecore_memcpy_from(p_hwfn, p_ptt, p_data->mib,
p_data->addr, p_data->size);
@@ -414,26 +432,24 @@ ecore_dcbx_get_app_data(struct ecore_hwfn *p_hwfn,
u8 pri_map;
int i;
- p_params->app_willing = ECORE_MFW_GET_FIELD(p_app->flags,
- DCBX_APP_WILLING);
- p_params->app_valid = ECORE_MFW_GET_FIELD(p_app->flags,
- DCBX_APP_ENABLED);
- p_params->app_error = ECORE_MFW_GET_FIELD(p_app->flags, DCBX_APP_ERROR);
- p_params->num_app_entries = ECORE_MFW_GET_FIELD(p_app->flags,
- DCBX_APP_NUM_ENTRIES);
- for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
+ p_params->app_willing = GET_MFW_FIELD(p_app->flags, DCBX_APP_WILLING);
+ p_params->app_valid = GET_MFW_FIELD(p_app->flags, DCBX_APP_ENABLED);
+ p_params->app_error = GET_MFW_FIELD(p_app->flags, DCBX_APP_ERROR);
+ p_params->num_app_entries = GET_MFW_FIELD(p_app->flags,
+ DCBX_APP_NUM_ENTRIES);
+ for (i = 0; i < p_params->num_app_entries; i++) {
entry = &p_params->app_entry[i];
if (ieee) {
u8 sf_ieee;
u32 val;
- sf_ieee = ECORE_MFW_GET_FIELD(p_tbl[i].entry,
- DCBX_APP_SF_IEEE);
+ sf_ieee = GET_MFW_FIELD(p_tbl[i].entry,
+ DCBX_APP_SF_IEEE);
switch (sf_ieee) {
case DCBX_APP_SF_IEEE_RESERVED:
/* Old MFW */
- val = ECORE_MFW_GET_FIELD(p_tbl[i].entry,
- DCBX_APP_SF);
+ val = GET_MFW_FIELD(p_tbl[i].entry,
+ DCBX_APP_SF);
entry->sf_ieee = val ?
ECORE_DCBX_SF_IEEE_TCP_UDP_PORT :
ECORE_DCBX_SF_IEEE_ETHTYPE;
@@ -453,14 +469,14 @@ ecore_dcbx_get_app_data(struct ecore_hwfn *p_hwfn,
break;
}
} else {
- entry->ethtype = !(ECORE_MFW_GET_FIELD(p_tbl[i].entry,
- DCBX_APP_SF));
+ entry->ethtype = !(GET_MFW_FIELD(p_tbl[i].entry,
+ DCBX_APP_SF));
}
- pri_map = ECORE_MFW_GET_FIELD(p_tbl[i].entry, DCBX_APP_PRI_MAP);
+ pri_map = GET_MFW_FIELD(p_tbl[i].entry, DCBX_APP_PRI_MAP);
ecore_dcbx_get_app_priority(pri_map, &entry->prio);
- entry->proto_id = ECORE_MFW_GET_FIELD(p_tbl[i].entry,
- DCBX_APP_PROTOCOL_ID);
+ entry->proto_id = GET_MFW_FIELD(p_tbl[i].entry,
+ DCBX_APP_PROTOCOL_ID);
ecore_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry,
entry->proto_id,
&entry->proto_type, ieee);
@@ -478,10 +494,10 @@ ecore_dcbx_get_pfc_data(struct ecore_hwfn *p_hwfn,
{
u8 pfc_map;
- p_params->pfc.willing = ECORE_MFW_GET_FIELD(pfc, DCBX_PFC_WILLING);
- p_params->pfc.max_tc = ECORE_MFW_GET_FIELD(pfc, DCBX_PFC_CAPS);
- p_params->pfc.enabled = ECORE_MFW_GET_FIELD(pfc, DCBX_PFC_ENABLED);
- pfc_map = ECORE_MFW_GET_FIELD(pfc, DCBX_PFC_PRI_EN_BITMAP);
+ p_params->pfc.willing = GET_MFW_FIELD(pfc, DCBX_PFC_WILLING);
+ p_params->pfc.max_tc = GET_MFW_FIELD(pfc, DCBX_PFC_CAPS);
+ p_params->pfc.enabled = GET_MFW_FIELD(pfc, DCBX_PFC_ENABLED);
+ pfc_map = GET_MFW_FIELD(pfc, DCBX_PFC_PRI_EN_BITMAP);
p_params->pfc.prio[0] = !!(pfc_map & DCBX_PFC_PRI_EN_BITMAP_PRI_0);
p_params->pfc.prio[1] = !!(pfc_map & DCBX_PFC_PRI_EN_BITMAP_PRI_1);
p_params->pfc.prio[2] = !!(pfc_map & DCBX_PFC_PRI_EN_BITMAP_PRI_2);
@@ -505,13 +521,10 @@ ecore_dcbx_get_ets_data(struct ecore_hwfn *p_hwfn,
u32 bw_map[2], tsa_map[2], pri_map;
int i;
- p_params->ets_willing = ECORE_MFW_GET_FIELD(p_ets->flags,
- DCBX_ETS_WILLING);
- p_params->ets_enabled = ECORE_MFW_GET_FIELD(p_ets->flags,
- DCBX_ETS_ENABLED);
- p_params->ets_cbs = ECORE_MFW_GET_FIELD(p_ets->flags, DCBX_ETS_CBS);
- p_params->max_ets_tc = ECORE_MFW_GET_FIELD(p_ets->flags,
- DCBX_ETS_MAX_TCS);
+ p_params->ets_willing = GET_MFW_FIELD(p_ets->flags, DCBX_ETS_WILLING);
+ p_params->ets_enabled = GET_MFW_FIELD(p_ets->flags, DCBX_ETS_ENABLED);
+ p_params->ets_cbs = GET_MFW_FIELD(p_ets->flags, DCBX_ETS_CBS);
+ p_params->max_ets_tc = GET_MFW_FIELD(p_ets->flags, DCBX_ETS_MAX_TCS);
DP_VERBOSE(p_hwfn, ECORE_MSG_DCB,
"ETS params: willing %d, enabled = %d ets_cbs %d pri_tc_tbl_0 %x max_ets_tc %d\n",
p_params->ets_willing, p_params->ets_enabled,
@@ -552,7 +565,6 @@ ecore_dcbx_get_common_params(struct ecore_hwfn *p_hwfn,
static void
ecore_dcbx_get_local_params(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
struct ecore_dcbx_get *params)
{
struct dcbx_features *p_feat;
@@ -566,7 +578,6 @@ ecore_dcbx_get_local_params(struct ecore_hwfn *p_hwfn,
static void
ecore_dcbx_get_remote_params(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
struct ecore_dcbx_get *params)
{
struct dcbx_features *p_feat;
@@ -579,9 +590,33 @@ ecore_dcbx_get_remote_params(struct ecore_hwfn *p_hwfn,
params->remote.valid = true;
}
-static enum _ecore_status_t
+static void ecore_dcbx_get_dscp_params(struct ecore_hwfn *p_hwfn,
+ struct ecore_dcbx_get *params)
+{
+ struct ecore_dcbx_dscp_params *p_dscp;
+ struct dcb_dscp_map *p_dscp_map;
+ int i, j, entry;
+ u32 pri_map;
+
+ p_dscp = &params->dscp;
+ p_dscp_map = &p_hwfn->p_dcbx_info->dscp_map;
+ p_dscp->enabled = GET_MFW_FIELD(p_dscp_map->flags, DCB_DSCP_ENABLE);
+
+	/* MFW encodes 64 dscp entries into an 8-element array of u32 entries,
+	 * where each entry holds the 4-bit priority map for 8 dscp entries.
+ */
+ for (i = 0, entry = 0; i < ECORE_DCBX_DSCP_SIZE / 8; i++) {
+ pri_map = OSAL_BE32_TO_CPU(p_dscp_map->dscp_pri_map[i]);
+ DP_VERBOSE(p_hwfn, ECORE_MSG_DCB, "elem %d pri_map 0x%x\n",
+ entry, pri_map);
+ for (j = 0; j < ECORE_DCBX_DSCP_SIZE / 8; j++, entry++)
+ p_dscp->dscp_pri_map[entry] = (u32)(pri_map >>
+ (j * 4)) & 0xf;
+ }
+}
+
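A worked example of the packing described in the comment above (illustrative only): each u32 holds eight 4-bit priorities, so a byte-swapped word of 0x76543210 maps DSCP codepoints 0..7 to priorities 0..7.

	static void example_decode_dscp_word(u32 pri_map, u8 *prio /* 8 entries */)
	{
		int j;

		for (j = 0; j < 8; j++)
			prio[j] = (u8)((pri_map >> (j * 4)) & 0xf);
		/* pri_map == 0x76543210 -> prio[] = {0, 1, 2, 3, 4, 5, 6, 7} */
	}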
+static void
ecore_dcbx_get_operational_params(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
struct ecore_dcbx_get *params)
{
struct ecore_dcbx_operational_params *p_operational;
@@ -597,27 +632,27 @@ ecore_dcbx_get_operational_params(struct ecore_hwfn *p_hwfn,
 * was successfully performed
*/
p_operational = &params->operational;
- enabled = !!(ECORE_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION) !=
+ enabled = !!(GET_MFW_FIELD(flags, DCBX_CONFIG_VERSION) !=
DCBX_CONFIG_VERSION_DISABLED);
if (!enabled) {
p_operational->enabled = enabled;
p_operational->valid = false;
DP_VERBOSE(p_hwfn, ECORE_MSG_DCB, "Dcbx is disabled\n");
- return ECORE_INVAL;
+ return;
}
p_feat = &p_hwfn->p_dcbx_info->operational.features;
p_results = &p_hwfn->p_dcbx_info->results;
- val = !!(ECORE_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION) ==
+ val = !!(GET_MFW_FIELD(flags, DCBX_CONFIG_VERSION) ==
DCBX_CONFIG_VERSION_IEEE);
p_operational->ieee = val;
- val = !!(ECORE_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION) ==
+ val = !!(GET_MFW_FIELD(flags, DCBX_CONFIG_VERSION) ==
DCBX_CONFIG_VERSION_CEE);
p_operational->cee = val;
- val = !!(ECORE_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION) ==
+ val = !!(GET_MFW_FIELD(flags, DCBX_CONFIG_VERSION) ==
DCBX_CONFIG_VERSION_STATIC);
p_operational->local = val;
@@ -632,45 +667,14 @@ ecore_dcbx_get_operational_params(struct ecore_hwfn *p_hwfn,
p_operational->ieee);
ecore_dcbx_get_priority_info(p_hwfn, &p_operational->app_prio,
p_results);
- err = ECORE_MFW_GET_FIELD(p_feat->app.flags, DCBX_APP_ERROR);
+ err = GET_MFW_FIELD(p_feat->app.flags, DCBX_APP_ERROR);
p_operational->err = err;
p_operational->enabled = enabled;
p_operational->valid = true;
-
- return ECORE_SUCCESS;
-}
-
-static void
-ecore_dcbx_get_dscp_params(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- struct ecore_dcbx_get *params)
-{
- struct ecore_dcbx_dscp_params *p_dscp;
- struct dcb_dscp_map *p_dscp_map;
- int i, j, entry;
- u32 pri_map;
-
- p_dscp = &params->dscp;
- p_dscp_map = &p_hwfn->p_dcbx_info->dscp_map;
- p_dscp->enabled = ECORE_MFW_GET_FIELD(p_dscp_map->flags,
- DCB_DSCP_ENABLE);
- /* MFW encodes 64 dscp entries into 8 element array of u32 entries,
- * where each entry holds the 4bit priority map for 8 dscp entries.
- */
- for (i = 0, entry = 0; i < ECORE_DCBX_DSCP_SIZE / 8; i++) {
- pri_map = OSAL_BE32_TO_CPU(p_dscp_map->dscp_pri_map[i]);
- DP_VERBOSE(p_hwfn, ECORE_MSG_DCB, "elem %d pri_map 0x%x\n",
- entry, pri_map);
- for (j = 0; j < ECORE_DCBX_DSCP_SIZE / 8; j++, entry++)
- p_dscp->dscp_pri_map[entry] = (u32)(pri_map >>
- (j * 4)) & 0xf;
- }
}
-static void
-ecore_dcbx_get_local_lldp_params(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- struct ecore_dcbx_get *params)
+static void ecore_dcbx_get_local_lldp_params(struct ecore_hwfn *p_hwfn,
+ struct ecore_dcbx_get *params)
{
struct lldp_config_params_s *p_local;
@@ -678,15 +682,13 @@ ecore_dcbx_get_local_lldp_params(struct ecore_hwfn *p_hwfn,
OSAL_MEMCPY(params->lldp_local.local_chassis_id,
p_local->local_chassis_id,
- OSAL_ARRAY_SIZE(p_local->local_chassis_id));
+ sizeof(params->lldp_local.local_chassis_id));
OSAL_MEMCPY(params->lldp_local.local_port_id, p_local->local_port_id,
- OSAL_ARRAY_SIZE(p_local->local_port_id));
+ sizeof(params->lldp_local.local_port_id));
}
-static void
-ecore_dcbx_get_remote_lldp_params(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- struct ecore_dcbx_get *params)
+static void ecore_dcbx_get_remote_lldp_params(struct ecore_hwfn *p_hwfn,
+ struct ecore_dcbx_get *params)
{
struct lldp_status_params_s *p_remote;
@@ -694,40 +696,38 @@ ecore_dcbx_get_remote_lldp_params(struct ecore_hwfn *p_hwfn,
OSAL_MEMCPY(params->lldp_remote.peer_chassis_id,
p_remote->peer_chassis_id,
- OSAL_ARRAY_SIZE(p_remote->peer_chassis_id));
+ sizeof(params->lldp_remote.peer_chassis_id));
OSAL_MEMCPY(params->lldp_remote.peer_port_id, p_remote->peer_port_id,
- OSAL_ARRAY_SIZE(p_remote->peer_port_id));
+ sizeof(params->lldp_remote.peer_port_id));
}
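The two hunks above swap OSAL_ARRAY_SIZE for sizeof because the IDs are arrays of u32: an element count (what ARRAY_SIZE-style macros return) is only a quarter of the byte count a copy needs. A small illustration, assuming OSAL_MEMCPY has standard memcpy semantics:

	static void example_copy_port_id(u32 dst[ECORE_LLDP_PORT_ID_STAT_LEN],
					 const u32 src[ECORE_LLDP_PORT_ID_STAT_LEN])
	{
		/* byte count, not element count, for a u32 array */
		OSAL_MEMCPY(dst, src,
			    ECORE_LLDP_PORT_ID_STAT_LEN * sizeof(u32));
	}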
static enum _ecore_status_t
-ecore_dcbx_get_params(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ecore_dcbx_get_params(struct ecore_hwfn *p_hwfn,
struct ecore_dcbx_get *p_params,
enum ecore_mib_read_type type)
{
- enum _ecore_status_t rc = ECORE_SUCCESS;
-
switch (type) {
case ECORE_DCBX_REMOTE_MIB:
- ecore_dcbx_get_remote_params(p_hwfn, p_ptt, p_params);
+ ecore_dcbx_get_remote_params(p_hwfn, p_params);
break;
case ECORE_DCBX_LOCAL_MIB:
- ecore_dcbx_get_local_params(p_hwfn, p_ptt, p_params);
+ ecore_dcbx_get_local_params(p_hwfn, p_params);
break;
case ECORE_DCBX_OPERATIONAL_MIB:
- ecore_dcbx_get_operational_params(p_hwfn, p_ptt, p_params);
+ ecore_dcbx_get_operational_params(p_hwfn, p_params);
break;
case ECORE_DCBX_REMOTE_LLDP_MIB:
- ecore_dcbx_get_remote_lldp_params(p_hwfn, p_ptt, p_params);
+ ecore_dcbx_get_remote_lldp_params(p_hwfn, p_params);
break;
case ECORE_DCBX_LOCAL_LLDP_MIB:
- ecore_dcbx_get_local_lldp_params(p_hwfn, p_ptt, p_params);
+ ecore_dcbx_get_local_lldp_params(p_hwfn, p_params);
break;
default:
DP_ERR(p_hwfn, "MIB read err, unknown mib type %d\n", type);
return ECORE_INVAL;
}
- return rc;
+ return ECORE_SUCCESS;
}
static enum _ecore_status_t
@@ -857,7 +857,7 @@ static enum _ecore_status_t ecore_dcbx_read_mib(struct ecore_hwfn *p_hwfn,
DP_ERR(p_hwfn, "MIB read err, unknown mib type %d\n", type);
}
- return rc;
+ return ECORE_SUCCESS;
}
/*
@@ -876,33 +876,28 @@ ecore_dcbx_mib_update_event(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
return rc;
if (type == ECORE_DCBX_OPERATIONAL_MIB) {
- ecore_dcbx_get_dscp_params(p_hwfn, p_ptt,
- &p_hwfn->p_dcbx_info->get);
+ ecore_dcbx_get_dscp_params(p_hwfn, &p_hwfn->p_dcbx_info->get);
rc = ecore_dcbx_process_mib_info(p_hwfn);
if (!rc) {
- bool enabled;
-
/* reconfigure tcs of QM queues according
* to negotiation results
*/
ecore_qm_reconf(p_hwfn, p_ptt);
/* update storm FW with negotiation results */
- ecore_sp_pf_update(p_hwfn);
-
- /* set eagle enigne 1 flow control workaround
- * according to negotiation results
- */
- enabled = p_hwfn->p_dcbx_info->results.dcbx_enabled;
+ ecore_sp_pf_update_dcbx(p_hwfn);
}
}
- ecore_dcbx_get_params(p_hwfn, p_ptt, &p_hwfn->p_dcbx_info->get, type);
+
+ ecore_dcbx_get_params(p_hwfn, &p_hwfn->p_dcbx_info->get, type);
/* Update the DSCP to TC mapping bit if required */
if ((type == ECORE_DCBX_OPERATIONAL_MIB) &&
p_hwfn->p_dcbx_info->dscp_nig_update) {
- ecore_wr(p_hwfn, p_ptt, NIG_REG_DSCP_TO_TC_MAP_ENABLE, 0x1);
+ u8 val = !!p_hwfn->p_dcbx_info->get.dscp.enabled;
+
+ ecore_wr(p_hwfn, p_ptt, NIG_REG_DSCP_TO_TC_MAP_ENABLE, val);
p_hwfn->p_dcbx_info->dscp_nig_update = false;
}
@@ -927,8 +922,7 @@ enum _ecore_status_t ecore_dcbx_info_alloc(struct ecore_hwfn *p_hwfn)
return ECORE_SUCCESS;
}
-void ecore_dcbx_info_free(struct ecore_hwfn *p_hwfn,
- struct ecore_dcbx_info *p_dcbx_info)
+void ecore_dcbx_info_free(struct ecore_hwfn *p_hwfn)
{
OSAL_FREE(p_hwfn->p_dev, p_hwfn->p_dcbx_info);
}
@@ -951,8 +945,6 @@ void ecore_dcbx_set_pf_update_params(struct ecore_dcbx_results *p_src,
struct protocol_dcb_data *p_dcb_data;
u8 update_flag;
- p_dest->pf_id = p_src->pf_id;
-
update_flag = p_src->arr[DCBX_PROTOCOL_ETH].update;
p_dest->update_eth_dcb_data_mode = update_flag;
update_flag = p_src->arr[DCBX_PROTOCOL_IWARP].update;
@@ -975,17 +967,16 @@ enum _ecore_status_t ecore_dcbx_query_params(struct ecore_hwfn *p_hwfn,
return ECORE_INVAL;
p_ptt = ecore_ptt_acquire(p_hwfn);
- if (!p_ptt) {
- rc = ECORE_TIMEOUT;
- DP_ERR(p_hwfn, "rc = %d\n", rc);
- return rc;
- }
+ if (!p_ptt)
+ return ECORE_TIMEOUT;
rc = ecore_dcbx_read_mib(p_hwfn, p_ptt, type);
if (rc != ECORE_SUCCESS)
goto out;
- rc = ecore_dcbx_get_params(p_hwfn, p_ptt, p_get, type);
+ ecore_dcbx_get_dscp_params(p_hwfn, p_get);
+
+ rc = ecore_dcbx_get_params(p_hwfn, p_get, type);
out:
ecore_ptt_release(p_hwfn, p_ptt);
@@ -1010,13 +1001,13 @@ ecore_dcbx_set_pfc_data(struct ecore_hwfn *p_hwfn,
*pfc &= ~DCBX_PFC_ENABLED_MASK;
*pfc &= ~DCBX_PFC_CAPS_MASK;
- *pfc |= (u32)p_params->pfc.max_tc << DCBX_PFC_CAPS_SHIFT;
+ *pfc |= (u32)p_params->pfc.max_tc << DCBX_PFC_CAPS_OFFSET;
for (i = 0; i < ECORE_MAX_PFC_PRIORITIES; i++)
if (p_params->pfc.prio[i])
pfc_map |= (1 << i);
*pfc &= ~DCBX_PFC_PRI_EN_BITMAP_MASK;
- *pfc |= (pfc_map << DCBX_PFC_PRI_EN_BITMAP_SHIFT);
+ *pfc |= (pfc_map << DCBX_PFC_PRI_EN_BITMAP_OFFSET);
DP_VERBOSE(p_hwfn, ECORE_MSG_DCB, "pfc = 0x%x\n", *pfc);
}
@@ -1046,7 +1037,7 @@ ecore_dcbx_set_ets_data(struct ecore_hwfn *p_hwfn,
p_ets->flags &= ~DCBX_ETS_ENABLED_MASK;
p_ets->flags &= ~DCBX_ETS_MAX_TCS_MASK;
- p_ets->flags |= (u32)p_params->max_ets_tc << DCBX_ETS_MAX_TCS_SHIFT;
+ p_ets->flags |= (u32)p_params->max_ets_tc << DCBX_ETS_MAX_TCS_OFFSET;
bw_map = (u8 *)&p_ets->tc_bw_tbl[0];
tsa_map = (u8 *)&p_ets->tc_tsa_tbl[0];
@@ -1092,9 +1083,9 @@ ecore_dcbx_set_app_data(struct ecore_hwfn *p_hwfn,
p_app->flags &= ~DCBX_APP_NUM_ENTRIES_MASK;
p_app->flags |= (u32)p_params->num_app_entries <<
- DCBX_APP_NUM_ENTRIES_SHIFT;
+ DCBX_APP_NUM_ENTRIES_OFFSET;
- for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
+ for (i = 0; i < p_params->num_app_entries; i++) {
entry = &p_app->app_pri_tbl[i].entry;
*entry = 0;
if (ieee) {
@@ -1102,50 +1093,50 @@ ecore_dcbx_set_app_data(struct ecore_hwfn *p_hwfn,
switch (p_params->app_entry[i].sf_ieee) {
case ECORE_DCBX_SF_IEEE_ETHTYPE:
*entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE <<
- DCBX_APP_SF_IEEE_SHIFT);
+ DCBX_APP_SF_IEEE_OFFSET);
*entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
- DCBX_APP_SF_SHIFT);
+ DCBX_APP_SF_OFFSET);
break;
case ECORE_DCBX_SF_IEEE_TCP_PORT:
*entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT <<
- DCBX_APP_SF_IEEE_SHIFT);
+ DCBX_APP_SF_IEEE_OFFSET);
*entry |= ((u32)DCBX_APP_SF_PORT <<
- DCBX_APP_SF_SHIFT);
+ DCBX_APP_SF_OFFSET);
break;
case ECORE_DCBX_SF_IEEE_UDP_PORT:
*entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT <<
- DCBX_APP_SF_IEEE_SHIFT);
+ DCBX_APP_SF_IEEE_OFFSET);
*entry |= ((u32)DCBX_APP_SF_PORT <<
- DCBX_APP_SF_SHIFT);
+ DCBX_APP_SF_OFFSET);
break;
case ECORE_DCBX_SF_IEEE_TCP_UDP_PORT:
*entry |= (u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT <<
- DCBX_APP_SF_IEEE_SHIFT;
+ DCBX_APP_SF_IEEE_OFFSET;
*entry |= ((u32)DCBX_APP_SF_PORT <<
- DCBX_APP_SF_SHIFT);
+ DCBX_APP_SF_OFFSET);
break;
}
} else {
*entry &= ~DCBX_APP_SF_MASK;
if (p_params->app_entry[i].ethtype)
*entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
- DCBX_APP_SF_SHIFT);
+ DCBX_APP_SF_OFFSET);
else
*entry |= ((u32)DCBX_APP_SF_PORT <<
- DCBX_APP_SF_SHIFT);
+ DCBX_APP_SF_OFFSET);
}
*entry &= ~DCBX_APP_PROTOCOL_ID_MASK;
*entry |= ((u32)p_params->app_entry[i].proto_id <<
- DCBX_APP_PROTOCOL_ID_SHIFT);
+ DCBX_APP_PROTOCOL_ID_OFFSET);
*entry &= ~DCBX_APP_PRI_MAP_MASK;
*entry |= ((u32)(p_params->app_entry[i].prio) <<
- DCBX_APP_PRI_MAP_SHIFT);
+ DCBX_APP_PRI_MAP_OFFSET);
}
DP_VERBOSE(p_hwfn, ECORE_MSG_DCB, "flags = 0x%x\n", p_app->flags);
}
-static enum _ecore_status_t
+static void
ecore_dcbx_set_local_params(struct ecore_hwfn *p_hwfn,
struct dcbx_local_params *local_admin,
struct ecore_dcbx_set *params)
@@ -1164,6 +1155,9 @@ ecore_dcbx_set_local_params(struct ecore_hwfn *p_hwfn,
local_admin->config = DCBX_CONFIG_VERSION_DISABLED;
}
+ DP_VERBOSE(p_hwfn, ECORE_MSG_DCB, "Dcbx version = %d\n",
+ local_admin->config);
+
if (params->override_flags & ECORE_DCBX_OVERRIDE_PFC_CFG)
ecore_dcbx_set_pfc_data(p_hwfn, &local_admin->features.pfc,
&params->config.params);
@@ -1175,8 +1169,6 @@ ecore_dcbx_set_local_params(struct ecore_hwfn *p_hwfn,
if (params->override_flags & ECORE_DCBX_OVERRIDE_APP_CFG)
ecore_dcbx_set_app_data(p_hwfn, &local_admin->features.app,
&params->config.params, ieee);
-
- return ECORE_SUCCESS;
}
static enum _ecore_status_t
@@ -1206,6 +1198,12 @@ ecore_dcbx_set_dscp_params(struct ecore_hwfn *p_hwfn,
p_hwfn->p_dcbx_info->dscp_nig_update = true;
DP_VERBOSE(p_hwfn, ECORE_MSG_DCB, "flags = 0x%x\n", p_dscp_map->flags);
+ DP_VERBOSE(p_hwfn, ECORE_MSG_DCB,
+ "pri_map[] = 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+ p_dscp_map->dscp_pri_map[0], p_dscp_map->dscp_pri_map[1],
+ p_dscp_map->dscp_pri_map[2], p_dscp_map->dscp_pri_map[3],
+ p_dscp_map->dscp_pri_map[4], p_dscp_map->dscp_pri_map[5],
+ p_dscp_map->dscp_pri_map[6], p_dscp_map->dscp_pri_map[7]);
return ECORE_SUCCESS;
}
@@ -1221,15 +1219,10 @@ enum _ecore_status_t ecore_dcbx_config_params(struct ecore_hwfn *p_hwfn,
u32 resp = 0, param = 0;
enum _ecore_status_t rc = ECORE_SUCCESS;
- if (!hw_commit) {
- OSAL_MEMCPY(&p_hwfn->p_dcbx_info->set, params,
- sizeof(p_hwfn->p_dcbx_info->set));
+ OSAL_MEMCPY(&p_hwfn->p_dcbx_info->set, params,
+ sizeof(p_hwfn->p_dcbx_info->set));
+ if (!hw_commit)
return ECORE_SUCCESS;
- }
-
- /* clear set-parmas cache */
- OSAL_MEMSET(&p_hwfn->p_dcbx_info->set, 0,
- sizeof(struct ecore_dcbx_set));
OSAL_MEMSET(&local_admin, 0, sizeof(local_admin));
ecore_dcbx_set_local_params(p_hwfn, &local_admin, params);
@@ -1253,12 +1246,10 @@ enum _ecore_status_t ecore_dcbx_config_params(struct ecore_hwfn *p_hwfn,
}
rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_SET_DCBX,
- 1 << DRV_MB_PARAM_LLDP_SEND_SHIFT, &resp, &param);
- if (rc != ECORE_SUCCESS) {
+ 1 << DRV_MB_PARAM_LLDP_SEND_OFFSET, &resp, &param);
+ if (rc != ECORE_SUCCESS)
DP_NOTICE(p_hwfn, false,
"Failed to send DCBX update request\n");
- return rc;
- }
return rc;
}
@@ -1277,10 +1268,8 @@ enum _ecore_status_t ecore_dcbx_get_config_params(struct ecore_hwfn *p_hwfn,
dcbx_info = OSAL_ALLOC(p_hwfn->p_dev, GFP_KERNEL,
sizeof(*dcbx_info));
- if (!dcbx_info) {
- DP_ERR(p_hwfn, "Failed to allocate struct ecore_dcbx_info\n");
+ if (!dcbx_info)
return ECORE_NOMEM;
- }
OSAL_MEMSET(dcbx_info, 0, sizeof(*dcbx_info));
rc = ecore_dcbx_query_params(p_hwfn, dcbx_info,
@@ -1300,9 +1289,12 @@ enum _ecore_status_t ecore_dcbx_get_config_params(struct ecore_hwfn *p_hwfn,
p_hwfn->p_dcbx_info->set.ver_num |= DCBX_CONFIG_VERSION_STATIC;
p_hwfn->p_dcbx_info->set.enabled = dcbx_info->operational.enabled;
+ OSAL_MEMCPY(&p_hwfn->p_dcbx_info->set.dscp,
+ &p_hwfn->p_dcbx_info->get.dscp,
+ sizeof(struct ecore_dcbx_dscp_params));
OSAL_MEMCPY(&p_hwfn->p_dcbx_info->set.config.params,
&dcbx_info->operational.params,
- sizeof(struct ecore_dcbx_admin_params));
+ sizeof(p_hwfn->p_dcbx_info->set.config.params));
p_hwfn->p_dcbx_info->set.config.valid = true;
OSAL_MEMCPY(params, &p_hwfn->p_dcbx_info->set,
@@ -1312,3 +1304,230 @@ enum _ecore_status_t ecore_dcbx_get_config_params(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
}
+
+enum _ecore_status_t ecore_lldp_register_tlv(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ enum ecore_lldp_agent agent,
+ u8 tlv_type)
+{
+ u32 mb_param = 0, mcp_resp = 0, mcp_param = 0, val = 0;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
+
+ switch (agent) {
+ case ECORE_LLDP_NEAREST_BRIDGE:
+ val = LLDP_NEAREST_BRIDGE;
+ break;
+ case ECORE_LLDP_NEAREST_NON_TPMR_BRIDGE:
+ val = LLDP_NEAREST_NON_TPMR_BRIDGE;
+ break;
+ case ECORE_LLDP_NEAREST_CUSTOMER_BRIDGE:
+ val = LLDP_NEAREST_CUSTOMER_BRIDGE;
+ break;
+ default:
+ DP_ERR(p_hwfn, "Invalid agent type %d\n", agent);
+ return ECORE_INVAL;
+ }
+
+ SET_MFW_FIELD(mb_param, DRV_MB_PARAM_LLDP_AGENT, val);
+ SET_MFW_FIELD(mb_param, DRV_MB_PARAM_LLDP_TLV_RX_TYPE, tlv_type);
+
+ rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_REGISTER_LLDP_TLVS_RX,
+ mb_param, &mcp_resp, &mcp_param);
+ if (rc != ECORE_SUCCESS)
+ DP_NOTICE(p_hwfn, false, "Failed to register TLV\n");
+
+ return rc;
+}
+
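A hedged usage sketch of the new registration call. TLV type 127 (the organizationally specific TLV type in IEEE 802.1AB) is used purely as an example value; any type the MFW accepts can be passed.

	static enum _ecore_status_t
	example_register_org_tlv(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
	{
		/* Ask the MFW to forward received org-specific TLVs (type 127)
		 * seen by the nearest-bridge agent.
		 */
		return ecore_lldp_register_tlv(p_hwfn, p_ptt,
					       ECORE_LLDP_NEAREST_BRIDGE, 127);
	}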
+enum _ecore_status_t
+ecore_lldp_mib_update_event(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
+{
+ struct ecore_dcbx_mib_meta_data data;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
+ struct lldp_received_tlvs_s tlvs;
+ int i;
+
+ for (i = 0; i < LLDP_MAX_LLDP_AGENTS; i++) {
+ OSAL_MEM_ZERO(&data, sizeof(data));
+ data.addr = p_hwfn->mcp_info->port_addr +
+ offsetof(struct public_port, lldp_received_tlvs[i]);
+ data.lldp_tlvs = &tlvs;
+ data.size = sizeof(tlvs);
+ rc = ecore_dcbx_copy_mib(p_hwfn, p_ptt, &data,
+ ECORE_DCBX_LLDP_TLVS);
+ if (rc != ECORE_SUCCESS) {
+ DP_NOTICE(p_hwfn, false, "Failed to read lldp TLVs\n");
+ return rc;
+ }
+
+ if (!tlvs.length)
+ continue;
+
+ for (i = 0; i < MAX_TLV_BUFFER; i++)
+ tlvs.tlvs_buffer[i] =
+ OSAL_CPU_TO_BE32(tlvs.tlvs_buffer[i]);
+
+ OSAL_LLDP_RX_TLVS(p_hwfn, tlvs.tlvs_buffer, tlvs.length);
+ }
+
+ return rc;
+}
+
+enum _ecore_status_t
+ecore_lldp_get_params(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_lldp_config_params *p_params)
+{
+ struct lldp_config_params_s lldp_params;
+ u32 addr, val;
+ int i;
+
+ switch (p_params->agent) {
+ case ECORE_LLDP_NEAREST_BRIDGE:
+ val = LLDP_NEAREST_BRIDGE;
+ break;
+ case ECORE_LLDP_NEAREST_NON_TPMR_BRIDGE:
+ val = LLDP_NEAREST_NON_TPMR_BRIDGE;
+ break;
+ case ECORE_LLDP_NEAREST_CUSTOMER_BRIDGE:
+ val = LLDP_NEAREST_CUSTOMER_BRIDGE;
+ break;
+ default:
+ DP_ERR(p_hwfn, "Invalid agent type %d\n", p_params->agent);
+ return ECORE_INVAL;
+ }
+
+ addr = p_hwfn->mcp_info->port_addr +
+ offsetof(struct public_port, lldp_config_params[val]);
+
+ ecore_memcpy_from(p_hwfn, p_ptt, &lldp_params, addr,
+ sizeof(lldp_params));
+
+ p_params->tx_interval = GET_MFW_FIELD(lldp_params.config,
+ LLDP_CONFIG_TX_INTERVAL);
+ p_params->tx_hold = GET_MFW_FIELD(lldp_params.config, LLDP_CONFIG_HOLD);
+ p_params->tx_credit = GET_MFW_FIELD(lldp_params.config,
+ LLDP_CONFIG_MAX_CREDIT);
+ p_params->rx_enable = GET_MFW_FIELD(lldp_params.config,
+ LLDP_CONFIG_ENABLE_RX);
+ p_params->tx_enable = GET_MFW_FIELD(lldp_params.config,
+ LLDP_CONFIG_ENABLE_TX);
+
+ OSAL_MEMCPY(p_params->chassis_id_tlv, lldp_params.local_chassis_id,
+ sizeof(p_params->chassis_id_tlv));
+ for (i = 0; i < ECORE_LLDP_CHASSIS_ID_STAT_LEN; i++)
+ p_params->chassis_id_tlv[i] =
+ OSAL_BE32_TO_CPU(p_params->chassis_id_tlv[i]);
+
+ OSAL_MEMCPY(p_params->port_id_tlv, lldp_params.local_port_id,
+ sizeof(p_params->port_id_tlv));
+ for (i = 0; i < ECORE_LLDP_PORT_ID_STAT_LEN; i++)
+ p_params->port_id_tlv[i] =
+ OSAL_BE32_TO_CPU(p_params->port_id_tlv[i]);
+
+ return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t
+ecore_lldp_set_params(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_lldp_config_params *p_params)
+{
+ u32 mb_param = 0, mcp_resp = 0, mcp_param = 0;
+ struct lldp_config_params_s lldp_params;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
+ u32 addr, val;
+ int i;
+
+ switch (p_params->agent) {
+ case ECORE_LLDP_NEAREST_BRIDGE:
+ val = LLDP_NEAREST_BRIDGE;
+ break;
+ case ECORE_LLDP_NEAREST_NON_TPMR_BRIDGE:
+ val = LLDP_NEAREST_NON_TPMR_BRIDGE;
+ break;
+ case ECORE_LLDP_NEAREST_CUSTOMER_BRIDGE:
+ val = LLDP_NEAREST_CUSTOMER_BRIDGE;
+ break;
+ default:
+ DP_ERR(p_hwfn, "Invalid agent type %d\n", p_params->agent);
+ return ECORE_INVAL;
+ }
+
+ SET_MFW_FIELD(mb_param, DRV_MB_PARAM_LLDP_AGENT, val);
+ addr = p_hwfn->mcp_info->port_addr +
+ offsetof(struct public_port, lldp_config_params[val]);
+
+ OSAL_MEMSET(&lldp_params, 0, sizeof(lldp_params));
+ SET_MFW_FIELD(lldp_params.config, LLDP_CONFIG_TX_INTERVAL,
+ p_params->tx_interval);
+ SET_MFW_FIELD(lldp_params.config, LLDP_CONFIG_HOLD, p_params->tx_hold);
+ SET_MFW_FIELD(lldp_params.config, LLDP_CONFIG_MAX_CREDIT,
+ p_params->tx_credit);
+ SET_MFW_FIELD(lldp_params.config, LLDP_CONFIG_ENABLE_RX,
+ !!p_params->rx_enable);
+ SET_MFW_FIELD(lldp_params.config, LLDP_CONFIG_ENABLE_TX,
+ !!p_params->tx_enable);
+
+ for (i = 0; i < ECORE_LLDP_CHASSIS_ID_STAT_LEN; i++)
+ p_params->chassis_id_tlv[i] =
+ OSAL_CPU_TO_BE32(p_params->chassis_id_tlv[i]);
+ OSAL_MEMCPY(lldp_params.local_chassis_id, p_params->chassis_id_tlv,
+ sizeof(lldp_params.local_chassis_id));
+
+ for (i = 0; i < ECORE_LLDP_PORT_ID_STAT_LEN; i++)
+ p_params->port_id_tlv[i] =
+ OSAL_CPU_TO_BE32(p_params->port_id_tlv[i]);
+ OSAL_MEMCPY(lldp_params.local_port_id, p_params->port_id_tlv,
+ sizeof(lldp_params.local_port_id));
+
+ ecore_memcpy_to(p_hwfn, p_ptt, addr, &lldp_params, sizeof(lldp_params));
+
+ rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_SET_LLDP,
+ mb_param, &mcp_resp, &mcp_param);
+ if (rc != ECORE_SUCCESS)
+ DP_NOTICE(p_hwfn, false, "SET_LLDP failed, error = %d\n", rc);
+
+ return rc;
+}
+
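A hedged read-modify-write sketch combining the two new accessors above. Note that ecore_lldp_set_params() byte-swaps the chassis/port ID words in the caller's structure in place, which is harmless here because they were just read back in CPU order.

	static enum _ecore_status_t
	example_enable_lldp_tx(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
	{
		struct ecore_lldp_config_params params;
		enum _ecore_status_t rc;

		OSAL_MEM_ZERO(&params, sizeof(params));
		params.agent = ECORE_LLDP_NEAREST_BRIDGE;

		rc = ecore_lldp_get_params(p_hwfn, p_ptt, &params);
		if (rc != ECORE_SUCCESS)
			return rc;

		params.tx_enable = true;
		return ecore_lldp_set_params(p_hwfn, p_ptt, &params);
	}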
+enum _ecore_status_t
+ecore_lldp_set_system_tlvs(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_lldp_sys_tlvs *p_params)
+{
+ u32 mb_param = 0, mcp_resp = 0, mcp_param = 0;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
+ struct lldp_system_tlvs_buffer_s lld_tlv_buf;
+ u32 addr, *p_val;
+ u8 len;
+ int i;
+
+ p_val = (u32 *)p_params->buf;
+ for (i = 0; i < ECORE_LLDP_SYS_TLV_SIZE / 4; i++)
+ p_val[i] = OSAL_CPU_TO_BE32(p_val[i]);
+
+ OSAL_MEMSET(&lld_tlv_buf, 0, sizeof(lld_tlv_buf));
+ SET_MFW_FIELD(lld_tlv_buf.flags, LLDP_SYSTEM_TLV_VALID, 1);
+ SET_MFW_FIELD(lld_tlv_buf.flags, LLDP_SYSTEM_TLV_MANDATORY,
+ !!p_params->discard_mandatory_tlv);
+ SET_MFW_FIELD(lld_tlv_buf.flags, LLDP_SYSTEM_TLV_LENGTH,
+ p_params->buf_size);
+ len = ECORE_LLDP_SYS_TLV_SIZE / 2;
+ OSAL_MEMCPY(lld_tlv_buf.data, p_params->buf, len);
+
+ addr = p_hwfn->mcp_info->port_addr +
+ offsetof(struct public_port, system_lldp_tlvs_buf);
+ ecore_memcpy_to(p_hwfn, p_ptt, addr, &lld_tlv_buf, sizeof(lld_tlv_buf));
+
+ if (p_params->buf_size > len) {
+ addr = p_hwfn->mcp_info->port_addr +
+ offsetof(struct public_port, system_lldp_tlvs_buf2);
+ ecore_memcpy_to(p_hwfn, p_ptt, addr, &p_params->buf[len],
+ ECORE_LLDP_SYS_TLV_SIZE / 2);
+ }
+
+ rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_SET_LLDP,
+ mb_param, &mcp_resp, &mcp_param);
+ if (rc != ECORE_SUCCESS)
+ DP_NOTICE(p_hwfn, false, "SET_LLDP failed, error = %d\n", rc);
+
+ return rc;
+}
diff --git a/drivers/net/qede/base/ecore_dcbx.h b/drivers/net/qede/base/ecore_dcbx.h
index eba2d91b..469e42dd 100644
--- a/drivers/net/qede/base/ecore_dcbx.h
+++ b/drivers/net/qede/base/ecore_dcbx.h
@@ -17,6 +17,8 @@
#include "ecore_hsi_common.h"
#include "ecore_dcbx_api.h"
+#define ECORE_DCBX_DSCP_DISABLED 0XFF
+
struct ecore_dcbx_info {
struct lldp_status_params_s lldp_remote[LLDP_MAX_LLDP_AGENTS];
struct lldp_config_params_s lldp_local[LLDP_MAX_LLDP_AGENTS];
@@ -35,6 +37,7 @@ struct ecore_dcbx_info {
struct ecore_dcbx_mib_meta_data {
struct lldp_config_params_s *lldp_local;
struct lldp_status_params_s *lldp_remote;
+ struct lldp_received_tlvs_s *lldp_tlvs;
struct dcbx_local_params *local_admin;
struct dcb_dscp_map *dscp_map;
struct dcbx_mib *mib;
@@ -47,11 +50,15 @@ enum _ecore_status_t
ecore_dcbx_mib_update_event(struct ecore_hwfn *, struct ecore_ptt *,
enum ecore_mib_read_type);
-enum _ecore_status_t ecore_dcbx_read_lldp_params(struct ecore_hwfn *,
- struct ecore_ptt *);
enum _ecore_status_t ecore_dcbx_info_alloc(struct ecore_hwfn *p_hwfn);
-void ecore_dcbx_info_free(struct ecore_hwfn *, struct ecore_dcbx_info *);
+void ecore_dcbx_info_free(struct ecore_hwfn *p_hwfn);
void ecore_dcbx_set_pf_update_params(struct ecore_dcbx_results *p_src,
struct pf_update_ramrod_data *p_dest);
+/* Returns the DSCP value mapped to a given priority */
+u8 ecore_dcbx_get_dscp_value(struct ecore_hwfn *p_hwfn, u8 pri);
+
+enum _ecore_status_t
+ecore_lldp_mib_update_event(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt);
+
#endif /* __ECORE_DCBX_H__ */
diff --git a/drivers/net/qede/base/ecore_dcbx_api.h b/drivers/net/qede/base/ecore_dcbx_api.h
index 2dc76796..9ff4df4c 100644
--- a/drivers/net/qede/base/ecore_dcbx_api.h
+++ b/drivers/net/qede/base/ecore_dcbx_api.h
@@ -18,7 +18,8 @@ enum ecore_mib_read_type {
ECORE_DCBX_REMOTE_MIB,
ECORE_DCBX_LOCAL_MIB,
ECORE_DCBX_REMOTE_LLDP_MIB,
- ECORE_DCBX_LOCAL_LLDP_MIB
+ ECORE_DCBX_LOCAL_LLDP_MIB,
+ ECORE_DCBX_LLDP_TLVS
};
struct ecore_dcbx_app_data {
@@ -101,7 +102,6 @@ struct ecore_dcbx_params {
bool ets_willing;
bool ets_enabled;
bool ets_cbs;
- bool valid; /* Indicate validity of params */
u8 ets_pri_tc_tbl[ECORE_MAX_PFC_PRIORITIES];
u8 ets_tc_bw_tbl[ECORE_MAX_PFC_PRIORITIES];
u8 ets_tc_tsa_tbl[ECORE_MAX_PFC_PRIORITIES];
@@ -175,6 +175,31 @@ struct ecore_dcbx_app_metadata {
enum ecore_pci_personality personality;
};
+enum ecore_lldp_agent {
+ ECORE_LLDP_NEAREST_BRIDGE = 0,
+ ECORE_LLDP_NEAREST_NON_TPMR_BRIDGE,
+ ECORE_LLDP_NEAREST_CUSTOMER_BRIDGE,
+ ECORE_LLDP_MAX_AGENTS
+};
+
+struct ecore_lldp_config_params {
+ enum ecore_lldp_agent agent;
+ u8 tx_interval;
+ u8 tx_hold;
+ u8 tx_credit;
+ bool rx_enable;
+ bool tx_enable;
+ u32 chassis_id_tlv[ECORE_LLDP_CHASSIS_ID_STAT_LEN];
+ u32 port_id_tlv[ECORE_LLDP_PORT_ID_STAT_LEN];
+};
+
+#define ECORE_LLDP_SYS_TLV_SIZE 256
+struct ecore_lldp_sys_tlvs {
+ bool discard_mandatory_tlv;
+ u8 buf[ECORE_LLDP_SYS_TLV_SIZE];
+ u16 buf_size;
+};
+
enum _ecore_status_t ecore_dcbx_query_params(struct ecore_hwfn *,
struct ecore_dcbx_get *,
enum ecore_mib_read_type);
@@ -187,6 +212,23 @@ enum _ecore_status_t ecore_dcbx_config_params(struct ecore_hwfn *,
struct ecore_dcbx_set *,
bool);
+enum _ecore_status_t ecore_lldp_register_tlv(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ enum ecore_lldp_agent agent,
+ u8 tlv_type);
+
+enum _ecore_status_t
+ecore_lldp_get_params(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_lldp_config_params *p_params);
+
+enum _ecore_status_t
+ecore_lldp_set_params(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_lldp_config_params *p_params);
+
+enum _ecore_status_t
+ecore_lldp_set_system_tlvs(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_lldp_sys_tlvs *p_params);
+
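A minimal sketch of how an ecore client might drive the new LLDP helpers declared above, assuming it already holds a PTT and simply wants to flip RX/TX on the nearest-bridge agent; the function name and the enable-both-directions policy are illustrative only, not part of this patch:

static enum _ecore_status_t
example_lldp_enable(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
{
	struct ecore_lldp_config_params params;
	enum _ecore_status_t rc;

	OSAL_MEM_ZERO(&params, sizeof(params));
	params.agent = ECORE_LLDP_NEAREST_BRIDGE;

	/* Read the current per-agent configuration from the MFW */
	rc = ecore_lldp_get_params(p_hwfn, p_ptt, &params);
	if (rc != ECORE_SUCCESS)
		return rc;

	/* Enable both directions and write the configuration back */
	params.rx_enable = true;
	params.tx_enable = true;
	return ecore_lldp_set_params(p_hwfn, p_ptt, &params);
}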
static const struct ecore_dcbx_app_metadata ecore_dcbx_app_update[] = {
{DCBX_PROTOCOL_ISCSI, "ISCSI", ECORE_PCI_ISCSI},
{DCBX_PROTOCOL_FCOE, "FCOE", ECORE_PCI_FCOE},
diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c
index 65b89b8f..da1830ce 100644
--- a/drivers/net/qede/base/ecore_dev.c
+++ b/drivers/net/qede/base/ecore_dev.c
@@ -28,7 +28,6 @@
#include "mcp_public.h"
#include "ecore_iro.h"
#include "nvm_cfg.h"
-#include "ecore_dev_api.h"
#include "ecore_dcbx.h"
#include "ecore_l2.h"
@@ -42,6 +41,318 @@
static osal_spinlock_t qm_lock;
static bool qm_lock_init;
+/******************** Doorbell Recovery *******************/
+/* The doorbell recovery mechanism consists of a list of entries which represent
+ * doorbelling entities (l2 queues, roce sq/rq/cqs, the slowpath spq, etc). Each
+ * entity needs to register with the mechanism and provide the parameters
+ * describing it's doorbell, including a location where last used doorbell data
+ * can be found. The doorbell execute function will traverse the list and
+ * doorbell all of the registered entries.
+ */
+struct ecore_db_recovery_entry {
+ osal_list_entry_t list_entry;
+ void OSAL_IOMEM *db_addr;
+ void *db_data;
+ enum ecore_db_rec_width db_width;
+ enum ecore_db_rec_space db_space;
+ u8 hwfn_idx;
+};
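To make the registration/replay flow described in the comment above concrete, here is a rough sketch of how a doorbelling entity could use the mechanism. struct example_queue and the function names are hypothetical, and the kernel-space enumerator is assumed to be DB_REC_KERNEL:

/* Hypothetical doorbelling entity: only the doorbell address and the
 * last-written doorbell data matter to the recovery mechanism.
 */
struct example_queue {
	void OSAL_IOMEM *db_addr;	/* points inside the doorbell BAR */
	u32 db_data;			/* last value written to the doorbell */
};

static enum _ecore_status_t
example_queue_start(struct ecore_dev *p_dev, struct example_queue *q)
{
	/* Register with the recovery list before ringing the first doorbell */
	return ecore_db_recovery_add(p_dev, q->db_addr, &q->db_data,
				     DB_REC_WIDTH_32B, DB_REC_KERNEL);
}

static void example_queue_stop(struct ecore_dev *p_dev, struct example_queue *q)
{
	/* Entries are matched by the db_data pointer, so pass the same one */
	ecore_db_recovery_del(p_dev, q->db_addr, &q->db_data);
}

static void example_db_drop_handler(struct ecore_hwfn *p_hwfn)
{
	/* Replay every registered doorbell after a doorbell-drop event */
	ecore_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL);
}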
+
+/* display a single doorbell recovery entry */
+void ecore_db_recovery_dp_entry(struct ecore_hwfn *p_hwfn,
+ struct ecore_db_recovery_entry *db_entry,
+ const char *action)
+{
+ DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "(%s: db_entry %p, addr %p, data %p, width %s, %s space, hwfn %d)\n",
+ action, db_entry, db_entry->db_addr, db_entry->db_data,
+ db_entry->db_width == DB_REC_WIDTH_32B ? "32b" : "64b",
+ db_entry->db_space == DB_REC_USER ? "user" : "kernel",
+ db_entry->hwfn_idx);
+}
+
+/* doorbell address sanity (address within doorbell bar range) */
+bool ecore_db_rec_sanity(struct ecore_dev *p_dev, void OSAL_IOMEM *db_addr,
+ void *db_data)
+{
+ /* make sure doorbell address is within the doorbell bar */
+ if (db_addr < p_dev->doorbells || (u8 *)db_addr >
+ (u8 *)p_dev->doorbells + p_dev->db_size) {
+ OSAL_WARN(true,
+ "Illegal doorbell address: %p. Legal range for doorbell addresses is [%p..%p]\n",
+ db_addr, p_dev->doorbells,
+ (u8 *)p_dev->doorbells + p_dev->db_size);
+ return false;
+ }
+
+ /* make sure doorbell data pointer is not null */
+ if (!db_data) {
+ OSAL_WARN(true, "Illegal doorbell data pointer: %p", db_data);
+ return false;
+ }
+
+ return true;
+}
+
+/* find hwfn according to the doorbell address */
+struct ecore_hwfn *ecore_db_rec_find_hwfn(struct ecore_dev *p_dev,
+ void OSAL_IOMEM *db_addr)
+{
+ struct ecore_hwfn *p_hwfn;
+
+ /* In CMT doorbell bar is split down the middle between engine 0 and
+ * engine 1
+ */
+ if (ECORE_IS_CMT(p_dev))
+ p_hwfn = db_addr < p_dev->hwfns[1].doorbells ?
+ &p_dev->hwfns[0] : &p_dev->hwfns[1];
+ else
+ p_hwfn = ECORE_LEADING_HWFN(p_dev);
+
+ return p_hwfn;
+}
+
+/* add a new entry to the doorbell recovery mechanism */
+enum _ecore_status_t ecore_db_recovery_add(struct ecore_dev *p_dev,
+ void OSAL_IOMEM *db_addr,
+ void *db_data,
+ enum ecore_db_rec_width db_width,
+ enum ecore_db_rec_space db_space)
+{
+ struct ecore_db_recovery_entry *db_entry;
+ struct ecore_hwfn *p_hwfn;
+
+ /* short-circuit VFs, for now */
+ if (IS_VF(p_dev)) {
+ DP_VERBOSE(p_dev, ECORE_MSG_IOV, "db recovery - skipping VF doorbell\n");
+ return ECORE_SUCCESS;
+ }
+
+ /* sanitize doorbell address */
+ if (!ecore_db_rec_sanity(p_dev, db_addr, db_data))
+ return ECORE_INVAL;
+
+ /* obtain hwfn from doorbell address */
+ p_hwfn = ecore_db_rec_find_hwfn(p_dev, db_addr);
+
+ /* create entry */
+ db_entry = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL, sizeof(*db_entry));
+ if (!db_entry) {
+ DP_NOTICE(p_dev, false, "Failed to allocate a db recovery entry\n");
+ return ECORE_NOMEM;
+ }
+
+ /* populate entry */
+ db_entry->db_addr = db_addr;
+ db_entry->db_data = db_data;
+ db_entry->db_width = db_width;
+ db_entry->db_space = db_space;
+ db_entry->hwfn_idx = p_hwfn->my_id;
+
+ /* display */
+ ecore_db_recovery_dp_entry(p_hwfn, db_entry, "Adding");
+
+ /* protect the list */
+ OSAL_SPIN_LOCK(&p_hwfn->db_recovery_info.lock);
+ OSAL_LIST_PUSH_TAIL(&db_entry->list_entry,
+ &p_hwfn->db_recovery_info.list);
+ OSAL_SPIN_UNLOCK(&p_hwfn->db_recovery_info.lock);
+
+ return ECORE_SUCCESS;
+}
+
+/* remove an entry from the doorbell recovery mechanism */
+enum _ecore_status_t ecore_db_recovery_del(struct ecore_dev *p_dev,
+ void OSAL_IOMEM *db_addr,
+ void *db_data)
+{
+ struct ecore_db_recovery_entry *db_entry = OSAL_NULL;
+ enum _ecore_status_t rc = ECORE_INVAL;
+ struct ecore_hwfn *p_hwfn;
+
+ /* short-circuit VFs, for now */
+ if (IS_VF(p_dev)) {
+ DP_VERBOSE(p_dev, ECORE_MSG_IOV, "db recovery - skipping VF doorbell\n");
+ return ECORE_SUCCESS;
+ }
+
+ /* sanitize doorbell address */
+ if (!ecore_db_rec_sanity(p_dev, db_addr, db_data))
+ return ECORE_INVAL;
+
+ /* obtain hwfn from doorbell address */
+ p_hwfn = ecore_db_rec_find_hwfn(p_dev, db_addr);
+
+ /* protect the list */
+ OSAL_SPIN_LOCK(&p_hwfn->db_recovery_info.lock);
+ OSAL_LIST_FOR_EACH_ENTRY(db_entry,
+ &p_hwfn->db_recovery_info.list,
+ list_entry,
+ struct ecore_db_recovery_entry) {
+ /* search according to db_data addr since db_addr is not unique
+ * (roce)
+ */
+ if (db_entry->db_data == db_data) {
+ ecore_db_recovery_dp_entry(p_hwfn, db_entry,
+ "Deleting");
+ OSAL_LIST_REMOVE_ENTRY(&db_entry->list_entry,
+ &p_hwfn->db_recovery_info.list);
+ rc = ECORE_SUCCESS;
+ break;
+ }
+ }
+
+ OSAL_SPIN_UNLOCK(&p_hwfn->db_recovery_info.lock);
+
+ if (rc == ECORE_INVAL)
+ /*OSAL_WARN(true,*/
+ DP_NOTICE(p_hwfn, false,
+ "Failed to find element in list. Key (db_data addr) was %p. db_addr was %p\n",
+ db_data, db_addr);
+ else
+ OSAL_FREE(p_dev, db_entry);
+
+ return rc;
+}
+
+/* initialize the doorbell recovery mechanism */
+enum _ecore_status_t ecore_db_recovery_setup(struct ecore_hwfn *p_hwfn)
+{
+ DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "Setting up db recovery\n");
+
+ /* make sure db_size was set in p_dev */
+ if (!p_hwfn->p_dev->db_size) {
+ DP_ERR(p_hwfn->p_dev, "db_size not set\n");
+ return ECORE_INVAL;
+ }
+
+ OSAL_LIST_INIT(&p_hwfn->db_recovery_info.list);
+#ifdef CONFIG_ECORE_LOCK_ALLOC
+ OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_hwfn->db_recovery_info.lock);
+#endif
+ OSAL_SPIN_LOCK_INIT(&p_hwfn->db_recovery_info.lock);
+ p_hwfn->db_recovery_info.db_recovery_counter = 0;
+
+ return ECORE_SUCCESS;
+}
+
+/* destroy the doorbell recovery mechanism */
+void ecore_db_recovery_teardown(struct ecore_hwfn *p_hwfn)
+{
+ struct ecore_db_recovery_entry *db_entry = OSAL_NULL;
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "Tearing down db recovery\n");
+ if (!OSAL_LIST_IS_EMPTY(&p_hwfn->db_recovery_info.list)) {
+ DP_VERBOSE(p_hwfn, false, "Doorbell Recovery teardown found the doorbell recovery list was not empty (Expected in disorderly driver unload (e.g. recovery) otherwise this probably means some flow forgot to db_recovery_del). Prepare to purge doorbell recovery list...\n");
+ while (!OSAL_LIST_IS_EMPTY(&p_hwfn->db_recovery_info.list)) {
+ db_entry = OSAL_LIST_FIRST_ENTRY(
+ &p_hwfn->db_recovery_info.list,
+ struct ecore_db_recovery_entry,
+ list_entry);
+ ecore_db_recovery_dp_entry(p_hwfn, db_entry, "Purging");
+ OSAL_LIST_REMOVE_ENTRY(&db_entry->list_entry,
+ &p_hwfn->db_recovery_info.list);
+ OSAL_FREE(p_hwfn->p_dev, db_entry);
+ }
+ }
+#ifdef CONFIG_ECORE_LOCK_ALLOC
+ OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->db_recovery_info.lock);
+#endif
+ p_hwfn->db_recovery_info.db_recovery_counter = 0;
+}
+
+/* print the content of the doorbell recovery mechanism */
+void ecore_db_recovery_dp(struct ecore_hwfn *p_hwfn)
+{
+ struct ecore_db_recovery_entry *db_entry = OSAL_NULL;
+
+ DP_NOTICE(p_hwfn, false,
+ "Dispalying doorbell recovery database. Counter was %d\n",
+ p_hwfn->db_recovery_info.db_recovery_counter);
+
+ /* protect the list */
+ OSAL_SPIN_LOCK(&p_hwfn->db_recovery_info.lock);
+ OSAL_LIST_FOR_EACH_ENTRY(db_entry,
+ &p_hwfn->db_recovery_info.list,
+ list_entry,
+ struct ecore_db_recovery_entry) {
+ ecore_db_recovery_dp_entry(p_hwfn, db_entry, "Printing");
+ }
+
+ OSAL_SPIN_UNLOCK(&p_hwfn->db_recovery_info.lock);
+}
+
+/* ring the doorbell of a single doorbell recovery entry */
+void ecore_db_recovery_ring(struct ecore_hwfn *p_hwfn,
+ struct ecore_db_recovery_entry *db_entry,
+ enum ecore_db_rec_exec db_exec)
+{
+ /* Print according to width */
+ if (db_entry->db_width == DB_REC_WIDTH_32B)
+ DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "%s doorbell address %p data %x\n",
+ db_exec == DB_REC_DRY_RUN ? "would have rung" : "ringing",
+ db_entry->db_addr, *(u32 *)db_entry->db_data);
+ else
+ DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "%s doorbell address %p data %lx\n",
+ db_exec == DB_REC_DRY_RUN ? "would have rung" : "ringing",
+ db_entry->db_addr,
+ *(unsigned long *)(db_entry->db_data));
+
+ /* Sanity */
+ if (!ecore_db_rec_sanity(p_hwfn->p_dev, db_entry->db_addr,
+ db_entry->db_data))
+ return;
+
+ /* Flush the write combined buffer. Since there are multiple doorbelling
+ * entities using the same address, if we don't flush, a transaction
+ * could be lost.
+ */
+ OSAL_WMB(p_hwfn->p_dev);
+
+ /* Ring the doorbell */
+ if (db_exec == DB_REC_REAL_DEAL || db_exec == DB_REC_ONCE) {
+ if (db_entry->db_width == DB_REC_WIDTH_32B)
+ DIRECT_REG_WR(p_hwfn, db_entry->db_addr,
+ *(u32 *)(db_entry->db_data));
+ else
+ DIRECT_REG_WR64(p_hwfn, db_entry->db_addr,
+ *(u64 *)(db_entry->db_data));
+ }
+
+ /* Flush the write combined buffer. Next doorbell may come from a
+ * different entity to the same address...
+ */
+ OSAL_WMB(p_hwfn->p_dev);
+}
+
+/* traverse the doorbell recovery entry list and ring all the doorbells */
+void ecore_db_recovery_execute(struct ecore_hwfn *p_hwfn,
+ enum ecore_db_rec_exec db_exec)
+{
+ struct ecore_db_recovery_entry *db_entry = OSAL_NULL;
+
+ if (db_exec != DB_REC_ONCE) {
+ DP_NOTICE(p_hwfn, false, "Executing doorbell recovery. Counter was %d\n",
+ p_hwfn->db_recovery_info.db_recovery_counter);
+
+ /* track amount of times recovery was executed */
+ p_hwfn->db_recovery_info.db_recovery_counter++;
+ }
+
+ /* protect the list */
+ OSAL_SPIN_LOCK(&p_hwfn->db_recovery_info.lock);
+ OSAL_LIST_FOR_EACH_ENTRY(db_entry,
+ &p_hwfn->db_recovery_info.list,
+ list_entry,
+ struct ecore_db_recovery_entry) {
+ ecore_db_recovery_ring(p_hwfn, db_entry, db_exec);
+ if (db_exec == DB_REC_ONCE)
+ break;
+ }
+
+ OSAL_SPIN_UNLOCK(&p_hwfn->db_recovery_info.lock);
+}
+/******************** Doorbell Recovery end ****************/
+
/* Configurable */
#define ECORE_MIN_DPIS (4) /* The minimal num of DPIs required to
* load the driver. The number was
@@ -49,28 +360,20 @@ static bool qm_lock_init;
*/
/* Derived */
-#define ECORE_MIN_PWM_REGION ((ECORE_WID_SIZE) * (ECORE_MIN_DPIS))
-
-enum BAR_ID {
- BAR_ID_0, /* used for GRC */
- BAR_ID_1 /* Used for doorbells */
-};
+#define ECORE_MIN_PWM_REGION (ECORE_WID_SIZE * ECORE_MIN_DPIS)
-static u32 ecore_hw_bar_size(struct ecore_hwfn *p_hwfn, enum BAR_ID bar_id)
+static u32 ecore_hw_bar_size(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ enum BAR_ID bar_id)
{
u32 bar_reg = (bar_id == BAR_ID_0 ?
PGLUE_B_REG_PF_BAR0_SIZE : PGLUE_B_REG_PF_BAR1_SIZE);
u32 val;
- if (IS_VF(p_hwfn->p_dev)) {
- /* TODO - assume each VF hwfn has 64Kb for Bar0; Bar1 can be
- * read from actual register, but we're currently not using
- * it for actual doorbelling.
- */
- return 1 << 17;
- }
+ if (IS_VF(p_hwfn->p_dev))
+ return ecore_vf_hw_bar_size(p_hwfn, bar_id);
- val = ecore_rd(p_hwfn, p_hwfn->p_main_ptt, bar_reg);
+ val = ecore_rd(p_hwfn, p_ptt, bar_reg);
if (val)
return 1 << (val + 15);
@@ -78,15 +381,13 @@ static u32 ecore_hw_bar_size(struct ecore_hwfn *p_hwfn, enum BAR_ID bar_id)
* they were found to be useful MFW started updating them from 8.7.7.0.
* In older MFW versions they are set to 0 which means disabled.
*/
- if (p_hwfn->p_dev->num_hwfns > 1) {
- DP_NOTICE(p_hwfn, false,
- "BAR size not configured. Assuming BAR size of 256kB"
- " for GRC and 512kB for DB\n");
+ if (ECORE_IS_CMT(p_hwfn->p_dev)) {
+ DP_INFO(p_hwfn,
+ "BAR size not configured. Assuming BAR size of 256kB for GRC and 512kB for DB\n");
val = BAR_ID_0 ? 256 * 1024 : 512 * 1024;
} else {
- DP_NOTICE(p_hwfn, false,
- "BAR size not configured. Assuming BAR size of 512kB"
- " for GRC and 512kB for DB\n");
+ DP_INFO(p_hwfn,
+ "BAR size not configured. Assuming BAR size of 512kB for GRC and 512kB for DB\n");
val = 512 * 1024;
}
@@ -121,7 +422,9 @@ void ecore_init_struct(struct ecore_dev *p_dev)
p_hwfn->my_id = i;
p_hwfn->b_active = false;
+#ifdef CONFIG_ECORE_LOCK_ALLOC
OSAL_MUTEX_ALLOC(p_hwfn, &p_hwfn->dmae_info.mutex);
+#endif
OSAL_MUTEX_INIT(&p_hwfn->dmae_info.mutex);
}
@@ -168,8 +471,11 @@ void ecore_resc_free(struct ecore_dev *p_dev)
ecore_iov_free(p_hwfn);
ecore_l2_free(p_hwfn);
ecore_dmae_info_free(p_hwfn);
- ecore_dcbx_info_free(p_hwfn, p_hwfn->p_dcbx_info);
+ ecore_dcbx_info_free(p_hwfn);
/* @@@TBD Flush work-queue ? */
+
+ /* destroy doorbell recovery mechanism */
+ ecore_db_recovery_teardown(p_hwfn);
}
}
@@ -307,7 +613,7 @@ static void ecore_init_qm_params(struct ecore_hwfn *p_hwfn)
qm_info->vport_wfq_en = 1;
/* TC config is different for AH 4 port */
- four_port = p_hwfn->p_dev->num_ports_in_engines == MAX_NUM_PORTS_K2;
+ four_port = p_hwfn->p_dev->num_ports_in_engine == MAX_NUM_PORTS_K2;
/* in AH 4 port we have fewer TCs per port */
qm_info->max_phys_tcs_per_port = four_port ? NUM_PHYS_TCS_4PORT_K2 :
@@ -336,7 +642,7 @@ static void ecore_init_qm_vport_params(struct ecore_hwfn *p_hwfn)
static void ecore_init_qm_port_params(struct ecore_hwfn *p_hwfn)
{
/* Initialize qm port parameters */
- u8 i, active_phys_tcs, num_ports = p_hwfn->p_dev->num_ports_in_engines;
+ u8 i, active_phys_tcs, num_ports = p_hwfn->p_dev->num_ports_in_engine;
/* indicate how ooo and high pri traffic is dealt with */
active_phys_tcs = num_ports == MAX_NUM_PORTS_K2 ?
@@ -348,7 +654,7 @@ static void ecore_init_qm_port_params(struct ecore_hwfn *p_hwfn)
p_qm_port->active = 1;
p_qm_port->active_phys_tcs = active_phys_tcs;
- p_qm_port->num_pbf_cmd_lines = PBF_MAX_CMD_LINES / num_ports;
+ p_qm_port->num_pbf_cmd_lines = PBF_MAX_CMD_LINES_E4 / num_ports;
p_qm_port->num_btb_blocks = BTB_MAX_BLOCKS / num_ports;
}
}
@@ -690,7 +996,7 @@ static void ecore_dp_init_qm_params(struct ecore_hwfn *p_hwfn)
qm_info->num_pf_rls, ecore_get_pq_flags(p_hwfn));
/* port table */
- for (i = 0; i < p_hwfn->p_dev->num_ports_in_engines; i++) {
+ for (i = 0; i < p_hwfn->p_dev->num_ports_in_engine; i++) {
port = &qm_info->qm_port_params[i];
DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
"port idx %d, active %d, active_phys_tcs %d,"
@@ -777,7 +1083,7 @@ enum _ecore_status_t ecore_qm_reconf(struct ecore_hwfn *p_hwfn,
ecore_init_clear_rt_data(p_hwfn);
/* prepare QM portion of runtime array */
- ecore_qm_init_pf(p_hwfn);
+ ecore_qm_init_pf(p_hwfn, p_ptt);
/* activate init tool on runtime array */
rc = ecore_init_run(p_hwfn, p_ptt, PHASE_QM_PF, p_hwfn->rel_pf_id,
@@ -819,7 +1125,7 @@ static enum _ecore_status_t ecore_alloc_qm_data(struct ecore_hwfn *p_hwfn)
qm_info->qm_port_params = OSAL_ZALLOC(p_hwfn->p_dev, GFP_KERNEL,
sizeof(struct init_qm_port_params) *
- p_hwfn->p_dev->num_ports_in_engines);
+ p_hwfn->p_dev->num_ports_in_engine);
if (!qm_info->qm_port_params)
goto alloc_err;
@@ -861,12 +1167,17 @@ enum _ecore_status_t ecore_resc_alloc(struct ecore_dev *p_dev)
struct ecore_hwfn *p_hwfn = &p_dev->hwfns[i];
u32 n_eqes, num_cons;
+ /* initialize the doorbell recovery mechanism */
+ rc = ecore_db_recovery_setup(p_hwfn);
+ if (rc)
+ goto alloc_err;
+
/* First allocate the context manager structure */
rc = ecore_cxt_mngr_alloc(p_hwfn);
if (rc)
goto alloc_err;
- /* Set the HW cid/tid numbers (in the contest manager)
+ /* Set the HW cid/tid numbers (in the context manager)
* Must be done prior to any further computations.
*/
rc = ecore_cxt_set_pf_params(p_hwfn);
@@ -1036,7 +1347,7 @@ void ecore_resc_setup(struct ecore_dev *p_dev)
ecore_int_setup(p_hwfn, p_hwfn->p_main_ptt);
ecore_l2_setup(p_hwfn);
- ecore_iov_setup(p_hwfn, p_hwfn->p_main_ptt);
+ ecore_iov_setup(p_hwfn);
}
}
@@ -1116,7 +1427,7 @@ static enum _ecore_status_t ecore_calc_hw_mode(struct ecore_hwfn *p_hwfn)
}
/* Ports per engine is based on the values in CNIG_REG_NW_PORT_MODE */
- switch (p_hwfn->p_dev->num_ports_in_engines) {
+ switch (p_hwfn->p_dev->num_ports_in_engine) {
case 1:
hw_mode |= 1 << MODE_PORTS_PER_ENG_1;
break;
@@ -1129,23 +1440,15 @@ static enum _ecore_status_t ecore_calc_hw_mode(struct ecore_hwfn *p_hwfn)
default:
DP_NOTICE(p_hwfn, true,
"num_ports_in_engine = %d not supported\n",
- p_hwfn->p_dev->num_ports_in_engines);
+ p_hwfn->p_dev->num_ports_in_engine);
return ECORE_INVAL;
}
- switch (p_hwfn->p_dev->mf_mode) {
- case ECORE_MF_DEFAULT:
- case ECORE_MF_NPAR:
- hw_mode |= 1 << MODE_MF_SI;
- break;
- case ECORE_MF_OVLAN:
+ if (OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS,
+ &p_hwfn->p_dev->mf_bits))
hw_mode |= 1 << MODE_MF_SD;
- break;
- default:
- DP_NOTICE(p_hwfn, true,
- "Unsupported MF mode, init as DEFAULT\n");
+ else
hw_mode |= 1 << MODE_MF_SI;
- }
#ifndef ASIC_ONLY
if (CHIP_REV_IS_SLOW(p_hwfn->p_dev)) {
@@ -1161,7 +1464,7 @@ static enum _ecore_status_t ecore_calc_hw_mode(struct ecore_hwfn *p_hwfn)
#endif
hw_mode |= 1 << MODE_ASIC;
- if (p_hwfn->p_dev->num_hwfns > 1)
+ if (ECORE_IS_CMT(p_hwfn->p_dev))
hw_mode |= 1 << MODE_100G;
p_hwfn->hw_info.hw_mode = hw_mode;
@@ -1203,10 +1506,10 @@ static enum _ecore_status_t ecore_hw_init_chip(struct ecore_hwfn *p_hwfn,
if (ECORE_IS_AH(p_dev)) {
/* 2 for 4-port, 1 for 2-port, 0 for 1-port */
ecore_wr(p_hwfn, p_ptt, MISC_REG_PORT_MODE,
- (p_dev->num_ports_in_engines >> 1));
+ (p_dev->num_ports_in_engine >> 1));
ecore_wr(p_hwfn, p_ptt, MISC_REG_BLOCK_256B_EN,
- p_dev->num_ports_in_engines == 4 ? 0 : 3);
+ p_dev->num_ports_in_engine == 4 ? 0 : 3);
}
}
@@ -1232,7 +1535,7 @@ static enum _ecore_status_t ecore_hw_init_chip(struct ecore_hwfn *p_hwfn,
static void ecore_init_cau_rt_data(struct ecore_dev *p_dev)
{
u32 offset = CAU_REG_SB_VAR_MEMORY_RT_OFFSET;
- int i, sb_id;
+ int i, igu_sb_id;
for_each_hwfn(p_dev, i) {
struct ecore_hwfn *p_hwfn = &p_dev->hwfns[i];
@@ -1242,20 +1545,77 @@ static void ecore_init_cau_rt_data(struct ecore_dev *p_dev)
p_igu_info = p_hwfn->hw_info.p_igu_info;
- for (sb_id = 0; sb_id < ECORE_MAPPING_MEMORY_SIZE(p_dev);
- sb_id++) {
- p_block = &p_igu_info->igu_map.igu_blocks[sb_id];
+ for (igu_sb_id = 0;
+ igu_sb_id < ECORE_MAPPING_MEMORY_SIZE(p_dev);
+ igu_sb_id++) {
+ p_block = &p_igu_info->entry[igu_sb_id];
if (!p_block->is_pf)
continue;
ecore_init_cau_sb_entry(p_hwfn, &sb_entry,
p_block->function_id, 0, 0);
- STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2, sb_entry);
+ STORE_RT_REG_AGG(p_hwfn, offset + igu_sb_id * 2,
+ sb_entry);
}
}
}
+static void ecore_init_cache_line_size(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ u32 val, wr_mbs, cache_line_size;
+
+ val = ecore_rd(p_hwfn, p_ptt, PSWRQ2_REG_WR_MBS0);
+ switch (val) {
+ case 0:
+ wr_mbs = 128;
+ break;
+ case 1:
+ wr_mbs = 256;
+ break;
+ case 2:
+ wr_mbs = 512;
+ break;
+ default:
+ DP_INFO(p_hwfn,
+ "Unexpected value of PSWRQ2_REG_WR_MBS0 [0x%x]. Avoid configuring PGLUE_B_REG_CACHE_LINE_SIZE.\n",
+ val);
+ return;
+ }
+
+ cache_line_size = OSAL_MIN_T(u32, OSAL_CACHE_LINE_SIZE, wr_mbs);
+ switch (cache_line_size) {
+ case 32:
+ val = 0;
+ break;
+ case 64:
+ val = 1;
+ break;
+ case 128:
+ val = 2;
+ break;
+ case 256:
+ val = 3;
+ break;
+ default:
+ DP_INFO(p_hwfn,
+ "Unexpected value of cache line size [0x%x]. Avoid configuring PGLUE_B_REG_CACHE_LINE_SIZE.\n",
+ cache_line_size);
+ }
+
+ if (wr_mbs < OSAL_CACHE_LINE_SIZE)
+ DP_INFO(p_hwfn,
+ "The cache line size for padding is suboptimal for performance [OS cache line size 0x%x, wr mbs 0x%x]\n",
+ OSAL_CACHE_LINE_SIZE, wr_mbs);
+
+ STORE_RT_REG(p_hwfn, PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET, val);
+ if (val > 0) {
+ STORE_RT_REG(p_hwfn, PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET, val);
+ STORE_RT_REG(p_hwfn, PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET, val);
+ }
+}
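A quick trace of this helper under one plausible configuration, assuming a 64-byte OS cache line and PSWRQ2_REG_WR_MBS0 reading back 1 (the values are illustrative, not taken from real hardware):

/* PSWRQ2_REG_WR_MBS0 = 1                            -> wr_mbs = 256
 * cache_line_size = min(OSAL_CACHE_LINE_SIZE, wr_mbs) = min(64, 256) = 64
 * 64 maps to val = 1
 * wr_mbs (256) >= OS cache line (64)                -> no "suboptimal" warning
 * PGLUE_B cache-line-size RT entry = 1, and since val > 0 the
 * PSWRQ2 DRAM_ALIGN_WR/RD RT entries are set to 1 as well
 */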
+
static enum _ecore_status_t ecore_hw_init_common(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
int hw_mode)
@@ -1270,11 +1630,11 @@ static enum _ecore_status_t ecore_hw_init_common(struct ecore_hwfn *p_hwfn,
ecore_init_cau_rt_data(p_dev);
/* Program GTT windows */
- ecore_gtt_init(p_hwfn);
+ ecore_gtt_init(p_hwfn, p_ptt);
#ifndef ASIC_ONLY
if (CHIP_REV_IS_EMUL(p_dev)) {
- rc = ecore_hw_init_chip(p_hwfn, p_hwfn->p_main_ptt);
+ rc = ecore_hw_init_chip(p_hwfn, p_ptt);
if (rc != ECORE_SUCCESS)
return rc;
}
@@ -1288,7 +1648,7 @@ static enum _ecore_status_t ecore_hw_init_common(struct ecore_hwfn *p_hwfn,
}
ecore_qm_common_rt_init(p_hwfn,
- p_dev->num_ports_in_engines,
+ p_dev->num_ports_in_engine,
qm_info->max_phys_tcs_per_port,
qm_info->pf_rl_en, qm_info->pf_wfq_en,
qm_info->vport_rl_en, qm_info->vport_wfq_en,
@@ -1296,6 +1656,8 @@ static enum _ecore_status_t ecore_hw_init_common(struct ecore_hwfn *p_hwfn,
ecore_cxt_hw_init_common(p_hwfn);
+ ecore_init_cache_line_size(p_hwfn, p_ptt);
+
rc = ecore_init_run(p_hwfn, p_ptt, PHASE_ENGINE, ANY_PHASE_ID, hw_mode);
if (rc != ECORE_SUCCESS)
return rc;
@@ -1511,9 +1873,9 @@ static enum _ecore_status_t
ecore_hw_init_dpi_size(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt, u32 pwm_region_size, u32 n_cpus)
{
- u32 dpi_page_size_1, dpi_page_size_2, dpi_page_size;
- u32 dpi_bit_shift, dpi_count;
+ u32 dpi_bit_shift, dpi_count, dpi_page_size;
u32 min_dpis;
+ u32 n_wids;
/* Calculate DPI size
* ------------------
@@ -1536,12 +1898,11 @@ ecore_hw_init_dpi_size(struct ecore_hwfn *p_hwfn,
* 0 is 4kB, 1 is 8kB, etc. Hence the minimum size is 4,096
* containing 4 WIDs.
*/
- dpi_page_size_1 = ECORE_WID_SIZE * n_cpus;
- dpi_page_size_2 = OSAL_MAX_T(u32, ECORE_WID_SIZE, OSAL_PAGE_SIZE);
- dpi_page_size = OSAL_MAX_T(u32, dpi_page_size_1, dpi_page_size_2);
- dpi_page_size = OSAL_ROUNDUP_POW_OF_TWO(dpi_page_size);
+ n_wids = OSAL_MAX_T(u32, ECORE_MIN_WIDS, n_cpus);
+ dpi_page_size = ECORE_WID_SIZE * OSAL_ROUNDUP_POW_OF_TWO(n_wids);
+ dpi_page_size = (dpi_page_size + OSAL_PAGE_SIZE - 1) &
+ ~(OSAL_PAGE_SIZE - 1);
dpi_bit_shift = OSAL_LOG2(dpi_page_size / 4096);
-
dpi_count = pwm_region_size / dpi_page_size;
min_dpis = p_hwfn->pf_params.rdma_pf_params.min_dpis;
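A worked example of the new sizing, assuming ECORE_WID_SIZE is 1 kB and ECORE_MIN_WIDS is 4 (consistent with the 4 kB / 4-WID minimum mentioned in the comment) and a 4 kB OSAL_PAGE_SIZE; the n_cpus value is made up:

/* n_cpus = 6
 * n_wids        = max(ECORE_MIN_WIDS, 6)            = 6
 * dpi_page_size = 1024 * roundup_pow_of_two(6)      = 1024 * 8 = 8192
 * dpi_page_size = roundup(8192, OSAL_PAGE_SIZE)     = 8192
 * dpi_bit_shift = log2(8192 / 4096)                 = 1
 * dpi_count     = pwm_region_size / 8192
 */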
@@ -1578,8 +1939,8 @@ ecore_hw_init_pf_doorbell_bar(struct ecore_hwfn *p_hwfn,
enum _ecore_status_t rc = ECORE_SUCCESS;
u8 cond;
- db_bar_size = ecore_hw_bar_size(p_hwfn, BAR_ID_1);
- if (p_hwfn->p_dev->num_hwfns > 1)
+ db_bar_size = ecore_hw_bar_size(p_hwfn, p_ptt, BAR_ID_1);
+ if (ECORE_IS_CMT(p_hwfn->p_dev))
db_bar_size /= 2;
/* Calculate doorbell regions
@@ -1600,7 +1961,8 @@ ecore_hw_init_pf_doorbell_bar(struct ecore_hwfn *p_hwfn,
ecore_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_CORE,
OSAL_NULL) +
ecore_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ETH, OSAL_NULL);
- norm_regsize = ROUNDUP(ECORE_PF_DEMS_SIZE * non_pwm_conn, 4096);
+ norm_regsize = ROUNDUP(ECORE_PF_DEMS_SIZE * non_pwm_conn,
+ OSAL_PAGE_SIZE);
min_addr_reg1 = norm_regsize / 4096;
pwm_regsize = db_bar_size - norm_regsize;
@@ -1626,7 +1988,7 @@ ecore_hw_init_pf_doorbell_bar(struct ecore_hwfn *p_hwfn,
/* Either EDPM is mandatory, or we are attempting to allocate a
* WID per CPU.
*/
- n_cpus = OSAL_NUM_ACTIVE_CPU();
+ n_cpus = OSAL_NUM_CPUS();
rc = ecore_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus);
}
@@ -1678,12 +2040,29 @@ static enum _ecore_status_t ecore_hw_init_port(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
int hw_mode)
{
+ u32 ppf_to_eng_sel[NIG_REG_PPF_TO_ENGINE_SEL_RT_SIZE];
+ u32 val;
enum _ecore_status_t rc = ECORE_SUCCESS;
+ u8 i;
+
+ /* In CMT for non-RoCE packets - use connection based classification */
+ val = ECORE_IS_CMT(p_hwfn->p_dev) ? 0x8 : 0x0;
+ for (i = 0; i < NIG_REG_PPF_TO_ENGINE_SEL_RT_SIZE; i++)
+ ppf_to_eng_sel[i] = val;
+ STORE_RT_REG_AGG(p_hwfn, NIG_REG_PPF_TO_ENGINE_SEL_RT_OFFSET,
+ ppf_to_eng_sel);
+
+ /* In CMT the gate should be cleared by the 2nd hwfn */
+ if (!ECORE_IS_CMT(p_hwfn->p_dev) || !IS_LEAD_HWFN(p_hwfn))
+ STORE_RT_REG(p_hwfn, NIG_REG_BRB_GATE_DNTFWD_PORT_RT_OFFSET, 0);
rc = ecore_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id,
hw_mode);
if (rc != ECORE_SUCCESS)
return rc;
+
+ ecore_wr(p_hwfn, p_ptt, PGLUE_B_REG_MASTER_WRITE_PAD_ENABLE, 0);
+
#ifndef ASIC_ONLY
if (CHIP_REV_IS_ASIC(p_hwfn->p_dev))
return ECORE_SUCCESS;
@@ -1694,7 +2073,7 @@ static enum _ecore_status_t ecore_hw_init_port(struct ecore_hwfn *p_hwfn,
else if (ECORE_IS_BB(p_hwfn->p_dev))
ecore_link_init_bb(p_hwfn, p_ptt, p_hwfn->port_id);
} else if (CHIP_REV_IS_EMUL(p_hwfn->p_dev)) {
- if (p_hwfn->p_dev->num_hwfns > 1) {
+ if (ECORE_IS_CMT(p_hwfn->p_dev)) {
/* Activate OPTE in CMT */
u32 val;
@@ -1746,7 +2125,7 @@ ecore_hw_init_pf(struct ecore_hwfn *p_hwfn,
/* Update rate limit once we'll actually have a link */
p_hwfn->qm_info.pf_rl = 100000;
}
- ecore_cxt_hw_init_pf(p_hwfn);
+ ecore_cxt_hw_init_pf(p_hwfn, p_ptt);
ecore_int_igu_init_rt(p_hwfn);
@@ -1756,6 +2135,11 @@ ecore_hw_init_pf(struct ecore_hwfn *p_hwfn,
STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET, 1);
STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET,
p_hwfn->hw_info.ovlan);
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
+ "Configuring LLH_FUNC_FILTER_HDR_SEL\n");
+ STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET,
+ 1);
}
/* Enable classification by MAC if needed */
@@ -1815,7 +2199,7 @@ ecore_hw_init_pf(struct ecore_hwfn *p_hwfn,
return rc;
/* send function start command */
- rc = ecore_sp_pf_start(p_hwfn, p_tunn, p_hwfn->p_dev->mf_mode,
+ rc = ecore_sp_pf_start(p_hwfn, p_ptt, p_tunn,
allow_npar_tx_switch);
if (rc) {
DP_NOTICE(p_hwfn, true,
@@ -1898,6 +2282,37 @@ static void ecore_reset_mb_shadow(struct ecore_hwfn *p_hwfn,
p_hwfn->mcp_info->mfw_mb_length);
}
+static void ecore_pglueb_clear_err(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ ecore_wr(p_hwfn, p_ptt, PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR,
+ 1 << p_hwfn->abs_pf_id);
+}
+
+static void
+ecore_fill_load_req_params(struct ecore_load_req_params *p_load_req,
+ struct ecore_drv_load_params *p_drv_load)
+{
+ /* Make sure that if ecore-client didn't provide inputs, all the
+ * expected defaults are indeed zero.
+ */
+ OSAL_BUILD_BUG_ON(ECORE_DRV_ROLE_OS != 0);
+ OSAL_BUILD_BUG_ON(ECORE_LOAD_REQ_LOCK_TO_DEFAULT != 0);
+ OSAL_BUILD_BUG_ON(ECORE_OVERRIDE_FORCE_LOAD_NONE != 0);
+
+ OSAL_MEM_ZERO(p_load_req, sizeof(*p_load_req));
+
+ if (p_drv_load != OSAL_NULL) {
+ p_load_req->drv_role = p_drv_load->is_crash_kernel ?
+ ECORE_DRV_ROLE_KDUMP :
+ ECORE_DRV_ROLE_OS;
+ p_load_req->timeout_val = p_drv_load->mfw_timeout_val;
+ p_load_req->avoid_eng_reset = p_drv_load->avoid_eng_reset;
+ p_load_req->override_force_load =
+ p_drv_load->override_force_load;
+ }
+}
+
enum _ecore_status_t ecore_vf_start(struct ecore_hwfn *p_hwfn,
struct ecore_hw_init_params *p_params)
{
@@ -1911,13 +2326,6 @@ enum _ecore_status_t ecore_vf_start(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
}
-static void ecore_pglueb_clear_err(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt)
-{
- ecore_wr(p_hwfn, p_ptt, PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR,
- 1 << p_hwfn->abs_pf_id);
-}
-
enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
struct ecore_hw_init_params *p_params)
{
@@ -1928,8 +2336,7 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
enum _ecore_status_t rc = ECORE_SUCCESS;
int i;
- if ((p_params->int_mode == ECORE_INT_MODE_MSI) &&
- (p_dev->num_hwfns > 1)) {
+ if ((p_params->int_mode == ECORE_INT_MODE_MSI) && ECORE_IS_CMT(p_dev)) {
DP_NOTICE(p_dev, false,
"MSI mode is not supported for CMT devices\n");
return ECORE_INVAL;
@@ -1959,12 +2366,8 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
if (rc != ECORE_SUCCESS)
return rc;
- OSAL_MEM_ZERO(&load_req_params, sizeof(load_req_params));
- load_req_params.drv_role = p_params->is_crash_kernel ?
- ECORE_DRV_ROLE_KDUMP :
- ECORE_DRV_ROLE_OS;
- load_req_params.timeout_val = p_params->mfw_timeout_val;
- load_req_params.avoid_eng_reset = p_params->avoid_eng_reset;
+ ecore_fill_load_req_params(&load_req_params,
+ p_params->p_drv_load_params);
rc = ecore_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt,
&load_req_params);
if (rc != ECORE_SUCCESS) {
@@ -1978,6 +2381,8 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
"Load request was sent. Load code: 0x%x\n",
load_code);
+ ecore_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt);
+
/* CQ75580:
* When coming back from hibernate state, the registers from
* which shadow is read initially are not initialized. It turns
@@ -2072,7 +2477,7 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
"sending phony dcbx set command to trigger DCBx attention handling\n");
rc = ecore_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
DRV_MSG_CODE_SET_DCBX,
- 1 << DRV_MB_PARAM_DCBX_NOTIFY_SHIFT, &resp,
+ 1 << DRV_MB_PARAM_DCBX_NOTIFY_OFFSET, &resp,
&param);
if (rc != ECORE_SUCCESS) {
DP_NOTICE(p_hwfn, true,
@@ -2255,6 +2660,13 @@ enum _ecore_status_t ecore_hw_stop(struct ecore_dev *p_dev)
ecore_wr(p_hwfn, p_ptt, IGU_REG_LEADING_EDGE_LATCH, 0);
ecore_wr(p_hwfn, p_ptt, IGU_REG_TRAILING_EDGE_LATCH, 0);
ecore_int_igu_init_pure_rt(p_hwfn, p_ptt, false, true);
+ rc = ecore_int_igu_reset_cam_default(p_hwfn, p_ptt);
+ if (rc != ECORE_SUCCESS) {
+ DP_NOTICE(p_hwfn, true,
+ "Failed to return IGU CAM to default\n");
+ rc2 = ECORE_UNKNOWN_ERROR;
+ }
+
/* Need to wait 1ms to guarantee SBs are cleared */
OSAL_MSLEEP(1);
@@ -2303,18 +2715,21 @@ enum _ecore_status_t ecore_hw_stop(struct ecore_dev *p_dev)
return rc2;
}
-void ecore_hw_stop_fastpath(struct ecore_dev *p_dev)
+enum _ecore_status_t ecore_hw_stop_fastpath(struct ecore_dev *p_dev)
{
int j;
for_each_hwfn(p_dev, j) {
struct ecore_hwfn *p_hwfn = &p_dev->hwfns[j];
- struct ecore_ptt *p_ptt = p_hwfn->p_main_ptt;
+ struct ecore_ptt *p_ptt;
if (IS_VF(p_dev)) {
ecore_vf_pf_int_cleanup(p_hwfn);
continue;
}
+ p_ptt = ecore_ptt_acquire(p_hwfn);
+ if (!p_ptt)
+ return ECORE_AGAIN;
DP_VERBOSE(p_hwfn, ECORE_MSG_IFDOWN,
"Shutting down the fastpath\n");
@@ -2336,15 +2751,22 @@ void ecore_hw_stop_fastpath(struct ecore_dev *p_dev)
ecore_int_igu_init_pure_rt(p_hwfn, p_ptt, false, false);
/* Need to wait 1ms to guarantee SBs are cleared */
OSAL_MSLEEP(1);
+ ecore_ptt_release(p_hwfn, p_ptt);
}
+
+ return ECORE_SUCCESS;
}
-void ecore_hw_start_fastpath(struct ecore_hwfn *p_hwfn)
+enum _ecore_status_t ecore_hw_start_fastpath(struct ecore_hwfn *p_hwfn)
{
- struct ecore_ptt *p_ptt = p_hwfn->p_main_ptt;
+ struct ecore_ptt *p_ptt;
if (IS_VF(p_hwfn->p_dev))
- return;
+ return ECORE_SUCCESS;
+
+ p_ptt = ecore_ptt_acquire(p_hwfn);
+ if (!p_ptt)
+ return ECORE_AGAIN;
/* If roce info is allocated it means roce is initialized and should
* be enabled in searcher.
@@ -2357,8 +2779,11 @@ void ecore_hw_start_fastpath(struct ecore_hwfn *p_hwfn)
}
/* Re-open incoming traffic */
- ecore_wr(p_hwfn, p_hwfn->p_main_ptt,
+ ecore_wr(p_hwfn, p_ptt,
NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x0);
+ ecore_ptt_release(p_hwfn, p_ptt);
+
+ return ECORE_SUCCESS;
}
/* Free hwfn memory and resources acquired in hw_hwfn_prepare */
@@ -2423,27 +2848,35 @@ static void get_function_id(struct ecore_hwfn *p_hwfn)
static void ecore_hw_set_feat(struct ecore_hwfn *p_hwfn)
{
u32 *feat_num = p_hwfn->hw_info.feat_num;
- struct ecore_sb_cnt_info sb_cnt_info;
- int num_features = 1;
+ struct ecore_sb_cnt_info sb_cnt;
+ u32 non_l2_sbs = 0;
+
+ OSAL_MEM_ZERO(&sb_cnt, sizeof(sb_cnt));
+ ecore_int_get_num_sbs(p_hwfn, &sb_cnt);
/* L2 Queues require each: 1 status block. 1 L2 queue */
- feat_num[ECORE_PF_L2_QUE] =
- OSAL_MIN_T(u32,
- RESC_NUM(p_hwfn, ECORE_SB) / num_features,
- RESC_NUM(p_hwfn, ECORE_L2_QUEUE));
-
- OSAL_MEM_ZERO(&sb_cnt_info, sizeof(sb_cnt_info));
- ecore_int_get_num_sbs(p_hwfn, &sb_cnt_info);
- feat_num[ECORE_VF_L2_QUE] =
- OSAL_MIN_T(u32,
- RESC_NUM(p_hwfn, ECORE_L2_QUEUE) -
- FEAT_NUM(p_hwfn, ECORE_PF_L2_QUE),
- sb_cnt_info.sb_iov_cnt);
-
- feat_num[ECORE_FCOE_CQ] = OSAL_MIN_T(u32, RESC_NUM(p_hwfn, ECORE_SB),
- RESC_NUM(p_hwfn, ECORE_CMDQS_CQS));
- feat_num[ECORE_ISCSI_CQ] = OSAL_MIN_T(u32, RESC_NUM(p_hwfn, ECORE_SB),
- RESC_NUM(p_hwfn, ECORE_CMDQS_CQS));
+ if (ECORE_IS_L2_PERSONALITY(p_hwfn)) {
+ /* Start by allocating VF queues, then PF's */
+ feat_num[ECORE_VF_L2_QUE] =
+ OSAL_MIN_T(u32,
+ RESC_NUM(p_hwfn, ECORE_L2_QUEUE),
+ sb_cnt.iov_cnt);
+ feat_num[ECORE_PF_L2_QUE] =
+ OSAL_MIN_T(u32,
+ sb_cnt.cnt - non_l2_sbs,
+ RESC_NUM(p_hwfn, ECORE_L2_QUEUE) -
+ FEAT_NUM(p_hwfn, ECORE_VF_L2_QUE));
+ }
+
+ if (ECORE_IS_FCOE_PERSONALITY(p_hwfn))
+ feat_num[ECORE_FCOE_CQ] =
+ OSAL_MIN_T(u32, sb_cnt.cnt, RESC_NUM(p_hwfn,
+ ECORE_CMDQS_CQS));
+
+ if (ECORE_IS_ISCSI_PERSONALITY(p_hwfn))
+ feat_num[ECORE_ISCSI_CQ] =
+ OSAL_MIN_T(u32, sb_cnt.cnt, RESC_NUM(p_hwfn,
+ ECORE_CMDQS_CQS));
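For the L2 personality branch above, a worked example with made-up resource counts shows the carve-out order (VF queues are reserved first, then the PF takes what is left):

/* sb_cnt.cnt = 64, sb_cnt.iov_cnt = 16, RESC_NUM(ECORE_L2_QUEUE) = 56
 * ECORE_VF_L2_QUE = min(56, 16)            = 16
 * ECORE_PF_L2_QUE = min(64 - 0, 56 - 16)   = 40   (non_l2_sbs is 0 here)
 */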
DP_VERBOSE(p_hwfn, ECORE_MSG_PROBE,
"#PF_L2_QUEUE=%d VF_L2_QUEUES=%d #ROCE_CNQ=%d #FCOE_CQ=%d #ISCSI_CQ=%d #SB=%d\n",
@@ -2452,14 +2885,12 @@ static void ecore_hw_set_feat(struct ecore_hwfn *p_hwfn)
(int)FEAT_NUM(p_hwfn, ECORE_RDMA_CNQ),
(int)FEAT_NUM(p_hwfn, ECORE_FCOE_CQ),
(int)FEAT_NUM(p_hwfn, ECORE_ISCSI_CQ),
- RESC_NUM(p_hwfn, ECORE_SB));
+ (int)sb_cnt.cnt);
}
const char *ecore_hw_get_resc_name(enum ecore_resources res_id)
{
switch (res_id) {
- case ECORE_SB:
- return "SB";
case ECORE_L2_QUEUE:
return "L2_QUEUE";
case ECORE_VPORT:
@@ -2486,6 +2917,8 @@ const char *ecore_hw_get_resc_name(enum ecore_resources res_id)
return "RDMA_STATS_QUEUE";
case ECORE_BDQ:
return "BDQ";
+ case ECORE_SB:
+ return "SB";
default:
return "UNKNOWN_RESOURCE";
}
@@ -2493,12 +2926,14 @@ const char *ecore_hw_get_resc_name(enum ecore_resources res_id)
static enum _ecore_status_t
__ecore_hw_set_soft_resc_size(struct ecore_hwfn *p_hwfn,
- enum ecore_resources res_id, u32 resc_max_val,
+ struct ecore_ptt *p_ptt,
+ enum ecore_resources res_id,
+ u32 resc_max_val,
u32 *p_mcp_resp)
{
enum _ecore_status_t rc;
- rc = ecore_mcp_set_resc_max_val(p_hwfn, p_hwfn->p_main_ptt, res_id,
+ rc = ecore_mcp_set_resc_max_val(p_hwfn, p_ptt, res_id,
resc_max_val, p_mcp_resp);
if (rc != ECORE_SUCCESS) {
DP_NOTICE(p_hwfn, true,
@@ -2516,7 +2951,8 @@ __ecore_hw_set_soft_resc_size(struct ecore_hwfn *p_hwfn,
}
static enum _ecore_status_t
-ecore_hw_set_soft_resc_size(struct ecore_hwfn *p_hwfn)
+ecore_hw_set_soft_resc_size(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
{
bool b_ah = ECORE_IS_AH(p_hwfn->p_dev);
u32 resc_max_val, mcp_resp;
@@ -2536,7 +2972,7 @@ ecore_hw_set_soft_resc_size(struct ecore_hwfn *p_hwfn)
continue;
}
- rc = __ecore_hw_set_soft_resc_size(p_hwfn, res_id,
+ rc = __ecore_hw_set_soft_resc_size(p_hwfn, p_ptt, res_id,
resc_max_val, &mcp_resp);
if (rc != ECORE_SUCCESS)
return rc;
@@ -2561,14 +2997,8 @@ enum _ecore_status_t ecore_hw_get_dflt_resc(struct ecore_hwfn *p_hwfn,
{
u8 num_funcs = p_hwfn->num_funcs_on_engine;
bool b_ah = ECORE_IS_AH(p_hwfn->p_dev);
- struct ecore_sb_cnt_info sb_cnt_info;
switch (res_id) {
- case ECORE_SB:
- OSAL_MEM_ZERO(&sb_cnt_info, sizeof(sb_cnt_info));
- ecore_int_get_num_sbs(p_hwfn, &sb_cnt_info);
- *p_resc_num = sb_cnt_info.sb_cnt;
- break;
case ECORE_L2_QUEUE:
*p_resc_num = (b_ah ? MAX_NUM_L2_QUEUES_K2 :
MAX_NUM_L2_QUEUES_BB) / num_funcs;
@@ -2625,6 +3055,12 @@ enum _ecore_status_t ecore_hw_get_dflt_resc(struct ecore_hwfn *p_hwfn,
if (!*p_resc_num)
*p_resc_start = 0;
break;
+ case ECORE_SB:
+ /* Since we want its value to reflect whether MFW supports
+ * the new scheme, have a default of 0.
+ */
+ *p_resc_num = 0;
+ break;
default:
*p_resc_start = *p_resc_num * p_hwfn->enabled_func_idx;
break;
@@ -2689,14 +3125,9 @@ __ecore_hw_set_resc_info(struct ecore_hwfn *p_hwfn, enum ecore_resources res_id,
goto out;
}
- /* TBD - remove this when revising the handling of the SB resource */
- if (res_id == ECORE_SB) {
- /* Excluding the slowpath SB */
- *p_resc_num -= 1;
- *p_resc_start -= p_hwfn->enabled_func_idx;
- }
-
- if (*p_resc_num != dflt_resc_num || *p_resc_start != dflt_resc_start) {
+ if ((*p_resc_num != dflt_resc_num ||
+ *p_resc_start != dflt_resc_start) &&
+ res_id != ECORE_SB) {
DP_INFO(p_hwfn,
"MFW allocation for resource %d [%s] differs from default values [%d,%d vs. %d,%d]%s\n",
res_id, ecore_hw_get_resc_name(res_id), *p_resc_num,
@@ -2726,10 +3157,8 @@ static enum _ecore_status_t ecore_hw_set_resc_info(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
}
-#define ECORE_RESC_ALLOC_LOCK_RETRY_CNT 10
-#define ECORE_RESC_ALLOC_LOCK_RETRY_INTVL_US 10000 /* 10 msec */
-
static enum _ecore_status_t ecore_hw_get_resc(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
bool drv_resc_alloc)
{
struct ecore_resc_unlock_params resc_unlock_params;
@@ -2759,15 +3188,10 @@ static enum _ecore_status_t ecore_hw_get_resc(struct ecore_hwfn *p_hwfn,
* Old drivers that don't acquire the lock can run in parallel, and
* their allocation values won't be affected by the updated max values.
*/
- OSAL_MEM_ZERO(&resc_lock_params, sizeof(resc_lock_params));
- resc_lock_params.resource = ECORE_RESC_LOCK_RESC_ALLOC;
- resc_lock_params.retry_num = ECORE_RESC_ALLOC_LOCK_RETRY_CNT;
- resc_lock_params.retry_interval = ECORE_RESC_ALLOC_LOCK_RETRY_INTVL_US;
- resc_lock_params.sleep_b4_retry = true;
- OSAL_MEM_ZERO(&resc_unlock_params, sizeof(resc_unlock_params));
- resc_unlock_params.resource = ECORE_RESC_LOCK_RESC_ALLOC;
-
- rc = ecore_mcp_resc_lock(p_hwfn, p_hwfn->p_main_ptt, &resc_lock_params);
+ ecore_mcp_resc_lock_default_init(&resc_lock_params, &resc_unlock_params,
+ ECORE_RESC_LOCK_RESC_ALLOC, false);
+
+ rc = ecore_mcp_resc_lock(p_hwfn, p_ptt, &resc_lock_params);
if (rc != ECORE_SUCCESS && rc != ECORE_NOTIMPL) {
return rc;
} else if (rc == ECORE_NOTIMPL) {
@@ -2779,7 +3203,7 @@ static enum _ecore_status_t ecore_hw_get_resc(struct ecore_hwfn *p_hwfn,
rc = ECORE_BUSY;
goto unlock_and_exit;
} else {
- rc = ecore_hw_set_soft_resc_size(p_hwfn);
+ rc = ecore_hw_set_soft_resc_size(p_hwfn, p_ptt);
if (rc != ECORE_SUCCESS && rc != ECORE_NOTIMPL) {
DP_NOTICE(p_hwfn, false,
"Failed to set the max values of the soft resources\n");
@@ -2787,7 +3211,7 @@ static enum _ecore_status_t ecore_hw_get_resc(struct ecore_hwfn *p_hwfn,
} else if (rc == ECORE_NOTIMPL) {
DP_INFO(p_hwfn,
"Skip the max values setting of the soft resources since it is not supported by the MFW\n");
- rc = ecore_mcp_resc_unlock(p_hwfn, p_hwfn->p_main_ptt,
+ rc = ecore_mcp_resc_unlock(p_hwfn, p_ptt,
&resc_unlock_params);
if (rc != ECORE_SUCCESS)
DP_INFO(p_hwfn,
@@ -2800,7 +3224,7 @@ static enum _ecore_status_t ecore_hw_get_resc(struct ecore_hwfn *p_hwfn,
goto unlock_and_exit;
if (resc_lock_params.b_granted && !resc_unlock_params.b_released) {
- rc = ecore_mcp_resc_unlock(p_hwfn, p_hwfn->p_main_ptt,
+ rc = ecore_mcp_resc_unlock(p_hwfn, p_ptt,
&resc_unlock_params);
if (rc != ECORE_SUCCESS)
DP_INFO(p_hwfn,
@@ -2846,6 +3270,10 @@ static enum _ecore_status_t ecore_hw_get_resc(struct ecore_hwfn *p_hwfn,
return ECORE_INVAL;
}
+ /* This will also learn the number of SBs from MFW */
+ if (ecore_int_igu_reset_cam(p_hwfn, p_ptt))
+ return ECORE_INVAL;
+
ecore_hw_set_feat(p_hwfn);
DP_VERBOSE(p_hwfn, ECORE_MSG_PROBE,
@@ -2859,7 +3287,9 @@ static enum _ecore_status_t ecore_hw_get_resc(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
unlock_and_exit:
- ecore_mcp_resc_unlock(p_hwfn, p_hwfn->p_main_ptt, &resc_unlock_params);
+ if (resc_lock_params.b_granted && !resc_unlock_params.b_released)
+ ecore_mcp_resc_unlock(p_hwfn, p_ptt,
+ &resc_unlock_params);
return rc;
}
@@ -2870,6 +3300,7 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
{
u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg, dcbx_mode;
u32 port_cfg_addr, link_temp, nvm_cfg_addr, device_capabilities;
+ struct ecore_mcp_link_capabilities *p_caps;
struct ecore_mcp_link_params *link;
enum _ecore_status_t rc;
@@ -2889,8 +3320,8 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
nvm_cfg1_offset = ecore_rd(p_hwfn, p_ptt, nvm_cfg_addr + 4);
addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
- OFFSETOF(struct nvm_cfg1, glob) + OFFSETOF(struct nvm_cfg1_glob,
- core_cfg);
+ OFFSETOF(struct nvm_cfg1, glob) +
+ OFFSETOF(struct nvm_cfg1_glob, core_cfg);
core_cfg = ecore_rd(p_hwfn, p_ptt, addr);
@@ -2959,6 +3390,7 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
/* Read default link configuration */
link = &p_hwfn->mcp_info->link_input;
+ p_caps = &p_hwfn->mcp_info->link_capabilities;
port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
OFFSETOF(struct nvm_cfg1, port[MFW_PORT(p_hwfn)]);
link_temp = ecore_rd(p_hwfn, p_ptt,
@@ -2966,13 +3398,11 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
OFFSETOF(struct nvm_cfg1_port, speed_cap_mask));
link_temp &= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK;
link->speed.advertised_speeds = link_temp;
-
- link_temp = link->speed.advertised_speeds;
- p_hwfn->mcp_info->link_capabilities.speed_capabilities = link_temp;
+ p_caps->speed_capabilities = link->speed.advertised_speeds;
link_temp = ecore_rd(p_hwfn, p_ptt,
- port_cfg_addr +
- OFFSETOF(struct nvm_cfg1_port, link_settings));
+ port_cfg_addr +
+ OFFSETOF(struct nvm_cfg1_port, link_settings));
switch ((link_temp & NVM_CFG1_PORT_DRV_LINK_SPEED_MASK) >>
NVM_CFG1_PORT_DRV_LINK_SPEED_OFFSET) {
case NVM_CFG1_PORT_DRV_LINK_SPEED_AUTONEG:
@@ -3000,10 +3430,8 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
DP_NOTICE(p_hwfn, true, "Unknown Speed in 0x%08x\n", link_temp);
}
- p_hwfn->mcp_info->link_capabilities.default_speed =
- link->speed.forced_speed;
- p_hwfn->mcp_info->link_capabilities.default_speed_autoneg =
- link->speed.autoneg;
+ p_caps->default_speed = link->speed.forced_speed;
+ p_caps->default_speed_autoneg = link->speed.autoneg;
link_temp &= NVM_CFG1_PORT_DRV_FLOW_CONTROL_MASK;
link_temp >>= NVM_CFG1_PORT_DRV_FLOW_CONTROL_OFFSET;
@@ -3015,15 +3443,47 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
NVM_CFG1_PORT_DRV_FLOW_CONTROL_TX);
link->loopback_mode = 0;
+ if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) {
+ link_temp = ecore_rd(p_hwfn, p_ptt, port_cfg_addr +
+ OFFSETOF(struct nvm_cfg1_port, ext_phy));
+ link_temp &= NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_MASK;
+ link_temp >>= NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_OFFSET;
+ p_caps->default_eee = ECORE_MCP_EEE_ENABLED;
+ link->eee.enable = true;
+ switch (link_temp) {
+ case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_DISABLED:
+ p_caps->default_eee = ECORE_MCP_EEE_DISABLED;
+ link->eee.enable = false;
+ break;
+ case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_BALANCED:
+ p_caps->eee_lpi_timer = EEE_TX_TIMER_USEC_BALANCED_TIME;
+ break;
+ case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_AGGRESSIVE:
+ p_caps->eee_lpi_timer =
+ EEE_TX_TIMER_USEC_AGGRESSIVE_TIME;
+ break;
+ case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_LOW_LATENCY:
+ p_caps->eee_lpi_timer = EEE_TX_TIMER_USEC_LATENCY_TIME;
+ break;
+ }
+
+ link->eee.tx_lpi_timer = p_caps->eee_lpi_timer;
+ link->eee.tx_lpi_enable = link->eee.enable;
+ link->eee.adv_caps = ECORE_EEE_1G_ADV | ECORE_EEE_10G_ADV;
+ } else {
+ p_caps->default_eee = ECORE_MCP_EEE_UNSUPPORTED;
+ }
+
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
- "Read default link: Speed 0x%08x, Adv. Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x\n",
+ "Read default link: Speed 0x%08x, Adv. Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x\n EEE: %02x [%08x usec]",
link->speed.forced_speed, link->speed.advertised_speeds,
- link->speed.autoneg, link->pause.autoneg);
+ link->speed.autoneg, link->pause.autoneg,
+ p_caps->default_eee, p_caps->eee_lpi_timer);
/* Read Multi-function information from shmem */
addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
- OFFSETOF(struct nvm_cfg1, glob) +
- OFFSETOF(struct nvm_cfg1_glob, generic_cont0);
+ OFFSETOF(struct nvm_cfg1, glob) +
+ OFFSETOF(struct nvm_cfg1_glob, generic_cont0);
generic_cont0 = ecore_rd(p_hwfn, p_ptt, addr);
@@ -3032,6 +3492,41 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
switch (mf_mode) {
case NVM_CFG1_GLOB_MF_MODE_MF_ALLOWED:
+ p_hwfn->p_dev->mf_bits = 1 << ECORE_MF_OVLAN_CLSS;
+ break;
+ case NVM_CFG1_GLOB_MF_MODE_UFP:
+ p_hwfn->p_dev->mf_bits = 1 << ECORE_MF_OVLAN_CLSS |
+ 1 << ECORE_MF_UFP_SPECIFIC;
+ break;
+
+ case NVM_CFG1_GLOB_MF_MODE_NPAR1_0:
+ p_hwfn->p_dev->mf_bits = 1 << ECORE_MF_LLH_MAC_CLSS |
+ 1 << ECORE_MF_LLH_PROTO_CLSS |
+ 1 << ECORE_MF_LL2_NON_UNICAST |
+ 1 << ECORE_MF_INTER_PF_SWITCH |
+ 1 << ECORE_MF_DISABLE_ARFS;
+ break;
+ case NVM_CFG1_GLOB_MF_MODE_DEFAULT:
+ p_hwfn->p_dev->mf_bits = 1 << ECORE_MF_LLH_MAC_CLSS |
+ 1 << ECORE_MF_LLH_PROTO_CLSS |
+ 1 << ECORE_MF_LL2_NON_UNICAST;
+ if (ECORE_IS_BB(p_hwfn->p_dev))
+ p_hwfn->p_dev->mf_bits |= 1 << ECORE_MF_NEED_DEF_PF;
+ break;
+ }
+ DP_INFO(p_hwfn, "Multi function mode is 0x%lx\n",
+ p_hwfn->p_dev->mf_bits);
+
+ if (ECORE_IS_CMT(p_hwfn->p_dev))
+ p_hwfn->p_dev->mf_bits |= (1 << ECORE_MF_DISABLE_ARFS);
+
+ /* It's funny since we have another switch, but it's easier
+ * to throw this away in Linux this way. Long term, it might be
+ * better to have getters for the needed ECORE_MF_* fields,
+ * convert the client code and eliminate this.
+ */
+ switch (mf_mode) {
+ case NVM_CFG1_GLOB_MF_MODE_MF_ALLOWED:
p_hwfn->p_dev->mf_mode = ECORE_MF_OVLAN;
break;
case NVM_CFG1_GLOB_MF_MODE_NPAR1_0:
@@ -3040,31 +3535,32 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
case NVM_CFG1_GLOB_MF_MODE_DEFAULT:
p_hwfn->p_dev->mf_mode = ECORE_MF_DEFAULT;
break;
+ case NVM_CFG1_GLOB_MF_MODE_UFP:
+ p_hwfn->p_dev->mf_mode = ECORE_MF_UFP;
+ break;
}
- DP_INFO(p_hwfn, "Multi function mode is %08x\n",
- p_hwfn->p_dev->mf_mode);
/* Read Multi-function information from shmem */
addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
- OFFSETOF(struct nvm_cfg1, glob) +
- OFFSETOF(struct nvm_cfg1_glob, device_capabilities);
+ OFFSETOF(struct nvm_cfg1, glob) +
+ OFFSETOF(struct nvm_cfg1_glob, device_capabilities);
device_capabilities = ecore_rd(p_hwfn, p_ptt, addr);
if (device_capabilities & NVM_CFG1_GLOB_DEVICE_CAPABILITIES_ETHERNET)
OSAL_SET_BIT(ECORE_DEV_CAP_ETH,
- &p_hwfn->hw_info.device_capabilities);
+ &p_hwfn->hw_info.device_capabilities);
if (device_capabilities & NVM_CFG1_GLOB_DEVICE_CAPABILITIES_FCOE)
OSAL_SET_BIT(ECORE_DEV_CAP_FCOE,
- &p_hwfn->hw_info.device_capabilities);
+ &p_hwfn->hw_info.device_capabilities);
if (device_capabilities & NVM_CFG1_GLOB_DEVICE_CAPABILITIES_ISCSI)
OSAL_SET_BIT(ECORE_DEV_CAP_ISCSI,
- &p_hwfn->hw_info.device_capabilities);
+ &p_hwfn->hw_info.device_capabilities);
if (device_capabilities & NVM_CFG1_GLOB_DEVICE_CAPABILITIES_ROCE)
OSAL_SET_BIT(ECORE_DEV_CAP_ROCE,
- &p_hwfn->hw_info.device_capabilities);
+ &p_hwfn->hw_info.device_capabilities);
if (device_capabilities & NVM_CFG1_GLOB_DEVICE_CAPABILITIES_IWARP)
OSAL_SET_BIT(ECORE_DEV_CAP_IWARP,
- &p_hwfn->hw_info.device_capabilities);
+ &p_hwfn->hw_info.device_capabilities);
rc = ecore_mcp_fill_shmem_func_info(p_hwfn, p_ptt);
if (rc != ECORE_SUCCESS && p_params->b_relaxed_probe) {
@@ -3101,7 +3597,7 @@ static void ecore_get_num_funcs(struct ecore_hwfn *p_hwfn,
if (reg_function_hide & 0x1) {
if (ECORE_IS_BB(p_dev)) {
- if (ECORE_PATH_ID(p_hwfn) && p_dev->num_hwfns == 1) {
+ if (ECORE_PATH_ID(p_hwfn) && !ECORE_IS_CMT(p_dev)) {
num_funcs = 0;
eng_mask = 0xaaaa;
} else {
@@ -3151,14 +3647,14 @@ static void ecore_get_num_funcs(struct ecore_hwfn *p_hwfn,
static void ecore_hw_info_port_num_bb(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt)
{
+ struct ecore_dev *p_dev = p_hwfn->p_dev;
u32 port_mode;
#ifndef ASIC_ONLY
/* Read the port mode */
- if (CHIP_REV_IS_FPGA(p_hwfn->p_dev))
+ if (CHIP_REV_IS_FPGA(p_dev))
port_mode = 4;
- else if (CHIP_REV_IS_EMUL(p_hwfn->p_dev) &&
- (p_hwfn->p_dev->num_hwfns > 1))
+ else if (CHIP_REV_IS_EMUL(p_dev) && ECORE_IS_CMT(p_dev))
/* In CMT on emulation, assume 1 port */
port_mode = 1;
else
@@ -3166,38 +3662,39 @@ static void ecore_hw_info_port_num_bb(struct ecore_hwfn *p_hwfn,
port_mode = ecore_rd(p_hwfn, p_ptt, CNIG_REG_NW_PORT_MODE_BB);
if (port_mode < 3) {
- p_hwfn->p_dev->num_ports_in_engines = 1;
+ p_dev->num_ports_in_engine = 1;
} else if (port_mode <= 5) {
- p_hwfn->p_dev->num_ports_in_engines = 2;
+ p_dev->num_ports_in_engine = 2;
} else {
DP_NOTICE(p_hwfn, true, "PORT MODE: %d not supported\n",
- p_hwfn->p_dev->num_ports_in_engines);
+ p_dev->num_ports_in_engine);
- /* Default num_ports_in_engines to something */
- p_hwfn->p_dev->num_ports_in_engines = 1;
+ /* Default num_ports_in_engine to something */
+ p_dev->num_ports_in_engine = 1;
}
}
static void ecore_hw_info_port_num_ah_e5(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt)
{
+ struct ecore_dev *p_dev = p_hwfn->p_dev;
u32 port;
int i;
- p_hwfn->p_dev->num_ports_in_engines = 0;
+ p_dev->num_ports_in_engine = 0;
#ifndef ASIC_ONLY
- if (CHIP_REV_IS_EMUL(p_hwfn->p_dev)) {
+ if (CHIP_REV_IS_EMUL(p_dev)) {
port = ecore_rd(p_hwfn, p_ptt, MISCS_REG_ECO_RESERVED);
switch ((port & 0xf000) >> 12) {
case 1:
- p_hwfn->p_dev->num_ports_in_engines = 1;
+ p_dev->num_ports_in_engine = 1;
break;
case 3:
- p_hwfn->p_dev->num_ports_in_engines = 2;
+ p_dev->num_ports_in_engine = 2;
break;
case 0xf:
- p_hwfn->p_dev->num_ports_in_engines = 4;
+ p_dev->num_ports_in_engine = 4;
break;
default:
DP_NOTICE(p_hwfn, false,
@@ -3211,17 +3708,68 @@ static void ecore_hw_info_port_num_ah_e5(struct ecore_hwfn *p_hwfn,
CNIG_REG_NIG_PORT0_CONF_K2_E5 +
(i * 4));
if (port & 1)
- p_hwfn->p_dev->num_ports_in_engines++;
+ p_dev->num_ports_in_engine++;
}
+
+ if (!p_dev->num_ports_in_engine) {
+ DP_NOTICE(p_hwfn, true, "All NIG ports are inactive\n");
+
+ /* Default num_ports_in_engine to something */
+ p_dev->num_ports_in_engine = 1;
+ }
}
static void ecore_hw_info_port_num(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt)
{
- if (ECORE_IS_BB(p_hwfn->p_dev))
+ struct ecore_dev *p_dev = p_hwfn->p_dev;
+
+ /* Determine the number of ports per engine */
+ if (ECORE_IS_BB(p_dev))
ecore_hw_info_port_num_bb(p_hwfn, p_ptt);
else
ecore_hw_info_port_num_ah_e5(p_hwfn, p_ptt);
+
+ /* Get the total number of ports of the device */
+ if (ECORE_IS_CMT(p_dev)) {
+ /* In CMT there is always only one port */
+ p_dev->num_ports = 1;
+#ifndef ASIC_ONLY
+ } else if (CHIP_REV_IS_EMUL(p_dev) || CHIP_REV_IS_TEDIBEAR(p_dev)) {
+ p_dev->num_ports = p_dev->num_ports_in_engine *
+ ecore_device_num_engines(p_dev);
+#endif
+ } else {
+ u32 addr, global_offsize, global_addr;
+
+ addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
+ PUBLIC_GLOBAL);
+ global_offsize = ecore_rd(p_hwfn, p_ptt, addr);
+ global_addr = SECTION_ADDR(global_offsize, 0);
+ addr = global_addr + OFFSETOF(struct public_global, max_ports);
+ p_dev->num_ports = (u8)ecore_rd(p_hwfn, p_ptt, addr);
+ }
+}
+
+static void ecore_mcp_get_eee_caps(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ struct ecore_mcp_link_capabilities *p_caps;
+ u32 eee_status;
+
+ p_caps = &p_hwfn->mcp_info->link_capabilities;
+ if (p_caps->default_eee == ECORE_MCP_EEE_UNSUPPORTED)
+ return;
+
+ p_caps->eee_speed_caps = 0;
+ eee_status = ecore_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr +
+ OFFSETOF(struct public_port, eee_status));
+ eee_status = (eee_status & EEE_SUPPORTED_SPEED_MASK) >>
+ EEE_SUPPORTED_SPEED_OFFSET;
+ if (eee_status & EEE_1G_SUPPORTED)
+ p_caps->eee_speed_caps |= ECORE_EEE_1G_ADV;
+ if (eee_status & EEE_10G_ADV)
+ p_caps->eee_speed_caps |= ECORE_EEE_10G_ADV;
}
static enum _ecore_status_t
@@ -3244,14 +3792,10 @@ ecore_get_hw_info(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
}
}
- /* TODO In get_hw_info, amoungst others:
- * Get MCP FW revision and determine according to it the supported
- * featrues (e.g. DCB)
- * Get boot mode
- * ecore_get_pcie_width_speed, WOL capability.
- * Number of global CQ-s (for storage
- */
- ecore_hw_info_port_num(p_hwfn, p_ptt);
+ if (IS_LEAD_HWFN(p_hwfn))
+ ecore_hw_info_port_num(p_hwfn, p_ptt);
+
+ ecore_mcp_get_capabilities(p_hwfn, p_ptt);
#ifndef ASIC_ONLY
if (CHIP_REV_IS_ASIC(p_hwfn->p_dev)) {
@@ -3291,6 +3835,10 @@ ecore_get_hw_info(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
p_hwfn->mcp_info->func_info.ovlan;
ecore_mcp_cmd_port_init(p_hwfn, p_ptt);
+
+ ecore_mcp_get_eee_caps(p_hwfn, p_ptt);
+
+ ecore_mcp_read_ufp_config(p_hwfn, p_ptt);
}
if (personality != ECORE_PCI_DEFAULT) {
@@ -3336,7 +3884,7 @@ ecore_get_hw_info(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
* the resources/features depends on them.
* This order is not harmful if not forcing.
*/
- rc = ecore_hw_get_resc(p_hwfn, drv_resc_alloc);
+ rc = ecore_hw_get_resc(p_hwfn, p_ptt, drv_resc_alloc);
if (rc != ECORE_SUCCESS && p_params->b_relaxed_probe) {
rc = ECORE_SUCCESS;
p_params->p_relaxed_res = ECORE_HW_PREPARE_BAD_MCP;
@@ -3345,9 +3893,11 @@ ecore_get_hw_info(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
return rc;
}
-static enum _ecore_status_t ecore_get_dev_info(struct ecore_dev *p_dev)
+static enum _ecore_status_t ecore_get_dev_info(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
{
- struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
+ struct ecore_dev *p_dev = p_hwfn->p_dev;
+ u16 device_id_mask;
u32 tmp;
/* Read Vendor Id / Device Id */
@@ -3357,21 +3907,27 @@ static enum _ecore_status_t ecore_get_dev_info(struct ecore_dev *p_dev)
&p_dev->device_id);
/* Determine type */
- if ((p_dev->device_id & ECORE_DEV_ID_MASK) == ECORE_DEV_ID_MASK_AH)
- p_dev->type = ECORE_DEV_TYPE_AH;
- else
+ device_id_mask = p_dev->device_id & ECORE_DEV_ID_MASK;
+ switch (device_id_mask) {
+ case ECORE_DEV_ID_MASK_BB:
p_dev->type = ECORE_DEV_TYPE_BB;
+ break;
+ case ECORE_DEV_ID_MASK_AH:
+ p_dev->type = ECORE_DEV_TYPE_AH;
+ break;
+ default:
+ DP_NOTICE(p_hwfn, true, "Unknown device id 0x%x\n",
+ p_dev->device_id);
+ return ECORE_ABORTED;
+ }
- p_dev->chip_num = (u16)ecore_rd(p_hwfn, p_hwfn->p_main_ptt,
- MISCS_REG_CHIP_NUM);
- p_dev->chip_rev = (u16)ecore_rd(p_hwfn, p_hwfn->p_main_ptt,
- MISCS_REG_CHIP_REV);
-
- MASK_FIELD(CHIP_REV, p_dev->chip_rev);
+ tmp = ecore_rd(p_hwfn, p_ptt, MISCS_REG_CHIP_NUM);
+ p_dev->chip_num = (u16)GET_FIELD(tmp, CHIP_NUM);
+ tmp = ecore_rd(p_hwfn, p_ptt, MISCS_REG_CHIP_REV);
+ p_dev->chip_rev = (u8)GET_FIELD(tmp, CHIP_REV);
/* Learn number of HW-functions */
- tmp = ecore_rd(p_hwfn, p_hwfn->p_main_ptt,
- MISCS_REG_CMT_ENABLED_FOR_PAIR);
+ tmp = ecore_rd(p_hwfn, p_ptt, MISCS_REG_CMT_ENABLED_FOR_PAIR);
if (tmp & (1 << p_hwfn->rel_pf_id)) {
DP_NOTICE(p_dev->hwfns, false, "device in CMT mode\n");
@@ -3391,32 +3947,29 @@ static enum _ecore_status_t ecore_get_dev_info(struct ecore_dev *p_dev)
}
#endif
- p_dev->chip_bond_id = ecore_rd(p_hwfn, p_hwfn->p_main_ptt,
- MISCS_REG_CHIP_TEST_REG) >> 4;
- MASK_FIELD(CHIP_BOND_ID, p_dev->chip_bond_id);
- p_dev->chip_metal = (u16)ecore_rd(p_hwfn, p_hwfn->p_main_ptt,
- MISCS_REG_CHIP_METAL);
- MASK_FIELD(CHIP_METAL, p_dev->chip_metal);
+ tmp = ecore_rd(p_hwfn, p_ptt, MISCS_REG_CHIP_TEST_REG);
+ p_dev->chip_bond_id = (u8)GET_FIELD(tmp, CHIP_BOND_ID);
+ tmp = ecore_rd(p_hwfn, p_ptt, MISCS_REG_CHIP_METAL);
+ p_dev->chip_metal = (u8)GET_FIELD(tmp, CHIP_METAL);
+
DP_INFO(p_dev->hwfns,
- "Chip details - %s %c%d, Num: %04x Rev: %04x Bond id: %04x Metal: %04x\n",
+ "Chip details - %s %c%d, Num: %04x Rev: %02x Bond id: %02x Metal: %02x\n",
ECORE_IS_BB(p_dev) ? "BB" : "AH",
'A' + p_dev->chip_rev, (int)p_dev->chip_metal,
p_dev->chip_num, p_dev->chip_rev, p_dev->chip_bond_id,
p_dev->chip_metal);
- if (ECORE_IS_BB(p_dev) && CHIP_REV_IS_A0(p_dev)) {
+ if (ECORE_IS_BB_A0(p_dev)) {
DP_NOTICE(p_dev->hwfns, false,
"The chip type/rev (BB A0) is not supported!\n");
return ECORE_ABORTED;
}
#ifndef ASIC_ONLY
if (CHIP_REV_IS_EMUL(p_dev) && ECORE_IS_AH(p_dev))
- ecore_wr(p_hwfn, p_hwfn->p_main_ptt,
- MISCS_REG_PLL_MAIN_CTRL_4, 0x1);
+ ecore_wr(p_hwfn, p_ptt, MISCS_REG_PLL_MAIN_CTRL_4, 0x1);
if (CHIP_REV_IS_EMUL(p_dev)) {
- tmp = ecore_rd(p_hwfn, p_hwfn->p_main_ptt,
- MISCS_REG_ECO_RESERVED);
+ tmp = ecore_rd(p_hwfn, p_ptt, MISCS_REG_ECO_RESERVED);
if (tmp & (1 << 29)) {
DP_NOTICE(p_hwfn, false,
"Emulation: Running on a FULL build\n");
@@ -3446,7 +3999,6 @@ void ecore_prepare_hibernate(struct ecore_dev *p_dev)
"Mark hw/fw uninitialized\n");
p_hwfn->hw_init_done = false;
- p_hwfn->first_on_engine = false;
ecore_ptt_invalidate(p_hwfn);
}
@@ -3459,6 +4011,7 @@ ecore_hw_prepare_single(struct ecore_hwfn *p_hwfn,
void OSAL_IOMEM * p_doorbells,
struct ecore_hw_prepare_params *p_params)
{
+ struct ecore_mdump_retain_data mdump_retain;
struct ecore_dev *p_dev = p_hwfn->p_dev;
struct ecore_mdump_info mdump_info;
enum _ecore_status_t rc = ECORE_SUCCESS;
@@ -3495,7 +4048,7 @@ ecore_hw_prepare_single(struct ecore_hwfn *p_hwfn,
/* First hwfn learns basic information, e.g., number of hwfns */
if (!p_hwfn->my_id) {
- rc = ecore_get_dev_info(p_dev);
+ rc = ecore_get_dev_info(p_hwfn, p_hwfn->p_main_ptt);
if (rc != ECORE_SUCCESS) {
if (p_params->b_relaxed_probe)
p_params->p_relaxed_res =
@@ -3526,24 +4079,37 @@ ecore_hw_prepare_single(struct ecore_hwfn *p_hwfn,
/* Sending a mailbox to the MFW should be after ecore_get_hw_info() is
* called, since, among others, it sets the number of ports in an engine.
*/
- if (p_params->initiate_pf_flr && p_hwfn == ECORE_LEADING_HWFN(p_dev) &&
+ if (p_params->initiate_pf_flr && IS_LEAD_HWFN(p_hwfn) &&
!p_dev->recov_in_prog) {
rc = ecore_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
if (rc != ECORE_SUCCESS)
DP_NOTICE(p_hwfn, false, "Failed to initiate PF FLR\n");
}
- /* Check if mdump logs are present and update the epoch value */
- if (p_hwfn == ECORE_LEADING_HWFN(p_hwfn->p_dev)) {
+ /* Check if mdump logs/data are present and update the epoch value */
+ if (IS_LEAD_HWFN(p_hwfn)) {
+#ifndef ASIC_ONLY
+ if (!CHIP_REV_IS_EMUL(p_dev)) {
+#endif
rc = ecore_mcp_mdump_get_info(p_hwfn, p_hwfn->p_main_ptt,
&mdump_info);
- if (rc == ECORE_SUCCESS && mdump_info.num_of_logs > 0) {
+ if (rc == ECORE_SUCCESS && mdump_info.num_of_logs)
DP_NOTICE(p_hwfn, false,
"* * * IMPORTANT - HW ERROR register dump captured by device * * *\n");
- }
+
+ rc = ecore_mcp_mdump_get_retain(p_hwfn, p_hwfn->p_main_ptt,
+ &mdump_retain);
+ if (rc == ECORE_SUCCESS && mdump_retain.valid)
+ DP_NOTICE(p_hwfn, false,
+ "mdump retained data: epoch 0x%08x, pf 0x%x, status 0x%08x\n",
+ mdump_retain.epoch, mdump_retain.pf,
+ mdump_retain.status);
ecore_mcp_mdump_set_values(p_hwfn, p_hwfn->p_main_ptt,
p_params->epoch);
+#ifndef ASIC_ONLY
+ }
+#endif
}
/* Allocate the init RT array and initialize the init-ops engine */
@@ -3605,17 +4171,21 @@ enum _ecore_status_t ecore_hw_prepare(struct ecore_dev *p_dev,
p_params->personality = p_hwfn->hw_info.personality;
/* initialize 2nd hwfn if necessary */
- if (p_dev->num_hwfns > 1) {
+ if (ECORE_IS_CMT(p_dev)) {
void OSAL_IOMEM *p_regview, *p_doorbell;
u8 OSAL_IOMEM *addr;
/* adjust bar offset for second engine */
addr = (u8 OSAL_IOMEM *)p_dev->regview +
- ecore_hw_bar_size(p_hwfn, BAR_ID_0) / 2;
+ ecore_hw_bar_size(p_hwfn,
+ p_hwfn->p_main_ptt,
+ BAR_ID_0) / 2;
p_regview = (void OSAL_IOMEM *)addr;
addr = (u8 OSAL_IOMEM *)p_dev->doorbells +
- ecore_hw_bar_size(p_hwfn, BAR_ID_1) / 2;
+ ecore_hw_bar_size(p_hwfn,
+ p_hwfn->p_main_ptt,
+ BAR_ID_1) / 2;
p_doorbell = (void OSAL_IOMEM *)addr;
/* prepare second hw function */
@@ -3666,7 +4236,9 @@ void ecore_hw_remove(struct ecore_dev *p_dev)
ecore_hw_hwfn_free(p_hwfn);
ecore_mcp_free(p_hwfn);
+#ifdef CONFIG_ECORE_LOCK_ALLOC
OSAL_MUTEX_DEALLOC(&p_hwfn->dmae_info.mutex);
+#endif
}
ecore_iov_free_hw_info(p_dev);
@@ -3844,11 +4416,11 @@ ecore_chain_alloc_pbl(struct ecore_dev *p_dev,
struct ecore_chain *p_chain,
struct ecore_chain_ext_pbl *ext_pbl)
{
- void *p_virt = OSAL_NULL;
- u8 *p_pbl_virt = OSAL_NULL;
- void **pp_virt_addr_tbl = OSAL_NULL;
- dma_addr_t p_phys = 0, p_pbl_phys = 0;
u32 page_cnt = p_chain->page_cnt, size, i;
+ dma_addr_t p_phys = 0, p_pbl_phys = 0;
+ void **pp_virt_addr_tbl = OSAL_NULL;
+ u8 *p_pbl_virt = OSAL_NULL;
+ void *p_virt = OSAL_NULL;
size = page_cnt * sizeof(*pp_virt_addr_tbl);
pp_virt_addr_tbl = (void **)OSAL_VZALLOC(p_dev, size);
@@ -4061,9 +4633,10 @@ enum _ecore_status_t ecore_llh_add_mac_filter(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt, u8 *p_filter)
{
u32 high, low, entry_num;
- enum _ecore_status_t rc;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
- if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
+ if (!OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS,
+ &p_hwfn->p_dev->mf_bits))
return ECORE_SUCCESS;
high = p_filter[1] | (p_filter[0] << 8);
@@ -4084,7 +4657,7 @@ enum _ecore_status_t ecore_llh_add_mac_filter(struct ecore_hwfn *p_hwfn,
p_filter[0], p_filter[1], p_filter[2], p_filter[3],
p_filter[4], p_filter[5], entry_num);
- return ECORE_SUCCESS;
+ return rc;
}
static enum _ecore_status_t
@@ -4128,9 +4701,10 @@ void ecore_llh_remove_mac_filter(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt, u8 *p_filter)
{
u32 high, low, entry_num;
- enum _ecore_status_t rc;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
- if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
+ if (!OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS,
+ &p_hwfn->p_dev->mf_bits))
return;
high = p_filter[1] | (p_filter[0] << 8);
@@ -4202,10 +4776,11 @@ ecore_llh_add_protocol_filter(struct ecore_hwfn *p_hwfn,
enum ecore_llh_port_filter_type_t type)
{
u32 high, low, entry_num;
- enum _ecore_status_t rc;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
- if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
- return ECORE_SUCCESS;
+ if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS,
+ &p_hwfn->p_dev->mf_bits))
+ return rc;
high = 0;
low = 0;
@@ -4278,7 +4853,7 @@ ecore_llh_add_protocol_filter(struct ecore_hwfn *p_hwfn,
break;
}
- return ECORE_SUCCESS;
+ return rc;
}
static enum _ecore_status_t
@@ -4345,9 +4920,10 @@ ecore_llh_remove_protocol_filter(struct ecore_hwfn *p_hwfn,
enum ecore_llh_port_filter_type_t type)
{
u32 high, low, entry_num;
- enum _ecore_status_t rc;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
- if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
+ if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS,
+ &p_hwfn->p_dev->mf_bits))
return;
high = 0;
@@ -4415,7 +4991,10 @@ static void ecore_llh_clear_all_filters_bb_ah(struct ecore_hwfn *p_hwfn,
void ecore_llh_clear_all_filters(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt)
{
- if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
+ if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS,
+ &p_hwfn->p_dev->mf_bits) &&
+ !OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS,
+ &p_hwfn->p_dev->mf_bits))
return;
if (ECORE_IS_BB(p_hwfn->p_dev) || ECORE_IS_AH(p_hwfn->p_dev))
@@ -4426,7 +5005,7 @@ enum _ecore_status_t
ecore_llh_set_function_as_default(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt)
{
- if (IS_MF_DEFAULT(p_hwfn) && ECORE_IS_BB(p_hwfn->p_dev)) {
+ if (OSAL_TEST_BIT(ECORE_MF_NEED_DEF_PF, &p_hwfn->p_dev->mf_bits)) {
ecore_wr(p_hwfn, p_ptt,
NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR,
1 << p_hwfn->abs_pf_id / 2);
@@ -4526,7 +5105,7 @@ enum _ecore_status_t ecore_set_rxq_coalesce(struct ecore_hwfn *p_hwfn,
timeset = (u8)(coalesce >> timer_res);
rc = ecore_int_set_timer_res(p_hwfn, p_ptt, timer_res,
- p_cid->abs.sb_idx, false);
+ p_cid->sb_igu_id, false);
if (rc != ECORE_SUCCESS)
goto out;
@@ -4567,7 +5146,7 @@ enum _ecore_status_t ecore_set_txq_coalesce(struct ecore_hwfn *p_hwfn,
timeset = (u8)(coalesce >> timer_res);
rc = ecore_int_set_timer_res(p_hwfn, p_ptt, timer_res,
- p_cid->abs.sb_idx, true);
+ p_cid->sb_igu_id, true);
if (rc != ECORE_SUCCESS)
goto out;
@@ -4604,8 +5183,7 @@ static void ecore_configure_wfq_for_all_vports(struct ecore_hwfn *p_hwfn,
}
}
-static void
-ecore_init_wfq_default_param(struct ecore_hwfn *p_hwfn, u32 min_pf_rate)
+static void ecore_init_wfq_default_param(struct ecore_hwfn *p_hwfn)
{
int i;
@@ -4614,8 +5192,7 @@ ecore_init_wfq_default_param(struct ecore_hwfn *p_hwfn, u32 min_pf_rate)
}
static void ecore_disable_wfq_for_all_vports(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u32 min_pf_rate)
+ struct ecore_ptt *p_ptt)
{
struct init_qm_vport_params *vport_params;
int i;
@@ -4623,7 +5200,7 @@ static void ecore_disable_wfq_for_all_vports(struct ecore_hwfn *p_hwfn,
vport_params = p_hwfn->qm_info.qm_vport_params;
for (i = 0; i < p_hwfn->qm_info.num_vports; i++) {
- ecore_init_wfq_default_param(p_hwfn, min_pf_rate);
+ ecore_init_wfq_default_param(p_hwfn);
ecore_init_vport_wfq(p_hwfn, p_ptt,
vport_params[i].first_tx_pq_id,
vport_params[i].vport_wfq);
@@ -4664,13 +5241,6 @@ static enum _ecore_status_t ecore_init_wfq_param(struct ecore_hwfn *p_hwfn,
non_requested_count = num_vports - req_count;
/* validate possible error cases */
- if (req_rate > min_pf_rate) {
- DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
- "Vport [%d] - Requested rate[%d Mbps] is greater than configured PF min rate[%d Mbps]\n",
- vport_id, req_rate, min_pf_rate);
- return ECORE_INVAL;
- }
-
if (req_rate < min_pf_rate / ECORE_WFQ_UNIT) {
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
"Vport [%d] - Requested rate[%d Mbps] is less than one percent of configured PF min rate[%d Mbps]\n",
@@ -4777,7 +5347,7 @@ static int __ecore_configure_vp_wfq_on_link_change(struct ecore_hwfn *p_hwfn,
if (rc == ECORE_SUCCESS && use_wfq)
ecore_configure_wfq_for_all_vports(p_hwfn, p_ptt, min_pf_rate);
else
- ecore_disable_wfq_for_all_vports(p_hwfn, p_ptt, min_pf_rate);
+ ecore_disable_wfq_for_all_vports(p_hwfn, p_ptt);
return rc;
}
@@ -4791,7 +5361,7 @@ int ecore_configure_vport_wfq(struct ecore_dev *p_dev, u16 vp_id, u32 rate)
int i, rc = ECORE_INVAL;
/* TBD - for multiple hardware functions - that is 100 gig */
- if (p_dev->num_hwfns > 1) {
+ if (ECORE_IS_CMT(p_dev)) {
DP_NOTICE(p_dev, false,
"WFQ configuration is not supported for this device\n");
return rc;
@@ -4820,12 +5390,13 @@ int ecore_configure_vport_wfq(struct ecore_dev *p_dev, u16 vp_id, u32 rate)
/* API to configure WFQ from mcp link change */
void ecore_configure_vp_wfq_on_link_change(struct ecore_dev *p_dev,
+ struct ecore_ptt *p_ptt,
u32 min_pf_rate)
{
int i;
/* TBD - for multiple hardware functions - that is 100 gig */
- if (p_dev->num_hwfns > 1) {
+ if (ECORE_IS_CMT(p_dev)) {
DP_VERBOSE(p_dev, ECORE_MSG_LINK,
"WFQ configuration is not supported for this device\n");
return;
@@ -4834,8 +5405,7 @@ void ecore_configure_vp_wfq_on_link_change(struct ecore_dev *p_dev,
for_each_hwfn(p_dev, i) {
struct ecore_hwfn *p_hwfn = &p_dev->hwfns[i];
- __ecore_configure_vp_wfq_on_link_change(p_hwfn,
- p_hwfn->p_dpc_ptt,
+ __ecore_configure_vp_wfq_on_link_change(p_hwfn, p_ptt,
min_pf_rate);
}
}
@@ -4980,8 +5550,7 @@ void ecore_clean_wfq_db(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
p_link = &p_hwfn->mcp_info->link_output;
if (p_link->min_pf_rate)
- ecore_disable_wfq_for_all_vports(p_hwfn, p_ptt,
- p_link->min_pf_rate);
+ ecore_disable_wfq_for_all_vports(p_hwfn, p_ptt);
OSAL_MEMSET(p_hwfn->qm_info.wfq_data, 0,
sizeof(*p_hwfn->qm_info.wfq_data) *
@@ -4995,11 +5564,7 @@ int ecore_device_num_engines(struct ecore_dev *p_dev)
int ecore_device_num_ports(struct ecore_dev *p_dev)
{
- /* in CMT always only one port */
- if (p_dev->num_hwfns > 1)
- return 1;
-
- return p_dev->num_ports_in_engines * ecore_device_num_engines(p_dev);
+ return p_dev->num_ports;
}
void ecore_set_fw_mac_addr(__le16 *fw_msb,
diff --git a/drivers/net/qede/base/ecore_dev_api.h b/drivers/net/qede/base/ecore_dev_api.h
index 9126cf95..98bcabe8 100644
--- a/drivers/net/qede/base/ecore_dev_api.h
+++ b/drivers/net/qede/base/ecore_dev_api.h
@@ -57,22 +57,13 @@ enum _ecore_status_t ecore_resc_alloc(struct ecore_dev *p_dev);
*/
void ecore_resc_setup(struct ecore_dev *p_dev);
-struct ecore_hw_init_params {
- /* Tunnelling parameters */
- struct ecore_tunnel_info *p_tunn;
-
- bool b_hw_start;
-
- /* Interrupt mode [msix, inta, etc.] to use */
- enum ecore_int_mode int_mode;
-
- /* NPAR tx switching to be used for vports configured for tx-switching
- */
- bool allow_npar_tx_switch;
-
- /* Binary fw data pointer in binary fw file */
- const u8 *bin_fw_data;
+enum ecore_override_force_load {
+ ECORE_OVERRIDE_FORCE_LOAD_NONE,
+ ECORE_OVERRIDE_FORCE_LOAD_ALWAYS,
+ ECORE_OVERRIDE_FORCE_LOAD_NEVER,
+};
+struct ecore_drv_load_params {
/* Indicates whether the driver is running over a crash kernel.
* As part of the load request, this will be used for providing the
* driver role to the MFW.
@@ -90,6 +81,29 @@ struct ecore_hw_init_params {
/* Avoid engine reset when first PF loads on it */
bool avoid_eng_reset;
+
+ /* Allow overriding the default force load behavior */
+ enum ecore_override_force_load override_force_load;
+};
+
+struct ecore_hw_init_params {
+ /* Tunneling parameters */
+ struct ecore_tunnel_info *p_tunn;
+
+ bool b_hw_start;
+
+ /* Interrupt mode [msix, inta, etc.] to use */
+ enum ecore_int_mode int_mode;
+
+ /* NPAR tx switching to be used for vports configured for tx-switching
+ */
+ bool allow_npar_tx_switch;
+
+ /* Binary fw data pointer in binary fw file */
+ const u8 *bin_fw_data;
+
+ /* Driver load parameters */
+ struct ecore_drv_load_params *p_drv_load_params;
};
/**
@@ -128,8 +142,9 @@ enum _ecore_status_t ecore_hw_stop(struct ecore_dev *p_dev);
*
* @param p_dev
*
+ * @return enum _ecore_status_t
*/
-void ecore_hw_stop_fastpath(struct ecore_dev *p_dev);
+enum _ecore_status_t ecore_hw_stop_fastpath(struct ecore_dev *p_dev);
#ifndef LINUX_REMOVE
/**
@@ -140,16 +155,62 @@ void ecore_hw_stop_fastpath(struct ecore_dev *p_dev);
*
*/
void ecore_prepare_hibernate(struct ecore_dev *p_dev);
+
+enum ecore_db_rec_width {
+ DB_REC_WIDTH_32B,
+ DB_REC_WIDTH_64B,
+};
+
+enum ecore_db_rec_space {
+ DB_REC_KERNEL,
+ DB_REC_USER,
+};
+
+/**
+ * @brief db_recovery_add - add doorbell information to the doorbell
+ * recovery mechanism.
+ *
+ * @param p_dev
+ * @param db_addr - doorbell address
+ * @param db_data - address of where db_data is stored
+ * @param db_width - doorbell is 32b or 64b
+ * @param db_space - doorbell recovery addresses are user or kernel space
+ */
+enum _ecore_status_t ecore_db_recovery_add(struct ecore_dev *p_dev,
+ void OSAL_IOMEM *db_addr,
+ void *db_data,
+ enum ecore_db_rec_width db_width,
+ enum ecore_db_rec_space db_space);
+
+/**
+ * @brief db_recovery_del - remove doorbell information from the doorbell
+ * recovery mechanism. db_data serves as key (db_addr is not unique).
+ *
+ * @param p_dev
+ * @param db_addr - doorbell address
+ * @param db_data - address where db_data is stored. Serves as key for the
+ * entry to delete.
+ */
+enum _ecore_status_t ecore_db_recovery_del(struct ecore_dev *p_dev,
+ void OSAL_IOMEM *db_addr,
+ void *db_data);
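
A minimal usage sketch of the doorbell recovery registration pair declared above. It assumes only the declarations from this header plus the usual ecore/OSAL types; struct my_txq and the my_txq_* helpers are hypothetical consumer-side names, not driver code.

/* Hypothetical per-queue state in a consumer of the recovery API; only
 * db_addr and db_data matter to ecore, which uses db_data as the key.
 */
struct my_txq {
	void OSAL_IOMEM *db_addr;	/* mapped doorbell register */
	u32 db_data;			/* last doorbell value written */
};

static enum _ecore_status_t my_txq_start(struct ecore_dev *p_dev,
					 struct my_txq *txq)
{
	/* Register a 32-bit, kernel-space doorbell with the recovery
	 * mechanism so it can be replayed if needed.
	 */
	return ecore_db_recovery_add(p_dev, txq->db_addr, &txq->db_data,
				     DB_REC_WIDTH_32B, DB_REC_KERNEL);
}

static void my_txq_stop(struct ecore_dev *p_dev, struct my_txq *txq)
{
	/* Delete with the same db_data pointer that was used on add */
	ecore_db_recovery_del(p_dev, txq->db_addr, &txq->db_data);
}
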
+
+static OSAL_INLINE bool ecore_is_mf_ufp(struct ecore_hwfn *p_hwfn)
+{
+ return !!OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits);
+}
+
#endif
/**
* @brief ecore_hw_start_fastpath -restart fastpath traffic,
* only if hw_stop_fastpath was called
- * @param p_dev
+ * @param p_hwfn
*
+ * @return enum _ecore_status_t
*/
-void ecore_hw_start_fastpath(struct ecore_hwfn *p_hwfn);
+enum _ecore_status_t ecore_hw_start_fastpath(struct ecore_hwfn *p_hwfn);
enum ecore_hw_prepare_result {
ECORE_HW_PREPARE_SUCCESS,
@@ -240,7 +301,6 @@ struct ecore_ptt *ecore_ptt_acquire(struct ecore_hwfn *p_hwfn);
void ecore_ptt_release(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt);
-#ifndef __EXTRACT__LINUX__
struct ecore_eth_stats_common {
u64 no_buff_discards;
u64 packet_too_big_discard;
@@ -331,7 +391,6 @@ struct ecore_eth_stats {
struct ecore_eth_stats_ah ah;
};
};
-#endif
enum ecore_dmae_address_type_t {
ECORE_DMAE_ADDRESS_HOST_VIRT,
@@ -580,6 +639,20 @@ enum _ecore_status_t ecore_final_cleanup(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
u16 id,
bool is_vf);
+
+/**
+ * @brief ecore_get_queue_coalesce - Retrieve coalesce value for a given queue.
+ *
+ * @param p_hwfn
+ * @param p_coal - stores the coalesce value read from the hardware.
+ * @param p_handle
+ *
+ * @return enum _ecore_status_t
+ **/
+enum _ecore_status_t
+ecore_get_queue_coalesce(struct ecore_hwfn *p_hwfn, u16 *coal,
+ void *handle);
+
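
A short, hedged example of driving the new getter; the queue handle is assumed to be the opaque pointer the driver kept from the queue-start path, and the wrapper name is illustrative.

/* Read back the coalescing value (usec) currently programmed for the
 * queue identified by 'p_queue_handle'.
 */
static enum _ecore_status_t
my_read_queue_coalesce(struct ecore_hwfn *p_hwfn, void *p_queue_handle,
		       u16 *p_usec)
{
	enum _ecore_status_t rc;

	rc = ecore_get_queue_coalesce(p_hwfn, p_usec, p_queue_handle);
	if (rc != ECORE_SUCCESS)
		DP_NOTICE(p_hwfn, false,
			  "Failed to read queue coalesce value\n");

	/* A new value would be programmed through
	 * ecore_set_queue_coalesce(), declared right after this block.
	 */
	return rc;
}
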
/**
* @brief ecore_set_queue_coalesce - Configure coalesce parameters for Rx and
* Tx queue. The fact that we can configure coalescing to up to 511, but on
diff --git a/drivers/net/qede/base/ecore_hsi_common.h b/drivers/net/qede/base/ecore_hsi_common.h
index 5c2a08f9..d8abd604 100644
--- a/drivers/net/qede/base/ecore_hsi_common.h
+++ b/drivers/net/qede/base/ecore_hsi_common.h
@@ -618,7 +618,7 @@ struct ustorm_core_conn_st_ctx {
/*
* core connection context
*/
-struct core_conn_context {
+struct e4_core_conn_context {
/* ystorm storm context */
struct ystorm_core_conn_st_ctx ystorm_st_context;
struct regpair ystorm_st_padding[2] /* padding */;
@@ -661,6 +661,7 @@ enum core_event_opcode {
CORE_EVENT_RX_QUEUE_START,
CORE_EVENT_RX_QUEUE_STOP,
CORE_EVENT_RX_QUEUE_FLUSH,
+ CORE_EVENT_TX_QUEUE_UPDATE,
MAX_CORE_EVENT_OPCODE
};
@@ -745,6 +746,7 @@ enum core_ramrod_cmd_id {
CORE_RAMROD_RX_QUEUE_STOP /* RX Queue Stop Ramrod */,
CORE_RAMROD_TX_QUEUE_STOP /* TX Queue Stop Ramrod */,
CORE_RAMROD_RX_QUEUE_FLUSH /* RX Flush queue Ramrod */,
+ CORE_RAMROD_TX_QUEUE_UPDATE /* TX Queue Update Ramrod */,
MAX_CORE_RAMROD_CMD_ID
};
@@ -858,7 +860,8 @@ struct core_rx_gsi_offload_cqe {
__le16 src_mac_addrlo /* lo 2 bytes of source mac address */;
/* These are the lower 16 bit of QP id in RoCE BTH header */
__le16 qp_id;
- __le32 gid_dst[4] /* Gid destination address */;
+ __le32 src_qp /* Source QP from DETH header */;
+ __le32 reserved[3];
};
/*
@@ -899,7 +902,10 @@ struct core_rx_start_ramrod_data {
u8 drop_ttl0_flg /* drop packet with ttl0 if set */;
__le16 num_of_pbl_pages /* Num of pages in CQE PBL */;
/* if set, 802.1q tags will be removed and copied to CQE */
- u8 inner_vlan_removal_en;
+/* if set, 802.1q tags will be removed and copied to CQE */
+ u8 inner_vlan_stripping_en;
+/* if set, outer tag won't be stripped, valid only in MF OVLAN. */
+ u8 outer_vlan_stripping_dis;
u8 queue_id /* Light L2 RX Queue ID */;
u8 main_func_queue /* Is this the main queue for the PF */;
/* Duplicate broadcast packets to LL2 main queue in mf_si mode. Valid if
@@ -916,7 +922,7 @@ struct core_rx_start_ramrod_data {
struct core_rx_action_on_error action_on_error;
/* set when in GSI offload mode on ROCE connection */
u8 gsi_offload_flag;
- u8 reserved[7];
+ u8 reserved[6];
};
@@ -938,48 +944,51 @@ struct core_rx_stop_ramrod_data {
struct core_tx_bd_data {
__le16 as_bitfield;
/* Do not allow additional VLAN manipulations on this packet (DCB) */
-#define CORE_TX_BD_DATA_FORCE_VLAN_MODE_MASK 0x1
-#define CORE_TX_BD_DATA_FORCE_VLAN_MODE_SHIFT 0
+#define CORE_TX_BD_DATA_FORCE_VLAN_MODE_MASK 0x1
+#define CORE_TX_BD_DATA_FORCE_VLAN_MODE_SHIFT 0
/* Insert VLAN into packet */
-#define CORE_TX_BD_DATA_VLAN_INSERTION_MASK 0x1
-#define CORE_TX_BD_DATA_VLAN_INSERTION_SHIFT 1
+#define CORE_TX_BD_DATA_VLAN_INSERTION_MASK 0x1
+#define CORE_TX_BD_DATA_VLAN_INSERTION_SHIFT 1
/* This is the first BD of the packet (for debug) */
-#define CORE_TX_BD_DATA_START_BD_MASK 0x1
-#define CORE_TX_BD_DATA_START_BD_SHIFT 2
+#define CORE_TX_BD_DATA_START_BD_MASK 0x1
+#define CORE_TX_BD_DATA_START_BD_SHIFT 2
/* Calculate the IP checksum for the packet */
-#define CORE_TX_BD_DATA_IP_CSUM_MASK 0x1
-#define CORE_TX_BD_DATA_IP_CSUM_SHIFT 3
+#define CORE_TX_BD_DATA_IP_CSUM_MASK 0x1
+#define CORE_TX_BD_DATA_IP_CSUM_SHIFT 3
/* Calculate the L4 checksum for the packet */
-#define CORE_TX_BD_DATA_L4_CSUM_MASK 0x1
-#define CORE_TX_BD_DATA_L4_CSUM_SHIFT 4
+#define CORE_TX_BD_DATA_L4_CSUM_MASK 0x1
+#define CORE_TX_BD_DATA_L4_CSUM_SHIFT 4
/* Packet is IPv6 with extensions */
-#define CORE_TX_BD_DATA_IPV6_EXT_MASK 0x1
-#define CORE_TX_BD_DATA_IPV6_EXT_SHIFT 5
+#define CORE_TX_BD_DATA_IPV6_EXT_MASK 0x1
+#define CORE_TX_BD_DATA_IPV6_EXT_SHIFT 5
/* If IPv6+ext, and if l4_csum is 1, then this field indicates L4 protocol:
* 0-TCP, 1-UDP
*/
-#define CORE_TX_BD_DATA_L4_PROTOCOL_MASK 0x1
-#define CORE_TX_BD_DATA_L4_PROTOCOL_SHIFT 6
+#define CORE_TX_BD_DATA_L4_PROTOCOL_MASK 0x1
+#define CORE_TX_BD_DATA_L4_PROTOCOL_SHIFT 6
/* The pseudo checksum mode to place in the L4 checksum field. Required only
* when IPv6+ext and l4_csum is set. (use enum core_l4_pseudo_checksum_mode)
*/
-#define CORE_TX_BD_DATA_L4_PSEUDO_CSUM_MODE_MASK 0x1
-#define CORE_TX_BD_DATA_L4_PSEUDO_CSUM_MODE_SHIFT 7
+#define CORE_TX_BD_DATA_L4_PSEUDO_CSUM_MODE_MASK 0x1
+#define CORE_TX_BD_DATA_L4_PSEUDO_CSUM_MODE_SHIFT 7
/* Number of BDs that make up one packet - width wide enough to present
* CORE_LL2_TX_MAX_BDS_PER_PACKET
*/
-#define CORE_TX_BD_DATA_NBDS_MASK 0xF
-#define CORE_TX_BD_DATA_NBDS_SHIFT 8
+#define CORE_TX_BD_DATA_NBDS_MASK 0xF
+#define CORE_TX_BD_DATA_NBDS_SHIFT 8
/* Use roce_flavor enum - Differentiate between Roce flavors is valid when
* connType is ROCE (use enum core_roce_flavor_type)
*/
-#define CORE_TX_BD_DATA_ROCE_FLAV_MASK 0x1
-#define CORE_TX_BD_DATA_ROCE_FLAV_SHIFT 12
+#define CORE_TX_BD_DATA_ROCE_FLAV_MASK 0x1
+#define CORE_TX_BD_DATA_ROCE_FLAV_SHIFT 12
/* Calculate ip length */
-#define CORE_TX_BD_DATA_IP_LEN_MASK 0x1
-#define CORE_TX_BD_DATA_IP_LEN_SHIFT 13
-#define CORE_TX_BD_DATA_RESERVED0_MASK 0x3
-#define CORE_TX_BD_DATA_RESERVED0_SHIFT 14
+#define CORE_TX_BD_DATA_IP_LEN_MASK 0x1
+#define CORE_TX_BD_DATA_IP_LEN_SHIFT 13
+/* disables the STAG insertion, relevant only in MF OVLAN mode. */
+#define CORE_TX_BD_DATA_DISABLE_STAG_INSERTION_MASK 0x1
+#define CORE_TX_BD_DATA_DISABLE_STAG_INSERTION_SHIFT 14
+#define CORE_TX_BD_DATA_RESERVED0_MASK 0x1
+#define CORE_TX_BD_DATA_RESERVED0_SHIFT 15
};
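
The re-aligned bit-field macros above are consumed through SET_FIELD(). A minimal sketch, assuming the standard ecore SET_FIELD mask/shift semantics and that OSAL_CPU_TO_LE16 exists alongside the other OSAL endianness helpers:

/* Build the as_bitfield word of a LL2 Tx BD describing a 3-BD packet
 * with IP and L4 checksum offload; STAG insertion is left enabled.
 */
static __le16 my_build_core_tx_bd_data(void)
{
	u16 bd_data = 0;

	SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 1);	/* first BD */
	SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, 3);		/* 3 BDs total */
	SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_CSUM, 1);
	SET_FIELD(bd_data, CORE_TX_BD_DATA_L4_CSUM, 1);
	/* 0 keeps STAG insertion enabled (relevant in MF OVLAN only) */
	SET_FIELD(bd_data, CORE_TX_BD_DATA_DISABLE_STAG_INSERTION, 0);

	return OSAL_CPU_TO_LE16(bd_data);
}
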
/*
@@ -1046,6 +1055,17 @@ struct core_tx_stop_ramrod_data {
/*
+ * Ramrod data for tx queue update ramrod
+ */
+struct core_tx_update_ramrod_data {
+ u8 update_qm_pq_id_flg /* Flag to Update QM PQ ID */;
+ u8 reserved0;
+ __le16 qm_pq_id /* Updated QM PQ ID */;
+ __le32 reserved1[1];
+};
+
+
+/*
* Enum flag for what type of dcb data to update
*/
enum dcb_dscp_update_mode {
@@ -1182,6 +1202,63 @@ struct eth_ustorm_per_queue_stat {
/*
+ * Event Ring VF-PF Channel data
+ */
+struct vf_pf_channel_eqe_data {
+ struct regpair msg_addr /* VF-PF message address */;
+};
+
+/*
+ * Event Ring malicious VF data
+ */
+struct malicious_vf_eqe_data {
+ u8 vf_id /* Malicious VF ID */;
+ u8 err_id /* Malicious VF error (use enum malicious_vf_error_id) */;
+ __le16 reserved[3];
+};
+
+/*
+ * Event Ring initial cleanup data
+ */
+struct initial_cleanup_eqe_data {
+ u8 vf_id /* VF ID */;
+ u8 reserved[7];
+};
+
+/*
+ * Event Data Union
+ */
+union event_ring_data {
+ u8 bytes[8] /* Byte Array */;
+ struct vf_pf_channel_eqe_data vf_pf_channel /* VF-PF Channel data */;
+ struct iscsi_eqe_data iscsi_info /* Dedicated fields to iscsi data */;
+/* Dedicated fields to iscsi connect done results */
+ struct iscsi_connect_done_results iscsi_conn_done_info;
+ struct malicious_vf_eqe_data malicious_vf /* Malicious VF data */;
+/* VF Initial Cleanup data */
+ struct initial_cleanup_eqe_data vf_init_cleanup;
+};
+
+
+/*
+ * Event Ring Entry
+ */
+struct event_ring_entry {
+ u8 protocol_id /* Event Protocol ID (use enum protocol_type) */;
+ u8 opcode /* Event Opcode */;
+ __le16 reserved0 /* Reserved */;
+ __le16 echo /* Echo value from ramrod data on the host */;
+ u8 fw_return_code /* FW return code for SP ramrods */;
+ u8 flags;
+/* 0: synchronous EQE - a completion of SP message. 1: asynchronous EQE */
+#define EVENT_RING_ENTRY_ASYNC_MASK 0x1
+#define EVENT_RING_ENTRY_ASYNC_SHIFT 0
+#define EVENT_RING_ENTRY_RESERVED1_MASK 0x7F
+#define EVENT_RING_ENTRY_RESERVED1_SHIFT 1
+ union event_ring_data data;
+};
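
A hedged sketch of how an entry in this layout is typically classified; the handler is a placeholder, and ECORE_MSG_SPQ / OSAL_LE16_TO_CPU are assumed to be the usual ecore debug level and OSAL endianness helper.

/* Classify one event ring entry: asynchronous EQEs carry protocol data
 * in the union, synchronous ones complete the slowpath ramrod whose
 * 'echo' they carry.
 */
static void my_handle_eqe(struct ecore_hwfn *p_hwfn,
			  struct event_ring_entry *p_eqe)
{
	if (GET_FIELD(p_eqe->flags, EVENT_RING_ENTRY_ASYNC)) {
		/* e.g. a VF->PF channel message: the mailbox address is
		 * in p_eqe->data.vf_pf_channel.msg_addr
		 */
		DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ,
			   "async EQE: protocol %d opcode %d\n",
			   p_eqe->protocol_id, p_eqe->opcode);
		return;
	}

	DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ,
		   "ramrod completion: echo 0x%04x fw rc %d\n",
		   OSAL_LE16_TO_CPU(p_eqe->echo), p_eqe->fw_return_code);
}
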
+
+/*
* Event Ring Next Page Address
*/
struct event_ring_next_addr {
@@ -1211,6 +1288,18 @@ enum fw_flow_ctrl_mode {
/*
+ * GFT profile type.
+ */
+enum gft_profile_type {
+ GFT_PROFILE_TYPE_4_TUPLE /* 4 tuple, IP type and L4 type match. */,
+/* L4 destination port, IP type and L4 type match. */
+ GFT_PROFILE_TYPE_L4_DST_PORT,
+ GFT_PROFILE_TYPE_IP_DST_PORT /* IP destination port and IP type. */,
+ MAX_GFT_PROFILE_TYPE
+};
+
+
+/*
* Major and Minor hsi Versions
*/
struct hsi_fp_ver_struct {
@@ -1311,6 +1400,34 @@ struct mstorm_vf_zone {
/*
+ * vlan header including TPID and TCI fields
+ */
+struct vlan_header {
+ __le16 tpid /* Tag Protocol Identifier */;
+ __le16 tci /* Tag Control Information */;
+};
+
+/*
+ * outer tag configurations
+ */
+struct outer_tag_config_struct {
+/* Enables the STAG Priority Change. Should be 1 for Bette Davis and UFP with
+ * Host Control mode, else 0.
+ */
+ u8 enable_stag_pri_change;
+/* If inner_to_outer_pri_map is initialized then set pri_map_valid */
+ u8 pri_map_valid;
+ u8 reserved[2];
+/* In case mf_mode is MF_OVLAN, this field specifies the outer tag protocol
+ * identifier and outer tag control information
+ */
+ struct vlan_header outer_tag;
+/* Map from inner to outer priority. Set pri_map_valid when initializing it */
+ u8 inner_to_outer_pri_map[8];
+};
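
A minimal sketch of filling this block for an MF OVLAN-style PF start; the TPID value and the identity priority map are illustrative choices, not requirements of the firmware interface, and OSAL_CPU_TO_LE16 is assumed.

/* Outer tag: TPID 0x8100, VLAN id 'mf_ovlan', identity priority map */
static void my_fill_outer_tag_config(struct outer_tag_config_struct *p_cfg,
				     u16 mf_ovlan)
{
	u8 pri;

	p_cfg->enable_stag_pri_change = 0;	/* no UFP / Bette Davis */
	p_cfg->outer_tag.tpid = OSAL_CPU_TO_LE16(0x8100);
	p_cfg->outer_tag.tci = OSAL_CPU_TO_LE16(mf_ovlan);

	for (pri = 0; pri < 8; pri++)
		p_cfg->inner_to_outer_pri_map[pri] = pri;
	p_cfg->pri_map_valid = 1;		/* map above is meaningful */
}
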
+
+
+/*
* personality per PF
*/
enum personality_type {
@@ -1361,7 +1478,6 @@ struct pf_start_ramrod_data {
struct regpair consolid_q_pbl_addr;
/* tunnel configuration. */
struct pf_start_tunnel_config tunnel_config;
- __le32 reserved;
__le16 event_ring_sb_id /* Status block ID */;
/* All VfIds owned by Pf will be from baseVfId till baseVfId+numVfs */
u8 base_vf_id;
@@ -1381,16 +1497,11 @@ struct pf_start_ramrod_data {
u8 integ_phase /* Integration phase */;
/* If set, inter-pf tx switching is allowed in Switch Independent func mode */
u8 allow_npar_tx_switching;
-/* Map from inner to outer priority. Set pri_map_valid when init map */
- u8 inner_to_outer_pri_map[8];
-/* If inner_to_outer_pri_map is initialize then set pri_map_valid */
- u8 pri_map_valid;
-/* In case mf_mode is MF_OVLAN, this field specifies the outer vlan
- * (lower 16 bits) and ethType to use (higher 16 bits)
- */
- __le32 outer_tag;
+ u8 reserved0;
/* FP HSI version to be used by FW */
struct hsi_fp_ver_struct hsi_fp_ver;
+/* Outer tag configurations */
+ struct outer_tag_config_struct outer_tag_config;
};
@@ -1441,15 +1552,19 @@ struct pf_update_tunnel_config {
* Data for port update ramrod
*/
struct pf_update_ramrod_data {
- u8 pf_id;
- u8 update_eth_dcb_data_mode /* Update Eth DCB data indication */;
- u8 update_fcoe_dcb_data_mode /* Update FCOE DCB data indication */;
- u8 update_iscsi_dcb_data_mode /* Update iSCSI DCB data indication */;
+/* Update Eth DCB data indication (use enum dcb_dscp_update_mode) */
+ u8 update_eth_dcb_data_mode;
+/* Update FCOE DCB data indication (use enum dcb_dscp_update_mode) */
+ u8 update_fcoe_dcb_data_mode;
+/* Update iSCSI DCB data indication (use enum dcb_dscp_update_mode) */
+ u8 update_iscsi_dcb_data_mode;
u8 update_roce_dcb_data_mode /* Update ROCE DCB data indication */;
/* Update RROCE (RoceV2) DCB data indication */
u8 update_rroce_dcb_data_mode;
u8 update_iwarp_dcb_data_mode /* Update IWARP DCB data indication */;
u8 update_mf_vlan_flag /* Update MF outer vlan Id */;
+/* Update Enable STAG Priority Change indication */
+ u8 update_enable_stag_pri_change;
struct protocol_dcb_data eth_dcb_data /* core eth related fields */;
struct protocol_dcb_data fcoe_dcb_data /* core fcoe related fields */;
/* core iscsi related fields */
@@ -1460,7 +1575,11 @@ struct pf_update_ramrod_data {
/* core iwarp related fields */
struct protocol_dcb_data iwarp_dcb_data;
__le16 mf_vlan /* new outer vlan id value */;
- __le16 reserved;
+/* enables the inner to outer TAG priority mapping. Should be 1 for Bette Davis
+ * and UFP with Host Control mode, else - 0.
+ */
+ u8 enable_stag_pri_change;
+ u8 reserved;
/* tunnel configuration. */
struct pf_update_tunnel_config tunnel_config;
};
@@ -1745,6 +1864,7 @@ enum vf_zone_size_mode {
+
/*
* Attentions status block
*/
@@ -1758,17 +1878,6 @@ struct atten_status_block {
/*
- * Igu cleanup bit values to distinguish between clean or producer consumer
- * update.
- */
-enum command_type_bit {
- IGU_COMMAND_TYPE_NOP = 0,
- IGU_COMMAND_TYPE_SET = 1,
- MAX_COMMAND_TYPE_BIT
-};
-
-
-/*
* DMAE command
*/
struct dmae_cmd {
@@ -2200,23 +2309,23 @@ struct qm_rf_opportunistic_mask {
/*
* QM hardware structure of QM map memory
*/
-struct qm_rf_pq_map {
+struct qm_rf_pq_map_e4 {
__le32 reg;
-#define QM_RF_PQ_MAP_PQ_VALID_MASK 0x1 /* PQ active */
-#define QM_RF_PQ_MAP_PQ_VALID_SHIFT 0
-#define QM_RF_PQ_MAP_RL_ID_MASK 0xFF /* RL ID */
-#define QM_RF_PQ_MAP_RL_ID_SHIFT 1
+#define QM_RF_PQ_MAP_E4_PQ_VALID_MASK 0x1 /* PQ active */
+#define QM_RF_PQ_MAP_E4_PQ_VALID_SHIFT 0
+#define QM_RF_PQ_MAP_E4_RL_ID_MASK 0xFF /* RL ID */
+#define QM_RF_PQ_MAP_E4_RL_ID_SHIFT 1
/* the first PQ associated with the VPORT and VOQ of this PQ */
-#define QM_RF_PQ_MAP_VP_PQ_ID_MASK 0x1FF
-#define QM_RF_PQ_MAP_VP_PQ_ID_SHIFT 9
-#define QM_RF_PQ_MAP_VOQ_MASK 0x1F /* VOQ */
-#define QM_RF_PQ_MAP_VOQ_SHIFT 18
-#define QM_RF_PQ_MAP_WRR_WEIGHT_GROUP_MASK 0x3 /* WRR weight */
-#define QM_RF_PQ_MAP_WRR_WEIGHT_GROUP_SHIFT 23
-#define QM_RF_PQ_MAP_RL_VALID_MASK 0x1 /* RL active */
-#define QM_RF_PQ_MAP_RL_VALID_SHIFT 25
-#define QM_RF_PQ_MAP_RESERVED_MASK 0x3F
-#define QM_RF_PQ_MAP_RESERVED_SHIFT 26
+#define QM_RF_PQ_MAP_E4_VP_PQ_ID_MASK 0x1FF
+#define QM_RF_PQ_MAP_E4_VP_PQ_ID_SHIFT 9
+#define QM_RF_PQ_MAP_E4_VOQ_MASK 0x1F /* VOQ */
+#define QM_RF_PQ_MAP_E4_VOQ_SHIFT 18
+#define QM_RF_PQ_MAP_E4_WRR_WEIGHT_GROUP_MASK 0x3 /* WRR weight */
+#define QM_RF_PQ_MAP_E4_WRR_WEIGHT_GROUP_SHIFT 23
+#define QM_RF_PQ_MAP_E4_RL_VALID_MASK 0x1 /* RL active */
+#define QM_RF_PQ_MAP_E4_RL_VALID_SHIFT 25
+#define QM_RF_PQ_MAP_E4_RESERVED_MASK 0x3F
+#define QM_RF_PQ_MAP_E4_RESERVED_SHIFT 26
};
diff --git a/drivers/net/qede/base/ecore_hsi_debug_tools.h b/drivers/net/qede/base/ecore_hsi_debug_tools.h
index 7443ff9d..ebb66482 100644
--- a/drivers/net/qede/base/ecore_hsi_debug_tools.h
+++ b/drivers/net/qede/base/ecore_hsi_debug_tools.h
@@ -1053,7 +1053,7 @@ enum dbg_status {
DBG_STATUS_MCP_TRACE_NO_META,
DBG_STATUS_MCP_COULD_NOT_HALT,
DBG_STATUS_MCP_COULD_NOT_RESUME,
- DBG_STATUS_DMAE_FAILED,
+ DBG_STATUS_RESERVED2,
DBG_STATUS_SEMI_FIFO_NOT_EMPTY,
DBG_STATUS_IGU_FIFO_BAD_DATA,
DBG_STATUS_MCP_COULD_NOT_MASK_PRTY,
@@ -1107,7 +1107,9 @@ struct dbg_tools_data {
u8 chip_id /* Chip ID (from enum chip_ids) */;
u8 platform_id /* Platform ID */;
u8 initialized /* Indicates if the data was initialized */;
- u8 reserved;
+ u8 use_dmae /* Indicates if DMAE should be used */;
+/* Numbers of registers that were read since last log */
+ __le32 num_regs_read;
};
diff --git a/drivers/net/qede/base/ecore_hsi_eth.h b/drivers/net/qede/base/ecore_hsi_eth.h
index 397c408d..ffbf5c71 100644
--- a/drivers/net/qede/base/ecore_hsi_eth.h
+++ b/drivers/net/qede/base/ecore_hsi_eth.h
@@ -669,7 +669,7 @@ struct mstorm_eth_conn_st_ctx {
/*
* eth connection context
*/
-struct eth_conn_context {
+struct e4_eth_conn_context {
/* tstorm storm context */
struct tstorm_eth_conn_st_ctx tstorm_st_context;
struct regpair tstorm_st_padding[2] /* padding */;
@@ -765,6 +765,7 @@ enum eth_event_opcode {
ETH_EVENT_RX_DELETE_UDP_FILTER,
ETH_EVENT_RX_CREATE_GFT_ACTION,
ETH_EVENT_RX_GFT_UPDATE_FILTER,
+ ETH_EVENT_TX_QUEUE_UPDATE,
MAX_ETH_EVENT_OPCODE
};
@@ -882,6 +883,7 @@ enum eth_ramrod_cmd_id {
ETH_RAMROD_RX_CREATE_GFT_ACTION /* RX - Create a Gft Action */,
/* RX - Add/Delete a GFT Filter to the Searcher */
ETH_RAMROD_GFT_UPDATE_FILTER,
+ ETH_RAMROD_TX_QUEUE_UPDATE /* TX Queue Update Ramrod */,
MAX_ETH_RAMROD_CMD_ID
};
@@ -1092,7 +1094,7 @@ struct eth_vport_tx_mode {
/*
- * Ramrod data for rx create gft action
+ * GFT filter update action type.
*/
enum gft_filter_update_action {
GFT_ADD_FILTER,
@@ -1101,16 +1103,6 @@ enum gft_filter_update_action {
};
-/*
- * Ramrod data for rx create gft action
- */
-enum gft_logic_filter_type {
- GFT_FILTER_TYPE /* flow FW is GFT-logic as well */,
- RFS_FILTER_TYPE /* flow FW is A-RFS-logic */,
- MAX_GFT_LOGIC_FILTER_TYPE
-};
-
-
/*
@@ -1166,7 +1158,7 @@ struct rx_create_openflow_action_data {
*/
struct rx_queue_start_ramrod_data {
__le16 rx_queue_id /* ID of RX queue */;
- __le16 num_of_pbl_pages /* Num of pages in CQE PBL */;
+ __le16 num_of_pbl_pages /* Number of pages in CQE PBL */;
__le16 bd_max_bytes /* maximal bytes that can be placed on the bd */;
__le16 sb_id /* Status block ID */;
u8 sb_index /* index of the protocol index */;
@@ -1254,26 +1246,34 @@ struct rx_udp_filter_data {
/*
- * Ramrod to add filter - filter is packet headr of type of packet wished to
- * pass certin FW flow
+ * add or delete GFT filter - the filter is the packet header of the packet
+ * type that should pass a certain FW flow
*/
struct rx_update_gft_filter_data {
/* Pointer to Packet Header That Defines GFT Filter */
struct regpair pkt_hdr_addr;
__le16 pkt_hdr_length /* Packet Header Length */;
-/* If is_rfs flag is set: Queue Id to associate filter with else: action icid */
- __le16 rx_qid_or_action_icid;
-/* Field is used if is_rfs flag is set: vport Id of which to associate filter
- * with
- */
- u8 vport_id;
-/* Use enum to set type of flow using gft HW logic blocks */
- u8 filter_type;
+/* Action icid. Valid if action_icid_valid flag set. */
+ __le16 action_icid;
+ __le16 rx_qid /* RX queue ID. Valid if rx_qid_valid set. */;
+ __le16 flow_id /* RX flow ID. Valid if flow_id_valid set. */;
+ u8 vport_id /* RX vport Id. */;
+/* If set, action_icid will be used for GFT filter update. */
+ u8 action_icid_valid;
+/* If set, rx_qid will be used for traffic steering, in addition to vport_id.
+ * flow_id_valid must be cleared. If cleared, the queue ID will be selected by
+ * RSS.
+ */
+ u8 rx_qid_valid;
+/* If set, flow_id will be reported by the CQE, rx_qid_valid must be cleared.
+ * If cleared, flow_id 0 will be reported by the CQE.
+ */
+ u8 flow_id_valid;
u8 filter_action /* Use to set type of action on filter */;
/* 0 - don't assert in case of error. Just return an error code. 1 - assert in
* case of error.
*/
u8 assert_on_error;
+ u8 reserved[2];
};
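
A hedged fill-in sketch for the new flag scheme: steering by RX queue sets rx_qid_valid and leaves flow_id_valid clear, as the comments above require. DMA_REGPAIR_LE and OSAL_CPU_TO_LE16 are assumed from the ecore OSAL layer, and the function name is hypothetical.

/* Add a GFT filter that steers packets matching the pre-built header
 * at 'hdr_phys' ('hdr_len' bytes) to RX queue 'rx_qid' on 'vport_id'.
 */
static void my_fill_gft_add(struct rx_update_gft_filter_data *p_data,
			    dma_addr_t hdr_phys, u16 hdr_len,
			    u8 vport_id, u16 rx_qid)
{
	OSAL_MEMSET(p_data, 0, sizeof(*p_data));

	DMA_REGPAIR_LE(p_data->pkt_hdr_addr, hdr_phys);
	p_data->pkt_hdr_length = OSAL_CPU_TO_LE16(hdr_len);
	p_data->vport_id = vport_id;

	/* steer by queue: rx_qid_valid set, flow_id_valid must stay 0 */
	p_data->rx_qid = OSAL_CPU_TO_LE16(rx_qid);
	p_data->rx_qid_valid = 1;
	p_data->flow_id_valid = 0;

	p_data->filter_action = GFT_ADD_FILTER;
	p_data->assert_on_error = 0;	/* return an error code instead */
}
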
@@ -1344,6 +1344,17 @@ struct tx_queue_stop_ramrod_data {
};
+/*
+ * Ramrod data for tx queue update ramrod
+ */
+struct tx_queue_update_ramrod_data {
+ __le16 update_qm_pq_id_flg /* Flag to Update QM PQ ID */;
+ __le16 qm_pq_id /* Updated QM PQ ID */;
+ __le32 reserved0;
+ struct regpair reserved1[5];
+};
+
+
/*
* Ramrod data for vport update ramrod
@@ -1388,9 +1399,9 @@ struct vport_start_ramrod_data {
/* If set, ETH header padding will not be inserted. placement_offset will be zero.
*/
u8 zero_placement_offset;
-/* If set, Contorl frames will be filtered according to MAC check. */
+/* If set, control frames will be filtered according to MAC check. */
u8 ctl_frame_mac_check_en;
-/* If set, Contorl frames will be filtered according to ethtype check. */
+/* If set, control frames will be filtered according to ethtype check. */
u8 ctl_frame_ethtype_check_en;
u8 reserved[5];
};
@@ -1456,9 +1467,9 @@ struct vport_update_ramrod_data_cmn {
* updated
*/
u8 update_ctl_frame_checks_en_flg;
-/* If set, Contorl frames will be filtered according to MAC check. */
+/* If set, control frames will be filtered according to MAC check. */
u8 ctl_frame_mac_check_en;
-/* If set, Contorl frames will be filtered according to ethtype check. */
+/* If set, control frames will be filtered according to ethtype check. */
u8 ctl_frame_ethtype_check_en;
u8 reserved[15];
};
diff --git a/drivers/net/qede/base/ecore_hw.c b/drivers/net/qede/base/ecore_hw.c
index 2bcc32d3..84f273b0 100644
--- a/drivers/net/qede/base/ecore_hw.c
+++ b/drivers/net/qede/base/ecore_hw.c
@@ -64,7 +64,9 @@ enum _ecore_status_t ecore_ptt_pool_alloc(struct ecore_hwfn *p_hwfn)
}
p_hwfn->p_ptt_pool = p_pool;
+#ifdef CONFIG_ECORE_LOCK_ALLOC
OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_pool->lock);
+#endif
OSAL_SPIN_LOCK_INIT(&p_pool->lock);
return ECORE_SUCCESS;
@@ -83,8 +85,10 @@ void ecore_ptt_invalidate(struct ecore_hwfn *p_hwfn)
void ecore_ptt_pool_free(struct ecore_hwfn *p_hwfn)
{
+#ifdef CONFIG_ECORE_LOCK_ALLOC
if (p_hwfn->p_ptt_pool)
OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->p_ptt_pool->lock);
+#endif
OSAL_FREE(p_hwfn->p_dev, p_hwfn->p_ptt_pool);
}
@@ -132,7 +136,7 @@ void ecore_ptt_release(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
OSAL_SPIN_UNLOCK(&p_hwfn->p_ptt_pool->lock);
}
-u32 ecore_ptt_get_hw_addr(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
+static u32 ecore_ptt_get_hw_addr(struct ecore_ptt *p_ptt)
{
/* The HW is using DWORDS and we need to translate it to Bytes */
return OSAL_LE32_TO_CPU(p_ptt->pxp.offset) << 2;
@@ -155,7 +159,7 @@ void ecore_ptt_set_win(struct ecore_hwfn *p_hwfn,
{
u32 prev_hw_addr;
- prev_hw_addr = ecore_ptt_get_hw_addr(p_hwfn, p_ptt);
+ prev_hw_addr = ecore_ptt_get_hw_addr(p_ptt);
if (new_hw_addr == prev_hw_addr)
return;
@@ -177,7 +181,7 @@ void ecore_ptt_set_win(struct ecore_hwfn *p_hwfn,
static u32 ecore_set_ptt(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt, u32 hw_addr)
{
- u32 win_hw_addr = ecore_ptt_get_hw_addr(p_hwfn, p_ptt);
+ u32 win_hw_addr = ecore_ptt_get_hw_addr(p_ptt);
u32 offset;
offset = hw_addr - win_hw_addr;
@@ -740,10 +744,10 @@ ecore_dmae_execute_sub_operation(struct ecore_hwfn *p_hwfn,
if (ecore_status != ECORE_SUCCESS) {
DP_NOTICE(p_hwfn, ECORE_MSG_HW,
- "ecore_dmae_host2grc: Wait Failed. source_addr"
- " 0x%lx, grc_addr 0x%lx, size_in_dwords 0x%x\n",
+ "Wait Failed. source_addr 0x%lx, grc_addr 0x%lx, size_in_dwords 0x%x, intermediate buffer 0x%lx.\n",
(unsigned long)src_addr, (unsigned long)dst_addr,
- length_dw);
+ length_dw,
+ (unsigned long)p_hwfn->dmae_info.intermediate_buffer_phys_addr);
return ecore_status;
}
@@ -785,6 +789,15 @@ ecore_dmae_execute_command(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
}
+ if (!cmd) {
+ DP_NOTICE(p_hwfn, true,
+ "ecore_dmae_execute_sub_operation failed. Invalid state. source_addr 0x%lx, destination addr 0x%lx, size_in_dwords 0x%x\n",
+ (unsigned long)src_addr,
+ (unsigned long)dst_addr,
+ length_cur);
+ return ECORE_INVAL;
+ }
+
ecore_dmae_opcode(p_hwfn,
(src_type == ECORE_DMAE_ADDRESS_GRC),
(dst_type == ECORE_DMAE_ADDRESS_GRC), p_params);
diff --git a/drivers/net/qede/base/ecore_hw.h b/drivers/net/qede/base/ecore_hw.h
index 0750b2ed..0b9814f5 100644
--- a/drivers/net/qede/base/ecore_hw.h
+++ b/drivers/net/qede/base/ecore_hw.h
@@ -71,8 +71,10 @@ enum _dmae_cmd_crc_mask {
* @brief ecore_gtt_init - Initialize GTT windows
*
* @param p_hwfn
+* @param p_ptt
*/
-void ecore_gtt_init(struct ecore_hwfn *p_hwfn);
+void ecore_gtt_init(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt);
/**
* @brief ecore_ptt_invalidate - Forces all ptt entries to be re-configured
@@ -98,17 +100,6 @@ enum _ecore_status_t ecore_ptt_pool_alloc(struct ecore_hwfn *p_hwfn);
void ecore_ptt_pool_free(struct ecore_hwfn *p_hwfn);
/**
- * @brief ecore_ptt_get_hw_addr - Get PTT's GRC/HW address
- *
- * @param p_hwfn
- * @param p_ptt
- *
- * @return u32
- */
-u32 ecore_ptt_get_hw_addr(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt);
-
-/**
* @brief ecore_ptt_get_bar_addr - Get PPT's external BAR address
*
* @param p_hwfn
@@ -258,35 +249,6 @@ enum _ecore_status_t ecore_dmae_info_alloc(struct ecore_hwfn *p_hwfn);
*/
void ecore_dmae_info_free(struct ecore_hwfn *p_hwfn);
-union ecore_qm_pq_params {
- struct {
- u8 q_idx;
- } iscsi;
-
- struct {
- u8 tc;
- } core;
-
- struct {
- u8 is_vf;
- u8 vf_id;
- u8 tc;
- } eth;
-
- struct {
- u8 dcqcn;
- u8 qpid; /* roce relative */
- } roce;
-
- struct {
- u8 qidx;
- } iwarp;
-};
-
-u16 ecore_get_qm_pq(struct ecore_hwfn *p_hwfn,
- enum protocol_type proto,
- union ecore_qm_pq_params *params);
-
enum _ecore_status_t ecore_init_fw_data(struct ecore_dev *p_dev,
const u8 *fw_data);
diff --git a/drivers/net/qede/base/ecore_init_fw_funcs.c b/drivers/net/qede/base/ecore_init_fw_funcs.c
index b5ef173e..1da80a65 100644
--- a/drivers/net/qede/base/ecore_init_fw_funcs.c
+++ b/drivers/net/qede/base/ecore_init_fw_funcs.c
@@ -20,12 +20,12 @@
#define CDU_VALIDATION_DEFAULT_CFG 61
-static u16 con_region_offsets[3][E4_NUM_OF_CONNECTION_TYPES] = {
+static u16 con_region_offsets[3][NUM_OF_CONNECTION_TYPES_E4] = {
{ 400, 336, 352, 304, 304, 384, 416, 352}, /* region 3 offsets */
{ 528, 496, 416, 448, 448, 512, 544, 480}, /* region 4 offsets */
{ 608, 544, 496, 512, 576, 592, 624, 560} /* region 5 offsets */
};
-static u16 task_region_offsets[1][E4_NUM_OF_CONNECTION_TYPES] = {
+static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES_E4] = {
{ 240, 240, 112, 0, 0, 0, 0, 96} /* region 1 offsets */
};
@@ -43,6 +43,9 @@ static u16 task_region_offsets[1][E4_NUM_OF_CONNECTION_TYPES] = {
/* Other PQ constants */
#define QM_OTHER_PQS_PER_PF 4
+/* VOQ constants */
+#define QM_E5_NUM_EXT_VOQ (MAX_NUM_PORTS_E5 * NUM_OF_TCS)
+
/* WFQ constants: */
/* Upper bound in MB, 10 * burst size of 1ms in 50Gbps */
@@ -52,18 +55,19 @@ static u16 task_region_offsets[1][E4_NUM_OF_CONNECTION_TYPES] = {
#define QM_WFQ_VP_PQ_VOQ_SHIFT 0
/* Bit of PF in WFQ VP PQ map */
-#define QM_WFQ_VP_PQ_PF_SHIFT 5
+#define QM_WFQ_VP_PQ_PF_E4_SHIFT 5
+#define QM_WFQ_VP_PQ_PF_E5_SHIFT 6
/* 0x9000 = 4*9*1024 */
#define QM_WFQ_INC_VAL(weight) ((weight) * 0x9000)
-/* 0.7 * upper bound (62500000) */
-#define QM_WFQ_MAX_INC_VAL 43750000
+/* Max WFQ increment value is 0.7 * upper bound */
+#define QM_WFQ_MAX_INC_VAL ((QM_WFQ_UPPER_BOUND * 7) / 10)
-/* RL constants: */
+/* Number of VOQs in E5 QmWfqCrd register */
+#define QM_WFQ_CRD_E5_NUM_VOQS 16
-/* Upper bound is set to 10 * burst size of 1ms in 50Gbps */
-#define QM_RL_UPPER_BOUND 62500000
+/* RL constants: */
/* Period in us */
#define QM_RL_PERIOD 5
@@ -71,18 +75,32 @@ static u16 task_region_offsets[1][E4_NUM_OF_CONNECTION_TYPES] = {
/* Period in 25MHz cycles */
#define QM_RL_PERIOD_CLK_25M (25 * QM_RL_PERIOD)
-/* 0.7 * upper bound (62500000) */
-#define QM_RL_MAX_INC_VAL 43750000
-
/* RL increment value - rate is specified in mbps. the factor of 1.01 was
- * added after seeing only 99% factor reached in a 25Gbps port with DPDK RFC
- * 2544 test. In this scenario the PF RL was reducing the line rate to 99%
- * although the credit increment value was the correct one and FW calculated
- * correct packet sizes. The reason for the inaccuracy of the RL is unknown at
- * this point.
- */
-#define QM_RL_INC_VAL(rate) OSAL_MAX_T(u32, (u32)(((rate ? rate : 1000000) * \
- QM_RL_PERIOD * 101) / (8 * 100)), 1)
+* added after seeing only 99% factor reached in a 25Gbps port with DPDK RFC
+* 2544 test. In this scenario the PF RL was reducing the line rate to 99%
+* although the credit increment value was the correct one and FW calculated
+* correct packet sizes. The reason for the inaccuracy of the RL is unknown at
+* this point.
+*/
+#define QM_RL_INC_VAL(rate) \
+ OSAL_MAX_T(u32, (u32)(((rate ? rate : 100000) * QM_RL_PERIOD * 101) / \
+ (8 * 100)), 1)
+
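
A standalone check of the formula above, including the 1.01 compensation factor it documents; QM_RL_PERIOD is taken from the definition a few lines below (5 us), everything else is plain arithmetic.

#include <stdio.h>
#include <stdint.h>

#define QM_RL_PERIOD 5	/* us, as defined below in this file */

/* Same arithmetic as QM_RL_INC_VAL(), minus the OSAL_MAX_T wrapper */
static uint32_t rl_inc_val(uint32_t rate_mbps)
{
	uint64_t rate = rate_mbps ? rate_mbps : 100000;
	uint64_t inc = (rate * QM_RL_PERIOD * 101) / (8 * 100);

	return inc ? (uint32_t)inc : 1;
}

int main(void)
{
	/* 25 Gbps: (25000 * 5 * 101) / 800 = 15781 credit units, i.e.
	 * ~1% above the nominal 25000 * 5 / 8 = 15625.
	 */
	printf("25G  increment: %u\n", (unsigned)rl_inc_val(25000));
	printf("100G increment: %u\n", (unsigned)rl_inc_val(100000));
	return 0;
}
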
+/* PF RL Upper bound is set to 10 * burst size of 1ms in 50Gbps */
+#define QM_PF_RL_UPPER_BOUND 62500000
+
+/* Max PF RL increment value is 0.7 * upper bound */
+#define QM_PF_RL_MAX_INC_VAL ((QM_PF_RL_UPPER_BOUND * 7) / 10)
+
+/* Vport RL Upper bound, link speed is in Mbps */
+#define QM_VP_RL_UPPER_BOUND(speed) \
+ ((u32)OSAL_MAX_T(u32, QM_RL_INC_VAL(speed), 9700 + 1000))
+
+/* Max Vport RL increment value is the Vport RL upper bound */
+#define QM_VP_RL_MAX_INC_VAL(speed) QM_VP_RL_UPPER_BOUND(speed)
+
+/* Vport RL credit threshold in case of QM bypass */
+#define QM_VP_RL_BYPASS_THRESH_SPEED (QM_VP_RL_UPPER_BOUND(10000) - 1)
/* AFullOprtnstcCrdMask constants */
#define QM_OPPOR_LINE_VOQ_DEF 1
@@ -94,13 +112,17 @@ static u16 task_region_offsets[1][E4_NUM_OF_CONNECTION_TYPES] = {
/* Pure LB CmdQ lines (+spare) */
#define PBF_CMDQ_PURE_LB_LINES 150
-#define PBF_CMDQ_LINES_RT_OFFSET(voq) \
- (PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET + voq * \
+#define PBF_CMDQ_LINES_E5_RSVD_RATIO 8
+
+#define PBF_CMDQ_LINES_RT_OFFSET(ext_voq) \
+ (PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET + \
+ ext_voq * \
(PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET - \
PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET))
-#define PBF_BTB_GUARANTEED_RT_OFFSET(voq) \
- (PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET + voq * \
+#define PBF_BTB_GUARANTEED_RT_OFFSET(ext_voq) \
+ (PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET + \
+ ext_voq * \
(PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET - \
PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET))
@@ -140,25 +162,58 @@ static u16 task_region_offsets[1][E4_NUM_OF_CONNECTION_TYPES] = {
#define QM_CMD_SET_FIELD(var, cmd, field, value) \
SET_FIELD(var[cmd##_##field##_OFFSET], cmd##_##field, value)
-/* QM: VOQ macros */
-#define PHYS_VOQ(port, tc, max_phys_tcs_per_port) \
- ((port) * (max_phys_tcs_per_port) + (tc))
-#define LB_VOQ(port) (MAX_PHYS_VOQS + (port))
-#define VOQ(port, tc, max_phys_tcs_per_port) \
- ((tc) < LB_TC ? PHYS_VOQ(port, tc, max_phys_tcs_per_port) : \
- LB_VOQ(port))
-
+#define QM_INIT_TX_PQ_MAP(p_hwfn, map, chip, pq_id, rl_valid, \
+ vp_pq_id, rl_id, ext_voq, wrr) \
+ do { \
+ OSAL_MEMSET(&map, 0, sizeof(map)); \
+ SET_FIELD(map.reg, QM_RF_PQ_MAP_##chip##_PQ_VALID, 1); \
+ SET_FIELD(map.reg, QM_RF_PQ_MAP_##chip##_RL_VALID, rl_valid); \
+ SET_FIELD(map.reg, QM_RF_PQ_MAP_##chip##_VP_PQ_ID, vp_pq_id); \
+ SET_FIELD(map.reg, QM_RF_PQ_MAP_##chip##_RL_ID, rl_id); \
+ SET_FIELD(map.reg, QM_RF_PQ_MAP_##chip##_VOQ, ext_voq); \
+ SET_FIELD(map.reg, \
+ QM_RF_PQ_MAP_##chip##_WRR_WEIGHT_GROUP, wrr); \
+ STORE_RT_REG(p_hwfn, QM_REG_TXPQMAP_RT_OFFSET + pq_id, \
+ *((u32 *)&map)); \
+ } while (0)
+
+#define WRITE_PQ_INFO_TO_RAM 1
+#define PQ_INFO_ELEMENT(vp, pf, tc, port, rl_valid, rl) \
+ (((vp) << 0) | ((pf) << 12) | ((tc) << 16) | \
+ ((port) << 20) | ((rl_valid) << 22) | ((rl) << 24))
+#define PQ_INFO_RAM_GRC_ADDRESS(pq_id) \
+ (XSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM + 21768 + (pq_id) * 4)
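
The word packed by the macro above can be checked in isolation; the field widths used in the decode are only inferred from the shift values (12, 16, 20, 22, 24), so treat them as a reading aid rather than a specification.

#include <stdio.h>

#define PQ_INFO_ELEMENT(vp, pf, tc, port, rl_valid, rl) \
	(((vp) << 0) | ((pf) << 12) | ((tc) << 16) | \
	 ((port) << 20) | ((rl_valid) << 22) | ((rl) << 24))

int main(void)
{
	/* vport-PQ 7, PF 2, TC 3, port 1, rate limiter 5 enabled */
	unsigned int pq_info = PQ_INFO_ELEMENT(7, 2, 3, 1, 1, 5);

	printf("pq_info = 0x%08x\n", pq_info);		/* 0x05532007 */
	printf("vp=%u pf=%u tc=%u port=%u rl_valid=%u rl=%u\n",
	       pq_info & 0xfff,		/* bits  0..11 */
	       (pq_info >> 12) & 0xf,	/* bits 12..15 */
	       (pq_info >> 16) & 0xf,	/* bits 16..19 */
	       (pq_info >> 20) & 0x3,	/* bits 20..21 */
	       (pq_info >> 22) & 0x3,	/* bits 22..23 */
	       pq_info >> 24);		/* bits 24..31 */
	return 0;
}
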
/******************** INTERNAL IMPLEMENTATION *********************/
+/* Returns the external VOQ number */
+static u8 ecore_get_ext_voq(struct ecore_hwfn *p_hwfn,
+ u8 port_id,
+ u8 tc,
+ u8 max_phys_tcs_per_port)
+{
+ if (tc == PURE_LB_TC)
+ return NUM_OF_PHYS_TCS * (MAX_NUM_PORTS_BB) + port_id;
+ else
+ return port_id * (max_phys_tcs_per_port) + tc;
+}
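
The VOQ numbering is easy to verify by hand; a standalone reproduction follows. NUM_OF_PHYS_TCS = 8, PURE_LB_TC = 8 and MAX_NUM_PORTS_BB = 2 are assumed E4 HSI values (check ecore_hsi_common.h); the computation itself mirrors ecore_get_ext_voq() above.

#include <stdio.h>

/* Assumed E4 HSI constants */
#define NUM_OF_PHYS_TCS		8
#define PURE_LB_TC		NUM_OF_PHYS_TCS
#define MAX_NUM_PORTS_BB	2

/* Mirrors ecore_get_ext_voq() */
static unsigned int ext_voq(unsigned int port_id, unsigned int tc,
			    unsigned int max_phys_tcs_per_port)
{
	if (tc == PURE_LB_TC)
		return NUM_OF_PHYS_TCS * MAX_NUM_PORTS_BB + port_id;
	return port_id * max_phys_tcs_per_port + tc;
}

int main(void)
{
	/* port 1, TC 2, 4 physical TCs per port -> VOQ 1 * 4 + 2 = 6 */
	printf("phys VOQ: %u\n", ext_voq(1, 2, 4));
	/* pure LB TC on port 1 -> VOQ 8 * 2 + 1 = 17 */
	printf("LB VOQ:   %u\n", ext_voq(1, PURE_LB_TC, 4));
	return 0;
}
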
+
/* Prepare PF RL enable/disable runtime init values */
static void ecore_enable_pf_rl(struct ecore_hwfn *p_hwfn, bool pf_rl_en)
{
STORE_RT_REG(p_hwfn, QM_REG_RLPFENABLE_RT_OFFSET, pf_rl_en ? 1 : 0);
if (pf_rl_en) {
+ u8 num_ext_voqs = MAX_NUM_VOQS_E4;
+ u64 voq_bit_mask = ((u64)1 << num_ext_voqs) - 1;
+
/* Enable RLs for all VOQs */
STORE_RT_REG(p_hwfn, QM_REG_RLPFVOQENABLE_RT_OFFSET,
- (1 << MAX_NUM_VOQS) - 1);
+ (u32)voq_bit_mask);
+#ifdef QM_REG_RLPFVOQENABLE_MSB_RT_OFFSET
+ if (num_ext_voqs >= 32)
+ STORE_RT_REG(p_hwfn, QM_REG_RLPFVOQENABLE_MSB_RT_OFFSET,
+ (u32)(voq_bit_mask >> 32));
+#endif
/* Write RL period */
STORE_RT_REG(p_hwfn, QM_REG_RLPFPERIOD_RT_OFFSET,
@@ -169,7 +224,7 @@ static void ecore_enable_pf_rl(struct ecore_hwfn *p_hwfn, bool pf_rl_en)
/* Set credit threshold for QM bypass flow */
if (QM_BYPASS_EN)
STORE_RT_REG(p_hwfn, QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET,
- QM_RL_UPPER_BOUND);
+ QM_PF_RL_UPPER_BOUND);
}
}
@@ -200,7 +255,7 @@ static void ecore_enable_vport_rl(struct ecore_hwfn *p_hwfn, bool vport_rl_en)
if (QM_BYPASS_EN)
STORE_RT_REG(p_hwfn,
QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET,
- QM_RL_UPPER_BOUND);
+ QM_VP_RL_BYPASS_THRESH_SPEED);
}
}
@@ -220,17 +275,19 @@ static void ecore_enable_vport_wfq(struct ecore_hwfn *p_hwfn, bool vport_wfq_en)
* the specified VOQ
*/
static void ecore_cmdq_lines_voq_rt_init(struct ecore_hwfn *p_hwfn,
- u8 voq, u16 cmdq_lines)
+ u8 ext_voq,
+ u16 cmdq_lines)
{
u32 qm_line_crd;
qm_line_crd = QM_VOQ_LINE_CRD(cmdq_lines);
- OVERWRITE_RT_REG(p_hwfn, PBF_CMDQ_LINES_RT_OFFSET(voq),
+ OVERWRITE_RT_REG(p_hwfn, PBF_CMDQ_LINES_RT_OFFSET(ext_voq),
(u32)cmdq_lines);
- STORE_RT_REG(p_hwfn, QM_REG_VOQCRDLINE_RT_OFFSET + voq, qm_line_crd);
- STORE_RT_REG(p_hwfn, QM_REG_VOQINITCRDLINE_RT_OFFSET + voq,
- qm_line_crd);
+ STORE_RT_REG(p_hwfn, QM_REG_VOQCRDLINE_RT_OFFSET + ext_voq,
+ qm_line_crd);
+ STORE_RT_REG(p_hwfn, QM_REG_VOQINITCRDLINE_RT_OFFSET + ext_voq,
+ qm_line_crd);
}
/* Prepare runtime init values to allocate PBF command queue lines. */
@@ -240,11 +297,12 @@ static void ecore_cmdq_lines_rt_init(struct ecore_hwfn *p_hwfn,
struct init_qm_port_params
port_params[MAX_NUM_PORTS])
{
- u8 tc, voq, port_id, num_tcs_in_port;
+ u8 tc, ext_voq, port_id, num_tcs_in_port;
+ u8 num_ext_voqs = MAX_NUM_VOQS_E4;
- /* Clear PBF lines for all VOQs */
- for (voq = 0; voq < MAX_NUM_VOQS; voq++)
- STORE_RT_REG(p_hwfn, PBF_CMDQ_LINES_RT_OFFSET(voq), 0);
+ /* Clear PBF lines of all VOQs */
+ for (ext_voq = 0; ext_voq < num_ext_voqs; ext_voq++)
+ STORE_RT_REG(p_hwfn, PBF_CMDQ_LINES_RT_OFFSET(ext_voq), 0);
for (port_id = 0; port_id < max_ports_per_engine; port_id++) {
u16 phys_lines, phys_lines_per_tc;
@@ -252,31 +310,35 @@ static void ecore_cmdq_lines_rt_init(struct ecore_hwfn *p_hwfn,
if (!port_params[port_id].active)
continue;
- /* Find #lines to divide between the active physical TCs */
- phys_lines = port_params[port_id].num_pbf_cmd_lines -
- PBF_CMDQ_PURE_LB_LINES;
+ /* Find number of command queue lines to divide between the
+ * active physical TCs. In E5, 1/8 of the lines are reserved.
+ * The lines for the pure LB TC are subtracted.
+ */
+ phys_lines = port_params[port_id].num_pbf_cmd_lines;
+ phys_lines -= PBF_CMDQ_PURE_LB_LINES;
/* Find #lines per active physical TC */
num_tcs_in_port = 0;
- for (tc = 0; tc < NUM_OF_PHYS_TCS; tc++)
+ for (tc = 0; tc < max_phys_tcs_per_port; tc++)
if (((port_params[port_id].active_phys_tcs >> tc) &
0x1) == 1)
num_tcs_in_port++;
phys_lines_per_tc = phys_lines / num_tcs_in_port;
/* Init registers per active TC */
- for (tc = 0; tc < NUM_OF_PHYS_TCS; tc++) {
+ for (tc = 0; tc < max_phys_tcs_per_port; tc++) {
+ ext_voq = ecore_get_ext_voq(p_hwfn, port_id, tc,
+ max_phys_tcs_per_port);
if (((port_params[port_id].active_phys_tcs >> tc) &
- 0x1) == 1) {
- voq = PHYS_VOQ(port_id, tc,
- max_phys_tcs_per_port);
- ecore_cmdq_lines_voq_rt_init(p_hwfn, voq,
+ 0x1) == 1)
+ ecore_cmdq_lines_voq_rt_init(p_hwfn, ext_voq,
phys_lines_per_tc);
- }
}
/* Init registers for pure LB TC */
- ecore_cmdq_lines_voq_rt_init(p_hwfn, LB_VOQ(port_id),
+ ext_voq = ecore_get_ext_voq(p_hwfn, port_id, PURE_LB_TC,
+ max_phys_tcs_per_port);
+ ecore_cmdq_lines_voq_rt_init(p_hwfn, ext_voq,
PBF_CMDQ_PURE_LB_LINES);
}
}
@@ -308,7 +370,7 @@ static void ecore_btb_blocks_rt_init(struct ecore_hwfn *p_hwfn,
port_params[MAX_NUM_PORTS])
{
u32 usable_blocks, pure_lb_blocks, phys_blocks;
- u8 tc, voq, port_id, num_tcs_in_port;
+ u8 tc, ext_voq, port_id, num_tcs_in_port;
for (port_id = 0; port_id < max_ports_per_engine; port_id++) {
if (!port_params[port_id].active)
@@ -339,18 +401,19 @@ static void ecore_btb_blocks_rt_init(struct ecore_hwfn *p_hwfn,
/* Init physical TCs */
for (tc = 0; tc < NUM_OF_PHYS_TCS; tc++) {
if (((port_params[port_id].active_phys_tcs >> tc) &
- 0x1) == 1) {
- voq = PHYS_VOQ(port_id, tc,
- max_phys_tcs_per_port);
+ 0x1) == 1) {
+ ext_voq = ecore_get_ext_voq(p_hwfn, port_id, tc,
+ max_phys_tcs_per_port);
STORE_RT_REG(p_hwfn,
- PBF_BTB_GUARANTEED_RT_OFFSET(voq),
- phys_blocks);
+ PBF_BTB_GUARANTEED_RT_OFFSET(ext_voq),
+ phys_blocks);
}
}
/* Init pure LB TC */
- STORE_RT_REG(p_hwfn,
- PBF_BTB_GUARANTEED_RT_OFFSET(LB_VOQ(port_id)),
+ ext_voq = ecore_get_ext_voq(p_hwfn, port_id, PURE_LB_TC,
+ max_phys_tcs_per_port);
+ STORE_RT_REG(p_hwfn, PBF_BTB_GUARANTEED_RT_OFFSET(ext_voq),
pure_lb_blocks);
}
}
@@ -361,7 +424,6 @@ static void ecore_tx_pq_map_rt_init(struct ecore_hwfn *p_hwfn,
u8 port_id,
u8 pf_id,
u8 max_phys_tcs_per_port,
- bool is_first_pf,
u32 num_pf_cids,
u32 num_vf_cids,
u16 start_pq,
@@ -401,12 +463,12 @@ static void ecore_tx_pq_map_rt_init(struct ecore_hwfn *p_hwfn,
/* Go over all Tx PQs */
for (i = 0, pq_id = start_pq; i < num_pqs; i++, pq_id++) {
u32 max_qm_global_rls = MAX_QM_GLOBAL_RLS;
- struct qm_rf_pq_map tx_pq_map;
+ u8 ext_voq, vport_id_in_pf;
bool is_vf_pq, rl_valid;
- u8 voq, vport_id_in_pf;
u16 first_tx_pq_id;
- voq = VOQ(port_id, pq_params[i].tc_id, max_phys_tcs_per_port);
+ ext_voq = ecore_get_ext_voq(p_hwfn, port_id, pq_params[i].tc_id,
+ max_phys_tcs_per_port);
is_vf_pq = (i >= num_pf_pqs);
rl_valid = pq_params[i].rl_valid && pq_params[i].vport_id <
max_qm_global_rls;
@@ -416,16 +478,17 @@ static void ecore_tx_pq_map_rt_init(struct ecore_hwfn *p_hwfn,
first_tx_pq_id =
vport_params[vport_id_in_pf].first_tx_pq_id[pq_params[i].tc_id];
if (first_tx_pq_id == QM_INVALID_PQ_ID) {
+ u32 map_val = (ext_voq << QM_WFQ_VP_PQ_VOQ_SHIFT) |
+ (pf_id << (QM_WFQ_VP_PQ_PF_E4_SHIFT));
+
/* Create new VP PQ */
vport_params[vport_id_in_pf].
first_tx_pq_id[pq_params[i].tc_id] = pq_id;
first_tx_pq_id = pq_id;
/* Map VP PQ to VOQ and PF */
- STORE_RT_REG(p_hwfn,
- QM_REG_WFQVPMAP_RT_OFFSET + first_tx_pq_id,
- (voq << QM_WFQ_VP_PQ_VOQ_SHIFT) | (pf_id <<
- QM_WFQ_VP_PQ_PF_SHIFT));
+ STORE_RT_REG(p_hwfn, QM_REG_WFQVPMAP_RT_OFFSET +
+ first_tx_pq_id, map_val);
}
/* Check RL ID */
@@ -434,26 +497,29 @@ static void ecore_tx_pq_map_rt_init(struct ecore_hwfn *p_hwfn,
DP_NOTICE(p_hwfn, true,
"Invalid VPORT ID for rate limiter config\n");
- /* Fill PQ map entry */
- OSAL_MEMSET(&tx_pq_map, 0, sizeof(tx_pq_map));
- SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_PQ_VALID, 1);
- SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_RL_VALID,
- rl_valid ? 1 : 0);
- SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_VP_PQ_ID, first_tx_pq_id);
- SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_RL_ID,
- rl_valid ? pq_params[i].vport_id : 0);
- SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_VOQ, voq);
- SET_FIELD(tx_pq_map.reg, QM_RF_PQ_MAP_WRR_WEIGHT_GROUP,
- pq_params[i].wrr_group);
-
- /* Write PQ map entry to CAM */
- STORE_RT_REG(p_hwfn, QM_REG_TXPQMAP_RT_OFFSET + pq_id,
- *((u32 *)&tx_pq_map));
+ /* Prepare PQ map entry */
+ struct qm_rf_pq_map_e4 tx_pq_map;
+ QM_INIT_TX_PQ_MAP(p_hwfn, tx_pq_map, E4, pq_id, rl_valid ?
+ 1 : 0,
+ first_tx_pq_id, rl_valid ?
+ pq_params[i].vport_id : 0,
+ ext_voq, pq_params[i].wrr_group);
/* Set base address */
STORE_RT_REG(p_hwfn, QM_REG_BASEADDRTXPQ_RT_OFFSET + pq_id,
mem_addr_4kb);
+ /* Write PQ info to RAM */
+ if (WRITE_PQ_INFO_TO_RAM != 0) {
+ u32 pq_info = 0;
+ pq_info = PQ_INFO_ELEMENT(first_tx_pq_id, pf_id,
+ pq_params[i].tc_id, port_id,
+ rl_valid ? 1 : 0, rl_valid ?
+ pq_params[i].vport_id : 0);
+ ecore_wr(p_hwfn, p_ptt, PQ_INFO_RAM_GRC_ADDRESS(pq_id),
+ pq_info);
+ }
+
/* If VF PQ, add indication to PQ VF mask */
if (is_vf_pq) {
tx_pq_vf_mask[pq_id / QM_PF_QUEUE_GROUP_SIZE] |=
@@ -473,10 +539,10 @@ static void ecore_tx_pq_map_rt_init(struct ecore_hwfn *p_hwfn,
/* Prepare Other PQ mapping runtime init values for the specified PF */
static void ecore_other_pq_map_rt_init(struct ecore_hwfn *p_hwfn,
- u8 port_id,
u8 pf_id,
u32 num_pf_cids,
- u32 num_tids, u32 base_mem_addr_4kb)
+ u32 num_tids,
+ u32 base_mem_addr_4kb)
{
u32 pq_size, pq_mem_4kb, mem_addr_4kb;
u16 i, pq_id, pq_group;
@@ -518,13 +584,9 @@ static int ecore_pf_wfq_rt_init(struct ecore_hwfn *p_hwfn,
struct init_qm_pq_params *pq_params)
{
u32 inc_val, crd_reg_offset;
- u8 voq;
+ u8 ext_voq;
u16 i;
- crd_reg_offset = (pf_id < MAX_NUM_PFS_BB ? QM_REG_WFQPFCRD_RT_OFFSET :
- QM_REG_WFQPFCRD_MSB_RT_OFFSET) +
- (pf_id % MAX_NUM_PFS_BB);
-
inc_val = QM_WFQ_INC_VAL(pf_wfq);
if (!inc_val || inc_val > QM_WFQ_MAX_INC_VAL) {
DP_NOTICE(p_hwfn, true,
@@ -533,14 +595,21 @@ static int ecore_pf_wfq_rt_init(struct ecore_hwfn *p_hwfn,
}
for (i = 0; i < num_tx_pqs; i++) {
- voq = VOQ(port_id, pq_params[i].tc_id, max_phys_tcs_per_port);
- OVERWRITE_RT_REG(p_hwfn, crd_reg_offset + voq * MAX_NUM_PFS_BB,
+ ext_voq = ecore_get_ext_voq(p_hwfn, port_id, pq_params[i].tc_id,
+ max_phys_tcs_per_port);
+ crd_reg_offset = (pf_id < MAX_NUM_PFS_BB ?
+ QM_REG_WFQPFCRD_RT_OFFSET :
+ QM_REG_WFQPFCRD_MSB_RT_OFFSET) +
+ ext_voq * MAX_NUM_PFS_BB +
+ (pf_id % MAX_NUM_PFS_BB);
+ OVERWRITE_RT_REG(p_hwfn, crd_reg_offset,
(u32)QM_WFQ_CRD_REG_SIGN_BIT);
+ STORE_RT_REG(p_hwfn, QM_REG_WFQPFUPPERBOUND_RT_OFFSET + pf_id,
+ QM_WFQ_UPPER_BOUND | (u32)QM_WFQ_CRD_REG_SIGN_BIT);
+ STORE_RT_REG(p_hwfn, QM_REG_WFQPFWEIGHT_RT_OFFSET + pf_id,
+ inc_val);
}
- STORE_RT_REG(p_hwfn, QM_REG_WFQPFUPPERBOUND_RT_OFFSET + pf_id,
- QM_WFQ_UPPER_BOUND | (u32)QM_WFQ_CRD_REG_SIGN_BIT);
- STORE_RT_REG(p_hwfn, QM_REG_WFQPFWEIGHT_RT_OFFSET + pf_id, inc_val);
return 0;
}
@@ -552,7 +621,7 @@ static int ecore_pf_rl_rt_init(struct ecore_hwfn *p_hwfn, u8 pf_id, u32 pf_rl)
u32 inc_val;
inc_val = QM_RL_INC_VAL(pf_rl);
- if (inc_val > QM_RL_MAX_INC_VAL) {
+ if (inc_val > QM_PF_RL_MAX_INC_VAL) {
DP_NOTICE(p_hwfn, true,
"Invalid PF rate limit configuration\n");
return -1;
@@ -561,7 +630,7 @@ static int ecore_pf_rl_rt_init(struct ecore_hwfn *p_hwfn, u8 pf_id, u32 pf_rl)
STORE_RT_REG(p_hwfn, QM_REG_RLPFCRD_RT_OFFSET + pf_id,
(u32)QM_RL_CRD_REG_SIGN_BIT);
STORE_RT_REG(p_hwfn, QM_REG_RLPFUPPERBOUND_RT_OFFSET + pf_id,
- QM_RL_UPPER_BOUND | (u32)QM_RL_CRD_REG_SIGN_BIT);
+ QM_PF_RL_UPPER_BOUND | (u32)QM_RL_CRD_REG_SIGN_BIT);
STORE_RT_REG(p_hwfn, QM_REG_RLPFINCVAL_RT_OFFSET + pf_id, inc_val);
return 0;
@@ -612,6 +681,7 @@ static int ecore_vp_wfq_rt_init(struct ecore_hwfn *p_hwfn,
static int ecore_vport_rl_rt_init(struct ecore_hwfn *p_hwfn,
u8 start_vport,
u8 num_vports,
+ u32 link_speed,
struct init_qm_vport_params *vport_params)
{
u8 i, vport_id;
@@ -625,8 +695,9 @@ static int ecore_vport_rl_rt_init(struct ecore_hwfn *p_hwfn,
/* Go over all PF VPORTs */
for (i = 0, vport_id = start_vport; i < num_vports; i++, vport_id++) {
- u32 inc_val = QM_RL_INC_VAL(vport_params[i].vport_rl);
- if (inc_val > QM_RL_MAX_INC_VAL) {
+ inc_val = QM_RL_INC_VAL(vport_params[i].vport_rl ?
+ vport_params[i].vport_rl : link_speed);
+ if (inc_val > QM_VP_RL_MAX_INC_VAL(link_speed)) {
DP_NOTICE(p_hwfn, true,
"Invalid VPORT rate-limit configuration\n");
return -1;
@@ -636,7 +707,8 @@ static int ecore_vport_rl_rt_init(struct ecore_hwfn *p_hwfn,
(u32)QM_RL_CRD_REG_SIGN_BIT);
STORE_RT_REG(p_hwfn,
QM_REG_RLGLBLUPPERBOUND_RT_OFFSET + vport_id,
- QM_RL_UPPER_BOUND | (u32)QM_RL_CRD_REG_SIGN_BIT);
+ QM_VP_RL_UPPER_BOUND(link_speed) |
+ (u32)QM_RL_CRD_REG_SIGN_BIT);
STORE_RT_REG(p_hwfn, QM_REG_RLGLBLINCVAL_RT_OFFSET + vport_id,
inc_val);
}
@@ -667,7 +739,9 @@ static bool ecore_poll_on_qm_cmd_ready(struct ecore_hwfn *p_hwfn,
static bool ecore_send_qm_cmd(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
- u32 cmd_addr, u32 cmd_data_lsb, u32 cmd_data_msb)
+ u32 cmd_addr,
+ u32 cmd_data_lsb,
+ u32 cmd_data_msb)
{
if (!ecore_poll_on_qm_cmd_ready(p_hwfn, p_ptt))
return false;
@@ -684,10 +758,11 @@ static bool ecore_send_qm_cmd(struct ecore_hwfn *p_hwfn,
/******************** INTERFACE IMPLEMENTATION *********************/
-u32 ecore_qm_pf_mem_size(u8 pf_id,
- u32 num_pf_cids,
- u32 num_vf_cids,
- u32 num_tids, u16 num_pf_pqs, u16 num_vf_pqs)
+u32 ecore_qm_pf_mem_size(u32 num_pf_cids,
+ u32 num_vf_cids,
+ u32 num_tids,
+ u16 num_pf_pqs,
+ u16 num_vf_pqs)
{
return QM_PQ_MEM_4KB(num_pf_cids) * num_pf_pqs +
QM_PQ_MEM_4KB(num_vf_cids) * num_vf_pqs +
@@ -748,7 +823,6 @@ int ecore_qm_pf_rt_init(struct ecore_hwfn *p_hwfn,
u8 port_id,
u8 pf_id,
u8 max_phys_tcs_per_port,
- bool is_first_pf,
u32 num_pf_cids,
u32 num_vf_cids,
u32 num_tids,
@@ -759,6 +833,7 @@ int ecore_qm_pf_rt_init(struct ecore_hwfn *p_hwfn,
u8 num_vports,
u16 pf_wfq,
u32 pf_rl,
+ u32 link_speed,
struct init_qm_pq_params *pq_params,
struct init_qm_vport_params *vport_params)
{
@@ -775,16 +850,14 @@ int ecore_qm_pf_rt_init(struct ecore_hwfn *p_hwfn,
/* Map Other PQs (if any) */
#if QM_OTHER_PQS_PER_PF > 0
- ecore_other_pq_map_rt_init(p_hwfn, port_id, pf_id, num_pf_cids,
- num_tids, 0);
+ ecore_other_pq_map_rt_init(p_hwfn, pf_id, num_pf_cids, num_tids, 0);
#endif
/* Map Tx PQs */
ecore_tx_pq_map_rt_init(p_hwfn, p_ptt, port_id, pf_id,
- max_phys_tcs_per_port, is_first_pf, num_pf_cids,
- num_vf_cids, start_pq, num_pf_pqs, num_vf_pqs,
- start_vport, other_mem_size_4kb, pq_params,
- vport_params);
+ max_phys_tcs_per_port, num_pf_cids, num_vf_cids,
+ start_pq, num_pf_pqs, num_vf_pqs, start_vport,
+ other_mem_size_4kb, pq_params, vport_params);
/* Init PF WFQ */
if (pf_wfq)
@@ -803,7 +876,7 @@ int ecore_qm_pf_rt_init(struct ecore_hwfn *p_hwfn,
/* Set VPORT RL */
if (ecore_vport_rl_rt_init
- (p_hwfn, start_vport, num_vports, vport_params))
+ (p_hwfn, start_vport, num_vports, link_speed, vport_params))
return -1;
return 0;
@@ -832,7 +905,7 @@ int ecore_init_pf_rl(struct ecore_hwfn *p_hwfn,
u32 inc_val;
inc_val = QM_RL_INC_VAL(pf_rl);
- if (inc_val > QM_RL_MAX_INC_VAL) {
+ if (inc_val > QM_PF_RL_MAX_INC_VAL) {
DP_NOTICE(p_hwfn, true,
"Invalid PF rate limit configuration\n");
return -1;
@@ -872,7 +945,9 @@ int ecore_init_vport_wfq(struct ecore_hwfn *p_hwfn,
}
int ecore_init_vport_rl(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt, u8 vport_id, u32 vport_rl)
+ struct ecore_ptt *p_ptt, u8 vport_id,
+ u32 vport_rl,
+ u32 link_speed)
{
u32 inc_val, max_qm_global_rls = MAX_QM_GLOBAL_RLS;
@@ -882,8 +957,8 @@ int ecore_init_vport_rl(struct ecore_hwfn *p_hwfn,
return -1;
}
- inc_val = QM_RL_INC_VAL(vport_rl);
- if (inc_val > QM_RL_MAX_INC_VAL) {
+ inc_val = QM_RL_INC_VAL(vport_rl ? vport_rl : link_speed);
+ if (inc_val > QM_VP_RL_MAX_INC_VAL(link_speed)) {
DP_NOTICE(p_hwfn, true,
"Invalid VPORT rate-limit configuration\n");
return -1;
@@ -1335,23 +1410,8 @@ void ecore_init_brb_ram(struct ecore_hwfn *p_hwfn,
}
}
-/* In MF should be called once per engine to set EtherType of OuterTag */
-void ecore_set_engine_mf_ovlan_eth_type(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt, u32 ethType)
-{
- /* Update PRS register */
- STORE_RT_REG(p_hwfn, PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET, ethType);
-
- /* Update NIG register */
- STORE_RT_REG(p_hwfn, NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET, ethType);
-
- /* Update PBF register */
- STORE_RT_REG(p_hwfn, PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET, ethType);
-}
-
/* In MF should be called once per port to set EtherType of OuterTag */
-void ecore_set_port_mf_ovlan_eth_type(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt, u32 ethType)
+void ecore_set_port_mf_ovlan_eth_type(struct ecore_hwfn *p_hwfn, u32 ethType)
{
/* Update DORQ register */
STORE_RT_REG(p_hwfn, DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET, ethType);
@@ -1497,35 +1557,23 @@ void ecore_set_geneve_enable(struct ecore_hwfn *p_hwfn,
#define RAM_LINE_SIZE sizeof(u64)
#define REG_SIZE sizeof(u32)
-void ecore_set_rfs_mode_disable(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u16 pf_id)
+void ecore_gft_disable(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u16 pf_id)
{
- union gft_cam_line_union cam_line;
- struct gft_ram_line ram_line;
- u32 i, *ram_line_ptr;
-
- ram_line_ptr = (u32 *)&ram_line;
-
- /* Stop using gft logic, disable gft search */
+ /* disable gft search for PF */
ecore_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_GFT, 0);
- ecore_wr(p_hwfn, p_ptt, PRS_REG_CM_HDR_GFT, 0x0);
- /* Clean ram & cam for next rfs/gft session*/
+ /* Clean ram & cam for next gft session*/
/* Zero camline */
- OSAL_MEMSET(&cam_line, 0, sizeof(cam_line));
- ecore_wr(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id,
- cam_line.cam_line_mapped.camline);
+ ecore_wr(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id, 0);
/* Zero ramline */
- OSAL_MEMSET(&ram_line, 0, sizeof(ram_line));
-
- /* Each iteration write to reg */
- for (i = 0; i < RAM_LINE_SIZE / REG_SIZE; i++)
- ecore_wr(p_hwfn, p_ptt, PRS_REG_GFT_PROFILE_MASK_RAM +
- RAM_LINE_SIZE * pf_id +
- i * REG_SIZE, *(ram_line_ptr + i));
+ ecore_wr(p_hwfn, p_ptt, PRS_REG_GFT_PROFILE_MASK_RAM +
+ RAM_LINE_SIZE * pf_id, 0);
+ ecore_wr(p_hwfn, p_ptt, PRS_REG_GFT_PROFILE_MASK_RAM +
+ RAM_LINE_SIZE * pf_id + REG_SIZE, 0);
}
@@ -1543,115 +1591,110 @@ void ecore_set_gft_event_id_cm_hdr(struct ecore_hwfn *p_hwfn,
ecore_wr(p_hwfn, p_ptt, PRS_REG_CM_HDR_GFT, rfs_cm_hdr_event_id);
}
-void ecore_set_rfs_mode_enable(struct ecore_hwfn *p_hwfn,
+void ecore_gft_config(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
u16 pf_id,
bool tcp,
bool udp,
bool ipv4,
- bool ipv6)
+ bool ipv6,
+ enum gft_profile_type profile_type)
{
- u32 rfs_cm_hdr_event_id = ecore_rd(p_hwfn, p_ptt, PRS_REG_CM_HDR_GFT);
- union gft_cam_line_union camLine;
- struct gft_ram_line ramLine;
- u32 *ramLinePointer = (u32 *)&ramLine;
- int i;
+ u32 reg_val, cam_line, ram_line_lo, ram_line_hi;
if (!ipv6 && !ipv4)
- DP_NOTICE(p_hwfn, true,
- "set_rfs_mode_enable: must accept at "
- "least on of - ipv4 or ipv6");
-
+ DP_NOTICE(p_hwfn, true, "gft_config: must accept at least on of - ipv4 or ipv6'\n");
if (!tcp && !udp)
- DP_NOTICE(p_hwfn, true,
- "set_rfs_mode_enable: must accept at "
- "least on of - udp or tcp");
+ DP_NOTICE(p_hwfn, true, "gft_config: must accept at least on of - udp or tcp\n");
+ if (profile_type >= MAX_GFT_PROFILE_TYPE)
+ DP_NOTICE(p_hwfn, true, "gft_config: unsupported gft_profile_type\n");
/* Set RFS event ID to be awakened in Tstorm by PRS */
- rfs_cm_hdr_event_id |= T_ETH_PACKET_MATCH_RFS_EVENTID <<
- PRS_REG_CM_HDR_GFT_EVENT_ID_SHIFT;
- rfs_cm_hdr_event_id |= PARSER_ETH_CONN_CM_HDR <<
- PRS_REG_CM_HDR_GFT_CM_HDR_SHIFT;
- ecore_wr(p_hwfn, p_ptt, PRS_REG_CM_HDR_GFT, rfs_cm_hdr_event_id);
+ reg_val = T_ETH_PACKET_MATCH_RFS_EVENTID <<
+ PRS_REG_CM_HDR_GFT_EVENT_ID_SHIFT;
+ reg_val |= PARSER_ETH_CONN_CM_HDR << PRS_REG_CM_HDR_GFT_CM_HDR_SHIFT;
+ ecore_wr(p_hwfn, p_ptt, PRS_REG_CM_HDR_GFT, reg_val);
- /* Configure Registers for RFS mode */
+ /* Do not load context, only cid, in PRS on match. */
+ ecore_wr(p_hwfn, p_ptt, PRS_REG_LOAD_L2_FILTER, 0);
- /* Enable gft search */
- ecore_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_GFT, 1);
- ecore_wr(p_hwfn, p_ptt, PRS_REG_LOAD_L2_FILTER, 0); /* do not load
- * context only cid
- * in PRS on match
- */
- camLine.cam_line_mapped.camline = 0;
+ /* Do not use tenant ID exist bit for gft search*/
+ ecore_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_TENANT_ID, 0);
- /* Cam line is now valid!! */
- SET_FIELD(camLine.cam_line_mapped.camline,
- GFT_CAM_LINE_MAPPED_VALID, 1);
+ /* Set Cam */
+ cam_line = 0;
+ SET_FIELD(cam_line, GFT_CAM_LINE_MAPPED_VALID, 1);
/* Filters are per PF!! */
- SET_FIELD(camLine.cam_line_mapped.camline,
- GFT_CAM_LINE_MAPPED_PF_ID_MASK,
+ SET_FIELD(cam_line, GFT_CAM_LINE_MAPPED_PF_ID_MASK,
GFT_CAM_LINE_MAPPED_PF_ID_MASK_MASK);
- SET_FIELD(camLine.cam_line_mapped.camline,
- GFT_CAM_LINE_MAPPED_PF_ID, pf_id);
+ SET_FIELD(cam_line, GFT_CAM_LINE_MAPPED_PF_ID, pf_id);
if (!(tcp && udp)) {
- SET_FIELD(camLine.cam_line_mapped.camline,
+ SET_FIELD(cam_line,
GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_MASK,
GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE_MASK_MASK);
if (tcp)
- SET_FIELD(camLine.cam_line_mapped.camline,
+ SET_FIELD(cam_line,
GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE,
GFT_PROFILE_TCP_PROTOCOL);
else
- SET_FIELD(camLine.cam_line_mapped.camline,
+ SET_FIELD(cam_line,
GFT_CAM_LINE_MAPPED_UPPER_PROTOCOL_TYPE,
GFT_PROFILE_UDP_PROTOCOL);
}
if (!(ipv4 && ipv6)) {
- SET_FIELD(camLine.cam_line_mapped.camline,
- GFT_CAM_LINE_MAPPED_IP_VERSION_MASK, 1);
+ SET_FIELD(cam_line, GFT_CAM_LINE_MAPPED_IP_VERSION_MASK, 1);
if (ipv4)
- SET_FIELD(camLine.cam_line_mapped.camline,
- GFT_CAM_LINE_MAPPED_IP_VERSION,
+ SET_FIELD(cam_line, GFT_CAM_LINE_MAPPED_IP_VERSION,
GFT_PROFILE_IPV4);
else
- SET_FIELD(camLine.cam_line_mapped.camline,
- GFT_CAM_LINE_MAPPED_IP_VERSION,
+ SET_FIELD(cam_line, GFT_CAM_LINE_MAPPED_IP_VERSION,
GFT_PROFILE_IPV6);
}
/* Write characteristics to cam */
ecore_wr(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id,
- camLine.cam_line_mapped.camline);
- camLine.cam_line_mapped.camline =
- ecore_rd(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id);
+ cam_line);
+ cam_line = ecore_rd(p_hwfn, p_ptt,
+ PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id);
/* Write line to RAM - compare to filter 4 tuple */
- ramLine.lo = 0;
- ramLine.hi = 0;
- SET_FIELD(ramLine.hi, GFT_RAM_LINE_DST_IP, 1);
- SET_FIELD(ramLine.hi, GFT_RAM_LINE_SRC_IP, 1);
- SET_FIELD(ramLine.hi, GFT_RAM_LINE_OVER_IP_PROTOCOL, 1);
- SET_FIELD(ramLine.lo, GFT_RAM_LINE_ETHERTYPE, 1);
- SET_FIELD(ramLine.lo, GFT_RAM_LINE_SRC_PORT, 1);
- SET_FIELD(ramLine.lo, GFT_RAM_LINE_DST_PORT, 1);
-
- /* Each iteration write to reg */
- for (i = 0; i < RAM_LINE_SIZE / REG_SIZE; i++)
- ecore_wr(p_hwfn, p_ptt, PRS_REG_GFT_PROFILE_MASK_RAM +
- RAM_LINE_SIZE * pf_id +
- i * REG_SIZE, *(ramLinePointer + i));
+ ram_line_lo = 0;
+ ram_line_hi = 0;
+
+ if (profile_type == GFT_PROFILE_TYPE_4_TUPLE) {
+ SET_FIELD(ram_line_hi, GFT_RAM_LINE_DST_IP, 1);
+ SET_FIELD(ram_line_hi, GFT_RAM_LINE_SRC_IP, 1);
+ SET_FIELD(ram_line_hi, GFT_RAM_LINE_OVER_IP_PROTOCOL, 1);
+ SET_FIELD(ram_line_lo, GFT_RAM_LINE_ETHERTYPE, 1);
+ SET_FIELD(ram_line_lo, GFT_RAM_LINE_SRC_PORT, 1);
+ SET_FIELD(ram_line_lo, GFT_RAM_LINE_DST_PORT, 1);
+ } else if (profile_type == GFT_PROFILE_TYPE_L4_DST_PORT) {
+ SET_FIELD(ram_line_hi, GFT_RAM_LINE_OVER_IP_PROTOCOL, 1);
+ SET_FIELD(ram_line_lo, GFT_RAM_LINE_ETHERTYPE, 1);
+ SET_FIELD(ram_line_lo, GFT_RAM_LINE_DST_PORT, 1);
+ } else if (profile_type == GFT_PROFILE_TYPE_IP_DST_PORT) {
+ SET_FIELD(ram_line_hi, GFT_RAM_LINE_DST_IP, 1);
+ SET_FIELD(ram_line_lo, GFT_RAM_LINE_ETHERTYPE, 1);
+ }
+
+ ecore_wr(p_hwfn, p_ptt,
+ PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id,
+ ram_line_lo);
+ ecore_wr(p_hwfn, p_ptt,
+ PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id +
+ REG_SIZE, ram_line_hi);
/* Set default profile so that no filter match will happen */
- ramLine.lo = 0xffffffff;
- ramLine.hi = 0x3ff;
+ ecore_wr(p_hwfn, p_ptt, PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE *
+ PRS_GFT_CAM_LINES_NO_MATCH, 0xffffffff);
+ ecore_wr(p_hwfn, p_ptt, PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE *
+ PRS_GFT_CAM_LINES_NO_MATCH + REG_SIZE, 0x3ff);
- for (i = 0; i < RAM_LINE_SIZE / REG_SIZE; i++)
- ecore_wr(p_hwfn, p_ptt, PRS_REG_GFT_PROFILE_MASK_RAM +
- RAM_LINE_SIZE * PRS_GFT_CAM_LINES_NO_MATCH +
- i * REG_SIZE, *(ramLinePointer + i));
+ /* Enable gft search */
+ ecore_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_GFT, 1);
}
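
As a usage illustration (not part of the patch), a minimal caller sketch for the renamed GFT entry points; p_hwfn, p_ptt and pf_id are assumed to come from the usual hwfn/PTT setup outside this diff:

	/* Enable GFT on this PF for TCP-over-IPv4, matching on the full
	 * 4-tuple (src/dst IP, src/dst port) plus ethertype and IP protocol.
	 */
	ecore_gft_config(p_hwfn, p_ptt, pf_id,
			 true /* tcp */, false /* udp */,
			 true /* ipv4 */, false /* ipv6 */,
			 GFT_PROFILE_TYPE_4_TUPLE);

	/* Later, stop GFT searches and clear the CAM/RAM lines for this PF */
	ecore_gft_disable(p_hwfn, p_ptt, pf_id);
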
/* Configure VF zone size mode */
@@ -1726,16 +1769,13 @@ u32 ecore_get_mstorm_eth_vf_prods_offset(struct ecore_hwfn *p_hwfn,
#ifndef LINUX_REMOVE
#define CRC8_INIT_VALUE 0xFF
-#define CRC8_TABLE_SIZE 256
#endif
static u8 cdu_crc8_table[CRC8_TABLE_SIZE];
/* Calculate and return CDU validation byte per connection type / region /
* cid
*/
-static u8 ecore_calc_cdu_validation_byte(struct ecore_hwfn *p_hwfn,
- u8 conn_type,
- u8 region, u32 cid)
+static u8 ecore_calc_cdu_validation_byte(u8 conn_type, u8 region, u32 cid)
{
const u8 validation_cfg = CDU_VALIDATION_DEFAULT_CFG;
@@ -1794,9 +1834,8 @@ static u8 ecore_calc_cdu_validation_byte(struct ecore_hwfn *p_hwfn,
}
/* Calculate and set validation bytes for session context */
-void ecore_calc_session_ctx_validation(struct ecore_hwfn *p_hwfn,
- void *p_ctx_mem,
- u16 ctx_size, u8 ctx_type, u32 cid)
+void ecore_calc_session_ctx_validation(void *p_ctx_mem, u16 ctx_size,
+ u8 ctx_type, u32 cid)
{
u8 *x_val_ptr, *t_val_ptr, *u_val_ptr, *p_ctx;
@@ -1807,14 +1846,14 @@ void ecore_calc_session_ctx_validation(struct ecore_hwfn *p_hwfn,
OSAL_MEMSET(p_ctx, 0, ctx_size);
- *x_val_ptr = ecore_calc_cdu_validation_byte(p_hwfn, ctx_type, 3, cid);
- *t_val_ptr = ecore_calc_cdu_validation_byte(p_hwfn, ctx_type, 4, cid);
- *u_val_ptr = ecore_calc_cdu_validation_byte(p_hwfn, ctx_type, 5, cid);
+ *x_val_ptr = ecore_calc_cdu_validation_byte(ctx_type, 3, cid);
+ *t_val_ptr = ecore_calc_cdu_validation_byte(ctx_type, 4, cid);
+ *u_val_ptr = ecore_calc_cdu_validation_byte(ctx_type, 5, cid);
}
/* Calculate and set validation bytes for task context */
-void ecore_calc_task_ctx_validation(struct ecore_hwfn *p_hwfn, void *p_ctx_mem,
- u16 ctx_size, u8 ctx_type, u32 tid)
+void ecore_calc_task_ctx_validation(void *p_ctx_mem, u16 ctx_size, u8 ctx_type,
+ u32 tid)
{
u8 *p_ctx, *region1_val_ptr;
@@ -1823,8 +1862,7 @@ void ecore_calc_task_ctx_validation(struct ecore_hwfn *p_hwfn, void *p_ctx_mem,
OSAL_MEMSET(p_ctx, 0, ctx_size);
- *region1_val_ptr = ecore_calc_cdu_validation_byte(p_hwfn, ctx_type,
- 1, tid);
+ *region1_val_ptr = ecore_calc_cdu_validation_byte(ctx_type, 1, tid);
}
/* Memset session context to 0 while preserving validation bytes */
diff --git a/drivers/net/qede/base/ecore_init_fw_funcs.h b/drivers/net/qede/base/ecore_init_fw_funcs.h
index 488dc005..ab560e59 100644
--- a/drivers/net/qede/base/ecore_init_fw_funcs.h
+++ b/drivers/net/qede/base/ecore_init_fw_funcs.h
@@ -18,7 +18,6 @@ struct init_qm_pq_params;
* Returns the required host memory size in 4KB units.
* Must be called before all QM init HSI functions.
*
- * @param pf_id - physical function ID
* @param num_pf_cids - number of connections used by this PF
* @param num_vf_cids - number of connections used by VFs of this PF
* @param num_tids - number of tasks used by this PF
@@ -27,8 +26,7 @@ struct init_qm_pq_params;
*
* @return The required host memory size in 4KB units.
*/
-u32 ecore_qm_pf_mem_size(u8 pf_id,
- u32 num_pf_cids,
+u32 ecore_qm_pf_mem_size(u32 num_pf_cids,
u32 num_vf_cids,
u32 num_tids,
u16 num_pf_pqs,
@@ -66,7 +64,6 @@ int ecore_qm_common_rt_init(struct ecore_hwfn *p_hwfn,
* @param port_id - port ID
* @param pf_id - PF ID
* @param max_phys_tcs_per_port - max number of physical TCs per port in HW
- * @param is_first_pf - 1 = first PF in engine, 0 = othwerwise
* @param num_pf_cids - number of connections used by this PF
* @param num_vf_cids - number of connections used by VFs of this PF
* @param num_tids - number of tasks used by this PF
@@ -80,6 +77,7 @@ int ecore_qm_common_rt_init(struct ecore_hwfn *p_hwfn,
* be 0. otherwise, the weight must be non-zero.
* @param pf_rl - rate limit in Mb/sec units. a value of 0 means don't
* configure. ignored if PF RL is globally disabled.
+ * @param link_speed - link speed in Mbps.
* @param pq_params - array of size (num_pf_pqs+num_vf_pqs) with parameters for
* each Tx PQ associated with the specified PF.
* @param vport_params - array of size num_vports with parameters for each
@@ -88,23 +86,23 @@ int ecore_qm_common_rt_init(struct ecore_hwfn *p_hwfn,
* @return 0 on success, -1 on error.
*/
int ecore_qm_pf_rt_init(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u8 port_id,
- u8 pf_id,
- u8 max_phys_tcs_per_port,
- bool is_first_pf,
- u32 num_pf_cids,
- u32 num_vf_cids,
- u32 num_tids,
- u16 start_pq,
- u16 num_pf_pqs,
- u16 num_vf_pqs,
- u8 start_vport,
- u8 num_vports,
- u16 pf_wfq,
- u32 pf_rl,
- struct init_qm_pq_params *pq_params,
- struct init_qm_vport_params *vport_params);
+ struct ecore_ptt *p_ptt,
+ u8 port_id,
+ u8 pf_id,
+ u8 max_phys_tcs_per_port,
+ u32 num_pf_cids,
+ u32 num_vf_cids,
+ u32 num_tids,
+ u16 start_pq,
+ u16 num_pf_pqs,
+ u16 num_vf_pqs,
+ u8 start_vport,
+ u8 num_vports,
+ u16 pf_wfq,
+ u32 pf_rl,
+ u32 link_speed,
+ struct init_qm_pq_params *pq_params,
+ struct init_qm_vport_params *vport_params);
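
For context, a hedged sketch of how a caller adapts to the new prototype: is_first_pf is gone and link_speed (in Mbps) is now passed so that zero-valued rate limits can default to it. All locals below are illustrative placeholders, not values from this patch:

	int rc;

	rc = ecore_qm_pf_rt_init(p_hwfn, p_ptt,
				 port_id, pf_id, max_phys_tcs_per_port,
				 num_pf_cids, num_vf_cids, num_tids,
				 start_pq, num_pf_pqs, num_vf_pqs,
				 start_vport, num_vports,
				 pf_wfq, pf_rl,
				 link_speed,	/* new parameter */
				 pq_params, vport_params);
	if (rc)
		return ECORE_INVAL;
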
/**
* @brief ecore_init_pf_wfq Initializes the WFQ weight of the specified PF
@@ -157,17 +155,19 @@ int ecore_init_vport_wfq(struct ecore_hwfn *p_hwfn,
* @brief ecore_init_vport_rl - Initializes the rate limit of the specified
* VPORT.
*
- * @param p_hwfn - HW device data
- * @param p_ptt - ptt window used for writing the registers
- * @param vport_id - VPORT ID
- * @param vport_rl - rate limit in Mb/sec units
+ * @param p_hwfn - HW device data
+ * @param p_ptt - ptt window used for writing the registers
+ * @param vport_id - VPORT ID
+ * @param vport_rl - rate limit in Mb/sec units
+ * @param link_speed - link speed in Mbps.
*
* @return 0 on success, -1 on error.
*/
int ecore_init_vport_rl(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
u8 vport_id,
- u32 vport_rl);
+ u32 vport_rl,
+ u32 link_speed);
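
A short illustrative call (assuming p_hwfn/p_ptt are already set up). Per the .c change above, passing a zero vport_rl makes the limiter default to the link speed:

	/* Explicit 1 Gbps limit on VPORT 5 of a 25G link */
	ecore_init_vport_rl(p_hwfn, p_ptt, 5, 1000, 25000);

	/* vport_rl == 0: the rate limit defaults to the link speed (25 Gbps) */
	ecore_init_vport_rl(p_hwfn, p_ptt, 5, 0, 25000);
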
/**
* @brief ecore_send_qm_stop_cmd Sends a stop command to the QM
@@ -261,28 +261,15 @@ void ecore_init_brb_ram(struct ecore_hwfn *p_hwfn,
#ifndef UNUSED_HSI_FUNC
/**
- * @brief ecore_set_engine_mf_ovlan_eth_type - initializes Nig,Prs,Pbf and llh
- * ethType Regs to input ethType
- * should Be called once per engine
- * if engine
- * is in BD mode.
- *
- * @param p_ptt - ptt window used for writing the registers.
- * @param ethType - etherType to configure
- */
-void ecore_set_engine_mf_ovlan_eth_type(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt, u32 ethType);
-
-/**
* @brief ecore_set_port_mf_ovlan_eth_type - initializes DORQ ethType Regs to
* input ethType. Should be called
* once per port.
*
- * @param p_ptt - ptt window used for writing the registers.
+ * @param p_hwfn - HW device data
* @param ethType - etherType to configure
*/
void ecore_set_port_mf_ovlan_eth_type(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt, u32 ethType);
+ u32 ethType);
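
Illustrative call with the trimmed signature; the 802.1ad S-tag EtherType 0x88a8 is just an example value:

	ecore_set_port_mf_ovlan_eth_type(p_hwfn, 0x88a8);
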
#endif /* UNUSED_HSI_FUNC */
/**
@@ -351,33 +338,35 @@ void ecore_set_gft_event_id_cm_hdr(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt);
/**
- * @brief ecore_set_rfs_mode_disable - Disable and configure HW for RFS
+ * @brief ecore_gft_disable - Disable GFT
*
* @param p_hwfn - HW device data
* @param p_ptt - ptt window used for writing the registers.
- * @param pf_id - pf on which to disable RFS.
+ * @param pf_id - pf on which to disable GFT.
*/
-void ecore_set_rfs_mode_disable(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u16 pf_id);
+void ecore_gft_disable(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u16 pf_id);
/**
-* @brief ecore_set_rfs_mode_enable - enable and configure HW for RFS
+ * @brief ecore_gft_config - Enable and configure HW for GFT
*
* @param p_ptt - ptt window used for writing the registers.
-* @param pf_id - pf on which to enable RFS.
+ * @param pf_id - pf on which to enable GFT.
* @param tcp - set profile tcp packets.
* @param udp - set profile udp packet.
* @param ipv4 - set profile ipv4 packet.
* @param ipv6 - set profile ipv6 packet.
+ * @param profile_type - which packet fields to match on (see enum gft_profile_type).
*/
-void ecore_set_rfs_mode_enable(struct ecore_hwfn *p_hwfn,
+void ecore_gft_config(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
u16 pf_id,
bool tcp,
bool udp,
bool ipv4,
- bool ipv6);
+ bool ipv6,
+ enum gft_profile_type profile_type);
#endif /* UNUSED_HSI_FUNC */
/**
@@ -431,26 +420,25 @@ void ecore_enable_context_validation(struct ecore_hwfn *p_hwfn,
* @param ctx_type - context type.
* @param cid - context cid.
*/
-void ecore_calc_session_ctx_validation(struct ecore_hwfn *p_hwfn,
- void *p_ctx_mem,
+void ecore_calc_session_ctx_validation(void *p_ctx_mem,
u16 ctx_size,
u8 ctx_type,
u32 cid);
+
/**
* @brief ecore_calc_task_ctx_validation - Calculate validation byte for task
* context.
*
- * @param p_hwfn - HW device data
* @param p_ctx_mem - pointer to context memory.
* @param ctx_size - context size.
* @param ctx_type - context type.
* @param tid - context tid.
*/
-void ecore_calc_task_ctx_validation(struct ecore_hwfn *p_hwfn,
- void *p_ctx_mem,
+void ecore_calc_task_ctx_validation(void *p_ctx_mem,
u16 ctx_size,
u8 ctx_type,
u32 tid);
+
/**
* @brief ecore_memset_session_ctx - Memset session context to 0 while
* preserving validation bytes.
diff --git a/drivers/net/qede/base/ecore_init_ops.c b/drivers/net/qede/base/ecore_init_ops.c
index b907a95e..91633c11 100644
--- a/drivers/net/qede/base/ecore_init_ops.c
+++ b/drivers/net/qede/base/ecore_init_ops.c
@@ -40,6 +40,13 @@ void ecore_init_clear_rt_data(struct ecore_hwfn *p_hwfn)
void ecore_init_store_rt_reg(struct ecore_hwfn *p_hwfn, u32 rt_offset, u32 val)
{
+ if (rt_offset >= RUNTIME_ARRAY_SIZE) {
+ DP_ERR(p_hwfn,
+ "Avoid storing %u in rt_data at index %u since RUNTIME_ARRAY_SIZE is %u!\n",
+ val, rt_offset, RUNTIME_ARRAY_SIZE);
+ return;
+ }
+
p_hwfn->rt_data.init_val[rt_offset] = val;
p_hwfn->rt_data.b_valid[rt_offset] = true;
}
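
With the new bound check, an out-of-range offset is logged and dropped instead of writing past rt_data. A hedged sketch (pf_id and inc_val are placeholders):

	/* Valid offset - stored as before */
	ecore_init_store_rt_reg(p_hwfn, QM_REG_RLPFINCVAL_RT_OFFSET + pf_id,
				inc_val);

	/* Offset at or above RUNTIME_ARRAY_SIZE - rejected with a DP_ERR log */
	ecore_init_store_rt_reg(p_hwfn, RUNTIME_ARRAY_SIZE, 0);
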
@@ -49,6 +56,14 @@ void ecore_init_store_rt_agg(struct ecore_hwfn *p_hwfn,
{
osal_size_t i;
+ if ((rt_offset + size - 1) >= RUNTIME_ARRAY_SIZE) {
+ DP_ERR(p_hwfn,
+ "Avoid storing values in rt_data at indices %u-%u since RUNTIME_ARRAY_SIZE is %u!\n",
+ rt_offset, (u32)(rt_offset + size - 1),
+ RUNTIME_ARRAY_SIZE);
+ return;
+ }
+
for (i = 0; i < size / sizeof(u32); i++) {
p_hwfn->rt_data.init_val[rt_offset + i] = p_val[i];
p_hwfn->rt_data.b_valid[rt_offset + i] = true;
@@ -161,8 +176,7 @@ static enum _ecore_status_t ecore_init_array_dmae(struct ecore_hwfn *p_hwfn,
static enum _ecore_status_t ecore_init_fill_dmae(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
- u32 addr, u32 fill,
- u32 fill_count)
+ u32 addr, u32 fill_count)
{
static u32 zero_buffer[DMAE_MAX_RW_SIZE];
@@ -294,7 +308,7 @@ static enum _ecore_status_t ecore_init_cmd_wr(struct ecore_hwfn *p_hwfn,
case INIT_SRC_ZEROS:
data = OSAL_LE32_TO_CPU(p_cmd->args.zeros_count);
if (b_must_dmae || (b_can_dmae && (data >= 64)))
- rc = ecore_init_fill_dmae(p_hwfn, p_ptt, addr, 0, data);
+ rc = ecore_init_fill_dmae(p_hwfn, p_ptt, addr, data);
else
ecore_init_fill(p_hwfn, p_ptt, addr, 0, data);
break;
@@ -303,10 +317,10 @@ static enum _ecore_status_t ecore_init_cmd_wr(struct ecore_hwfn *p_hwfn,
b_must_dmae, b_can_dmae);
break;
case INIT_SRC_RUNTIME:
- ecore_init_rt(p_hwfn, p_ptt, addr,
- OSAL_LE16_TO_CPU(p_cmd->args.runtime.offset),
- OSAL_LE16_TO_CPU(p_cmd->args.runtime.size),
- b_must_dmae);
+ rc = ecore_init_rt(p_hwfn, p_ptt, addr,
+ OSAL_LE16_TO_CPU(p_cmd->args.runtime.offset),
+ OSAL_LE16_TO_CPU(p_cmd->args.runtime.size),
+ b_must_dmae);
break;
}
@@ -382,10 +396,13 @@ static void ecore_init_cmd_rd(struct ecore_hwfn *p_hwfn,
OSAL_LE32_TO_CPU(cmd->op_data));
}
-/* init_ops callbacks entry point */
+/* init_ops callbacks entry point.
+ * OSAL_UNUSED is temporarily used to avoid unused-parameter compilation warnings.
+ * Should be removed when the function is actually used.
+ */
static void ecore_init_cmd_cb(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- struct init_callback_op *p_cmd)
+ struct ecore_ptt OSAL_UNUSED * p_ptt,
+ struct init_callback_op OSAL_UNUSED * p_cmd)
{
DP_NOTICE(p_hwfn, true,
"Currently init values have no need of callbacks\n");
@@ -429,17 +446,16 @@ static u32 ecore_init_cmd_mode(struct ecore_hwfn *p_hwfn,
INIT_IF_MODE_OP_CMD_OFFSET);
}
-static u32 ecore_init_cmd_phase(struct ecore_hwfn *p_hwfn,
- struct init_if_phase_op *p_cmd,
+static u32 ecore_init_cmd_phase(struct init_if_phase_op *p_cmd,
u32 phase, u32 phase_id)
{
u32 data = OSAL_LE32_TO_CPU(p_cmd->phase_data);
+ u32 op_data = OSAL_LE32_TO_CPU(p_cmd->op_data);
if (!(GET_FIELD(data, INIT_IF_PHASE_OP_PHASE) == phase &&
(GET_FIELD(data, INIT_IF_PHASE_OP_PHASE_ID) == ANY_PHASE_ID ||
GET_FIELD(data, INIT_IF_PHASE_OP_PHASE_ID) == phase_id)))
- return GET_FIELD(OSAL_LE32_TO_CPU(p_cmd->op_data),
- INIT_IF_PHASE_OP_CMD_OFFSET);
+ return GET_FIELD(op_data, INIT_IF_PHASE_OP_CMD_OFFSET);
else
return 0;
}
@@ -485,8 +501,8 @@ enum _ecore_status_t ecore_init_run(struct ecore_hwfn *p_hwfn,
modes);
break;
case INIT_OP_IF_PHASE:
- cmd_num += ecore_init_cmd_phase(p_hwfn, &cmd->if_phase,
- phase, phase_id);
+ cmd_num += ecore_init_cmd_phase(&cmd->if_phase, phase,
+ phase_id);
b_dmae = GET_FIELD(data, INIT_IF_PHASE_OP_DMAE_ENABLE);
break;
case INIT_OP_DELAY:
@@ -510,7 +526,8 @@ enum _ecore_status_t ecore_init_run(struct ecore_hwfn *p_hwfn,
return rc;
}
-void ecore_gtt_init(struct ecore_hwfn *p_hwfn)
+void ecore_gtt_init(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
{
u32 gtt_base;
u32 i;
@@ -528,7 +545,7 @@ void ecore_gtt_init(struct ecore_hwfn *p_hwfn)
/* initialize PTT/GTT (poll for completion) */
if (!initialized) {
- ecore_wr(p_hwfn, p_hwfn->p_main_ptt,
+ ecore_wr(p_hwfn, p_ptt,
PGLUE_B_REG_START_INIT_PTT_GTT, 1);
initialized = true;
}
@@ -537,7 +554,7 @@ void ecore_gtt_init(struct ecore_hwfn *p_hwfn)
/* ptt might be overridden by HW until this is done */
OSAL_UDELAY(10);
ecore_ptt_invalidate(p_hwfn);
- val = ecore_rd(p_hwfn, p_hwfn->p_main_ptt,
+ val = ecore_rd(p_hwfn, p_ptt,
PGLUE_B_REG_INIT_DONE_PTT_GTT);
} while ((val != 1) && --poll_cnt);
@@ -557,7 +574,11 @@ void ecore_gtt_init(struct ecore_hwfn *p_hwfn)
}
enum _ecore_status_t ecore_init_fw_data(struct ecore_dev *p_dev,
- const u8 *data)
+#ifdef CONFIG_ECORE_BINARY_FW
+ const u8 *fw_data)
+#else
+ const u8 OSAL_UNUSED * fw_data)
+#endif
{
struct ecore_fw_data *fw = p_dev->fw_data;
@@ -565,24 +586,24 @@ enum _ecore_status_t ecore_init_fw_data(struct ecore_dev *p_dev,
struct bin_buffer_hdr *buf_hdr;
u32 offset, len;
- if (!data) {
+ if (!fw_data) {
DP_NOTICE(p_dev, true, "Invalid fw data\n");
return ECORE_INVAL;
}
- buf_hdr = (struct bin_buffer_hdr *)(uintptr_t)data;
+ buf_hdr = (struct bin_buffer_hdr *)(uintptr_t)fw_data;
offset = buf_hdr[BIN_BUF_INIT_FW_VER_INFO].offset;
- fw->fw_ver_info = (struct fw_ver_info *)((uintptr_t)(data + offset));
+ fw->fw_ver_info = (struct fw_ver_info *)((uintptr_t)(fw_data + offset));
offset = buf_hdr[BIN_BUF_INIT_CMD].offset;
- fw->init_ops = (union init_op *)((uintptr_t)(data + offset));
+ fw->init_ops = (union init_op *)((uintptr_t)(fw_data + offset));
offset = buf_hdr[BIN_BUF_INIT_VAL].offset;
- fw->arr_data = (u32 *)((uintptr_t)(data + offset));
+ fw->arr_data = (u32 *)((uintptr_t)(fw_data + offset));
offset = buf_hdr[BIN_BUF_INIT_MODE_TREE].offset;
- fw->modes_tree_buf = (u8 *)((uintptr_t)(data + offset));
+ fw->modes_tree_buf = (u8 *)((uintptr_t)(fw_data + offset));
len = buf_hdr[BIN_BUF_INIT_CMD].length;
fw->init_ops_size = len / sizeof(struct init_raw_op);
#else
diff --git a/drivers/net/qede/base/ecore_init_ops.h b/drivers/net/qede/base/ecore_init_ops.h
index d58c7d6a..e293a4a3 100644
--- a/drivers/net/qede/base/ecore_init_ops.h
+++ b/drivers/net/qede/base/ecore_init_ops.h
@@ -107,5 +107,6 @@ void ecore_init_store_rt_agg(struct ecore_hwfn *p_hwfn,
*
* @param p_hwfn
*/
-void ecore_gtt_init(struct ecore_hwfn *p_hwfn);
+void ecore_gtt_init(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt);
#endif /* __ECORE_INIT_OPS__ */
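
Since ecore_gtt_init() no longer reaches for p_hwfn->p_main_ptt internally, callers must now supply a PTT window. A sketch, assuming the standard ecore_ptt_acquire()/ecore_ptt_release() helpers from the ecore HW layer:

	struct ecore_ptt *p_ptt = ecore_ptt_acquire(p_hwfn);

	if (p_ptt != OSAL_NULL) {
		ecore_gtt_init(p_hwfn, p_ptt);
		ecore_ptt_release(p_hwfn, p_ptt);
	}
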
diff --git a/drivers/net/qede/base/ecore_int.c b/drivers/net/qede/base/ecore_int.c
index b57c510c..e6cef85b 100644
--- a/drivers/net/qede/base/ecore_int.c
+++ b/drivers/net/qede/base/ecore_int.c
@@ -9,7 +9,6 @@
#include "bcm_osal.h"
#include "ecore.h"
#include "ecore_spq.h"
-#include "reg_addr.h"
#include "ecore_gtt_reg_addr.h"
#include "ecore_init_ops.h"
#include "ecore_rt_defs.h"
@@ -30,7 +29,7 @@ struct ecore_pi_info {
struct ecore_sb_sp_info {
struct ecore_sb_info sb_info;
/* per protocol index data */
- struct ecore_pi_info pi_info_arr[PIS_PER_SB];
+ struct ecore_pi_info pi_info_arr[PIS_PER_SB_E4];
};
enum ecore_attention_type {
@@ -248,21 +247,21 @@ static enum _ecore_status_t ecore_grc_attn_cb(struct ecore_hwfn *p_hwfn)
tmp2 = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
GRC_REG_TIMEOUT_ATTN_ACCESS_DATA_1);
- DP_INFO(p_hwfn->p_dev,
- "GRC timeout [%08x:%08x] - %s Address [%08x] [Master %s]"
- " [PF: %02x %s %02x]\n",
- tmp2, tmp,
- (tmp & ECORE_GRC_ATTENTION_RDWR_BIT) ? "Write to" : "Read from",
- (tmp & ECORE_GRC_ATTENTION_ADDRESS_MASK) << 2,
- grc_timeout_attn_master_to_str((tmp &
- ECORE_GRC_ATTENTION_MASTER_MASK) >>
- ECORE_GRC_ATTENTION_MASTER_SHIFT),
- (tmp2 & ECORE_GRC_ATTENTION_PF_MASK),
- (((tmp2 & ECORE_GRC_ATTENTION_PRIV_MASK) >>
+ DP_NOTICE(p_hwfn->p_dev, false,
+ "GRC timeout [%08x:%08x] - %s Address [%08x] [Master %s] [PF: %02x %s %02x]\n",
+ tmp2, tmp,
+ (tmp & ECORE_GRC_ATTENTION_RDWR_BIT) ? "Write to"
+ : "Read from",
+ (tmp & ECORE_GRC_ATTENTION_ADDRESS_MASK) << 2,
+ grc_timeout_attn_master_to_str(
+ (tmp & ECORE_GRC_ATTENTION_MASTER_MASK) >>
+ ECORE_GRC_ATTENTION_MASTER_SHIFT),
+ (tmp2 & ECORE_GRC_ATTENTION_PF_MASK),
+ (((tmp2 & ECORE_GRC_ATTENTION_PRIV_MASK) >>
ECORE_GRC_ATTENTION_PRIV_SHIFT) ==
- ECORE_GRC_ATTENTION_PRIV_VF) ? "VF" : "(Irrelevant:)",
- (tmp2 & ECORE_GRC_ATTENTION_VF_MASK) >>
- ECORE_GRC_ATTENTION_VF_SHIFT);
+ ECORE_GRC_ATTENTION_PRIV_VF) ? "VF" : "(Irrelevant:)",
+ (tmp2 & ECORE_GRC_ATTENTION_VF_MASK) >>
+ ECORE_GRC_ATTENTION_VF_SHIFT);
out:
/* Regardless of anything else, clean the validity bit */
@@ -414,31 +413,136 @@ ecore_general_attention_35(struct ecore_hwfn *p_hwfn)
return ECORE_SUCCESS;
}
-#define ECORE_DORQ_ATTENTION_REASON_MASK (0xfffff)
-#define ECORE_DORQ_ATTENTION_OPAQUE_MASK (0xffff)
-#define ECORE_DORQ_ATTENTION_SIZE_MASK (0x7f0000)
-#define ECORE_DORQ_ATTENTION_SIZE_SHIFT (16)
+#define ECORE_DORQ_ATTENTION_REASON_MASK (0xfffff)
+#define ECORE_DORQ_ATTENTION_OPAQUE_MASK (0xffff)
+#define ECORE_DORQ_ATTENTION_OPAQUE_SHIFT (0x0)
+#define ECORE_DORQ_ATTENTION_SIZE_MASK (0x7f)
+#define ECORE_DORQ_ATTENTION_SIZE_SHIFT (16)
+
+#define ECORE_DB_REC_COUNT 10
+#define ECORE_DB_REC_INTERVAL 100
+
+/* assumes sticky overflow indication was set for this PF */
+static enum _ecore_status_t ecore_db_rec_attn(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ u8 count = ECORE_DB_REC_COUNT;
+ u32 usage = 1;
+
+ /* wait for usage to zero or count to run out. This is necessary since
+ * EDPM doorbell transactions can take multiple 64b cycles, and as such
+ * can "split" over the pci. Possibly, the doorbell drop can happen with
+ * half an EDPM in the queue and other half dropped. Another EDPM
+ * doorbell to the same address (from doorbell recovery mechanism or
+ * from the doorbelling entity) could have first half dropped and second
+ * half interperted as continuation of the first. To prevent such
+ * malformed doorbells from reaching the device, flush the queue before
+ * releaseing the overflow sticky indication.
+ */
+ while (count-- && usage) {
+ usage = ecore_rd(p_hwfn, p_ptt, DORQ_REG_PF_USAGE_CNT);
+ OSAL_UDELAY(ECORE_DB_REC_INTERVAL);
+ }
+
+ /* should have been depleted by now */
+ if (usage) {
+ DP_NOTICE(p_hwfn->p_dev, false,
+ "DB recovery: doorbell usage failed to zero after %d usec. usage was %x\n",
+ ECORE_DB_REC_INTERVAL * ECORE_DB_REC_COUNT, usage);
+ return ECORE_TIMEOUT;
+ }
+
+ /* flush any pending (e)dpm as they may never arrive */
+ ecore_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
+
+ /* release overflow sticky indication (stop silently dropping
+ * everything)
+ */
+ ecore_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
+
+ /* repeat all last doorbells (doorbell drop recovery) */
+ ecore_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL);
+
+ return ECORE_SUCCESS;
+}
static enum _ecore_status_t ecore_dorq_attn_cb(struct ecore_hwfn *p_hwfn)
{
- u32 reason;
+ u32 int_sts, first_drop_reason, details, address, overflow,
+ all_drops_reason;
+ struct ecore_ptt *p_ptt = p_hwfn->p_dpc_ptt;
+ enum _ecore_status_t rc;
- reason = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt, DORQ_REG_DB_DROP_REASON) &
- ECORE_DORQ_ATTENTION_REASON_MASK;
- if (reason) {
- u32 details = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
- DORQ_REG_DB_DROP_DETAILS);
+ int_sts = ecore_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
+ DP_NOTICE(p_hwfn->p_dev, false, "DORQ attention. int_sts was %x\n",
+ int_sts);
- DP_INFO(p_hwfn->p_dev,
- "DORQ db_drop: address 0x%08x Opaque FID 0x%04x"
- " Size [bytes] 0x%08x Reason: 0x%08x\n",
- ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
- DORQ_REG_DB_DROP_DETAILS_ADDRESS),
- (u16)(details & ECORE_DORQ_ATTENTION_OPAQUE_MASK),
- ((details & ECORE_DORQ_ATTENTION_SIZE_MASK) >>
- ECORE_DORQ_ATTENTION_SIZE_SHIFT) * 4, reason);
+ /* int_sts may be zero since all PFs were interrupted for doorbell
+ * overflow but another one already handled it. We can abort here; if
+ * this PF also requires overflow recovery, it will be interrupted again.
+ */
+ if (!int_sts)
+ return ECORE_SUCCESS;
+
+ /* check if db_drop or overflow happened */
+ if (int_sts & (DORQ_REG_INT_STS_DB_DROP |
+ DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) {
+ /* obtain data about db drop/overflow */
+ first_drop_reason = ecore_rd(p_hwfn, p_ptt,
+ DORQ_REG_DB_DROP_REASON) &
+ ECORE_DORQ_ATTENTION_REASON_MASK;
+ details = ecore_rd(p_hwfn, p_ptt,
+ DORQ_REG_DB_DROP_DETAILS);
+ address = ecore_rd(p_hwfn, p_ptt,
+ DORQ_REG_DB_DROP_DETAILS_ADDRESS);
+ overflow = ecore_rd(p_hwfn, p_ptt,
+ DORQ_REG_PF_OVFL_STICKY);
+ all_drops_reason = ecore_rd(p_hwfn, p_ptt,
+ DORQ_REG_DB_DROP_DETAILS_REASON);
+
+ /* log info */
+ DP_NOTICE(p_hwfn->p_dev, false,
+ "Doorbell drop occurred\n"
+ "Address\t\t0x%08x\t(second BAR address)\n"
+ "FID\t\t0x%04x\t\t(Opaque FID)\n"
+ "Size\t\t0x%04x\t\t(in bytes)\n"
+ "1st drop reason\t0x%08x\t(details on first drop since last handling)\n"
+ "Sticky reasons\t0x%08x\t(all drop reasons since last handling)\n"
+ "Overflow\t0x%x\t\t(a per PF indication)\n",
+ address,
+ GET_FIELD(details, ECORE_DORQ_ATTENTION_OPAQUE),
+ GET_FIELD(details, ECORE_DORQ_ATTENTION_SIZE) * 4,
+ first_drop_reason, all_drops_reason, overflow);
+
+ /* if this PF caused overflow, initiate recovery */
+ if (overflow) {
+ rc = ecore_db_rec_attn(p_hwfn, p_ptt);
+ if (rc != ECORE_SUCCESS)
+ return rc;
+ }
+
+ /* clear the doorbell drop details and prepare for next drop */
+ ecore_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0);
+
+ /* mark interrupt as handled (note: even if drop was due to a
+ * different reason than overflow we mark as handled)
+ */
+ ecore_wr(p_hwfn, p_ptt, DORQ_REG_INT_STS_WR,
+ DORQ_REG_INT_STS_DB_DROP |
+ DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR);
+
+ /* if there are no indications other than drop indications,
+ * success
+ */
+ if ((int_sts & ~(DORQ_REG_INT_STS_DB_DROP |
+ DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR |
+ DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) == 0)
+ return ECORE_SUCCESS;
}
+ /* some other indication was present - non recoverable */
+ DP_INFO(p_hwfn, "DORQ fatal attention\n");
+
return ECORE_INVAL;
}
@@ -851,32 +955,38 @@ ecore_int_deassertion_aeu_bit(struct ecore_hwfn *p_hwfn,
* @brief ecore_int_deassertion_parity - handle a single parity AEU source
*
* @param p_hwfn
- * @param p_aeu - descriptor of an AEU bit which caused the
- * parity
+ * @param p_aeu - descriptor of an AEU bit which caused the parity
+ * @param aeu_en_reg - address of the AEU enable register
* @param bit_index
*/
static void ecore_int_deassertion_parity(struct ecore_hwfn *p_hwfn,
struct aeu_invert_reg_bit *p_aeu,
- u8 bit_index)
+ u32 aeu_en_reg, u8 bit_index)
{
- u32 block_id = p_aeu->block_index;
+ u32 block_id = p_aeu->block_index, mask, val;
- DP_INFO(p_hwfn->p_dev, "%s[%d] parity attention is set\n",
- p_aeu->bit_name, bit_index);
-
- if (block_id == MAX_BLOCK_ID)
- return;
+ DP_NOTICE(p_hwfn->p_dev, false,
+ "%s parity attention is set [address 0x%08x, bit %d]\n",
+ p_aeu->bit_name, aeu_en_reg, bit_index);
- ecore_int_attn_print(p_hwfn, block_id,
- ATTN_TYPE_PARITY, false);
+ if (block_id != MAX_BLOCK_ID) {
+ ecore_int_attn_print(p_hwfn, block_id, ATTN_TYPE_PARITY, false);
- /* In A0, there's a single parity bit for several blocks */
- if (block_id == BLOCK_BTB) {
- ecore_int_attn_print(p_hwfn, BLOCK_OPTE,
- ATTN_TYPE_PARITY, false);
- ecore_int_attn_print(p_hwfn, BLOCK_MCP,
- ATTN_TYPE_PARITY, false);
+ /* In A0, there's a single parity bit for several blocks */
+ if (block_id == BLOCK_BTB) {
+ ecore_int_attn_print(p_hwfn, BLOCK_OPTE,
+ ATTN_TYPE_PARITY, false);
+ ecore_int_attn_print(p_hwfn, BLOCK_MCP,
+ ATTN_TYPE_PARITY, false);
+ }
}
+
+ /* Prevent this parity error from being re-asserted */
+ mask = ~(0x1 << bit_index);
+ val = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt, aeu_en_reg);
+ ecore_wr(p_hwfn, p_hwfn->p_dpc_ptt, aeu_en_reg, val & mask);
+ DP_INFO(p_hwfn, "`%s' - Disabled future parity errors\n",
+ p_aeu->bit_name);
}
/**
@@ -891,8 +1001,7 @@ static enum _ecore_status_t ecore_int_deassertion(struct ecore_hwfn *p_hwfn,
u16 deasserted_bits)
{
struct ecore_sb_attn_info *sb_attn_sw = p_hwfn->p_sb_attn;
- u32 aeu_inv_arr[NUM_ATTN_REGS], aeu_mask;
- bool b_parity = false;
+ u32 aeu_inv_arr[NUM_ATTN_REGS], aeu_mask, aeu_en, en;
u8 i, j, k, bit_idx;
enum _ecore_status_t rc = ECORE_SUCCESS;
@@ -908,11 +1017,11 @@ static enum _ecore_status_t ecore_int_deassertion(struct ecore_hwfn *p_hwfn,
/* Handle parity attentions first */
for (i = 0; i < NUM_ATTN_REGS; i++) {
struct aeu_invert_reg *p_aeu = &sb_attn_sw->p_aeu_desc[i];
- u32 en = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt,
- MISC_REG_AEU_ENABLE1_IGU_OUT_0 +
- i * sizeof(u32));
+ u32 parities;
- u32 parities = sb_attn_sw->parity_mask[i] & aeu_inv_arr[i] & en;
+ aeu_en = MISC_REG_AEU_ENABLE1_IGU_OUT_0 + i * sizeof(u32);
+ en = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt, aeu_en);
+ parities = sb_attn_sw->parity_mask[i] & aeu_inv_arr[i] & en;
/* Skip register in which no parity bit is currently set */
if (!parities)
@@ -922,11 +1031,9 @@ static enum _ecore_status_t ecore_int_deassertion(struct ecore_hwfn *p_hwfn,
struct aeu_invert_reg_bit *p_bit = &p_aeu->bits[j];
if (ecore_int_is_parity_flag(p_hwfn, p_bit) &&
- !!(parities & (1 << bit_idx))) {
+ !!(parities & (1 << bit_idx)))
ecore_int_deassertion_parity(p_hwfn, p_bit,
- bit_idx);
- b_parity = true;
- }
+ aeu_en, bit_idx);
bit_idx += ATTENTION_LENGTH(p_bit->flags);
}
@@ -941,10 +1048,13 @@ static enum _ecore_status_t ecore_int_deassertion(struct ecore_hwfn *p_hwfn,
continue;
for (i = 0; i < NUM_ATTN_REGS; i++) {
- u32 aeu_en = MISC_REG_AEU_ENABLE1_IGU_OUT_0 +
- i * sizeof(u32) + k * sizeof(u32) * NUM_ATTN_REGS;
- u32 en = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt, aeu_en);
- u32 bits = aeu_inv_arr[i] & en;
+ u32 bits;
+
+ aeu_en = MISC_REG_AEU_ENABLE1_IGU_OUT_0 +
+ i * sizeof(u32) +
+ k * sizeof(u32) * NUM_ATTN_REGS;
+ en = ecore_rd(p_hwfn, p_hwfn->p_dpc_ptt, aeu_en);
+ bits = aeu_inv_arr[i] & en;
/* Skip if no bit from this group is currently set */
if (!bits)
@@ -1369,6 +1479,49 @@ void ecore_init_cau_sb_entry(struct ecore_hwfn *p_hwfn,
SET_FIELD(p_sb_entry->data, CAU_SB_ENTRY_STATE1, cau_state);
}
+static void _ecore_int_cau_conf_pi(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u16 igu_sb_id, u32 pi_index,
+ enum ecore_coalescing_fsm coalescing_fsm,
+ u8 timeset)
+{
+ struct cau_pi_entry pi_entry;
+ u32 sb_offset, pi_offset;
+
+ if (IS_VF(p_hwfn->p_dev))
+ return;/* @@@TBD MichalK- VF CAU... */
+
+ sb_offset = igu_sb_id * PIS_PER_SB_E4;
+ OSAL_MEMSET(&pi_entry, 0, sizeof(struct cau_pi_entry));
+
+ SET_FIELD(pi_entry.prod, CAU_PI_ENTRY_PI_TIMESET, timeset);
+ if (coalescing_fsm == ECORE_COAL_RX_STATE_MACHINE)
+ SET_FIELD(pi_entry.prod, CAU_PI_ENTRY_FSM_SEL, 0);
+ else
+ SET_FIELD(pi_entry.prod, CAU_PI_ENTRY_FSM_SEL, 1);
+
+ pi_offset = sb_offset + pi_index;
+ if (p_hwfn->hw_init_done) {
+ ecore_wr(p_hwfn, p_ptt,
+ CAU_REG_PI_MEMORY + pi_offset * sizeof(u32),
+ *((u32 *)&(pi_entry)));
+ } else {
+ STORE_RT_REG(p_hwfn,
+ CAU_REG_PI_MEMORY_RT_OFFSET + pi_offset,
+ *((u32 *)&(pi_entry)));
+ }
+}
+
+void ecore_int_cau_conf_pi(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ struct ecore_sb_info *p_sb, u32 pi_index,
+ enum ecore_coalescing_fsm coalescing_fsm,
+ u8 timeset)
+{
+ _ecore_int_cau_conf_pi(p_hwfn, p_ptt, p_sb->igu_sb_id,
+ pi_index, coalescing_fsm, timeset);
+}
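
The PI configuration is now split: the static helper keeps working on raw IGU SB ids, while the exported wrapper takes the client's ecore_sb_info. A hedged caller sketch (p_sb and timeset are placeholders):

	/* Re-program the RX coalescing timeset of an initialized SB */
	ecore_int_cau_conf_pi(p_hwfn, p_ptt, p_sb, RX_PI,
			      ECORE_COAL_RX_STATE_MACHINE, timeset);
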
+
void ecore_int_cau_conf_sb(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
dma_addr_t sb_phys, u16 igu_sb_id,
@@ -1420,8 +1573,9 @@ void ecore_int_cau_conf_sb(struct ecore_hwfn *p_hwfn,
else
timer_res = 2;
timeset = (u8)(p_hwfn->p_dev->rx_coalesce_usecs >> timer_res);
- ecore_int_cau_conf_pi(p_hwfn, p_ptt, igu_sb_id, RX_PI,
- ECORE_COAL_RX_STATE_MACHINE, timeset);
+ _ecore_int_cau_conf_pi(p_hwfn, p_ptt, igu_sb_id, RX_PI,
+ ECORE_COAL_RX_STATE_MACHINE,
+ timeset);
if (p_hwfn->p_dev->tx_coalesce_usecs <= 0x7F)
timer_res = 0;
@@ -1431,46 +1585,14 @@ void ecore_int_cau_conf_sb(struct ecore_hwfn *p_hwfn,
timer_res = 2;
timeset = (u8)(p_hwfn->p_dev->tx_coalesce_usecs >> timer_res);
for (i = 0; i < num_tc; i++) {
- ecore_int_cau_conf_pi(p_hwfn, p_ptt,
- igu_sb_id, TX_PI(i),
- ECORE_COAL_TX_STATE_MACHINE,
- timeset);
+ _ecore_int_cau_conf_pi(p_hwfn, p_ptt,
+ igu_sb_id, TX_PI(i),
+ ECORE_COAL_TX_STATE_MACHINE,
+ timeset);
}
}
}
-void ecore_int_cau_conf_pi(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u16 igu_sb_id, u32 pi_index,
- enum ecore_coalescing_fsm coalescing_fsm, u8 timeset)
-{
- struct cau_pi_entry pi_entry;
- u32 sb_offset, pi_offset;
-
- if (IS_VF(p_hwfn->p_dev))
- return; /* @@@TBD MichalK- VF CAU... */
-
- sb_offset = igu_sb_id * PIS_PER_SB;
- OSAL_MEMSET(&pi_entry, 0, sizeof(struct cau_pi_entry));
-
- SET_FIELD(pi_entry.prod, CAU_PI_ENTRY_PI_TIMESET, timeset);
- if (coalescing_fsm == ECORE_COAL_RX_STATE_MACHINE)
- SET_FIELD(pi_entry.prod, CAU_PI_ENTRY_FSM_SEL, 0);
- else
- SET_FIELD(pi_entry.prod, CAU_PI_ENTRY_FSM_SEL, 1);
-
- pi_offset = sb_offset + pi_index;
- if (p_hwfn->hw_init_done) {
- ecore_wr(p_hwfn, p_ptt,
- CAU_REG_PI_MEMORY + pi_offset * sizeof(u32),
- *((u32 *)&(pi_entry)));
- } else {
- STORE_RT_REG(p_hwfn,
- CAU_REG_PI_MEMORY_RT_OFFSET + pi_offset,
- *((u32 *)&(pi_entry)));
- }
-}
-
void ecore_int_sb_setup(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt, struct ecore_sb_info *sb_info)
{
@@ -1483,16 +1605,50 @@ void ecore_int_sb_setup(struct ecore_hwfn *p_hwfn,
sb_info->igu_sb_id, 0, 0);
}
-/**
- * @brief ecore_get_igu_sb_id - given a sw sb_id return the
- * igu_sb_id
- *
- * @param p_hwfn
- * @param sb_id
- *
- * @return u16
- */
-static u16 ecore_get_igu_sb_id(struct ecore_hwfn *p_hwfn, u16 sb_id)
+struct ecore_igu_block *
+ecore_get_igu_free_sb(struct ecore_hwfn *p_hwfn, bool b_is_pf)
+{
+ struct ecore_igu_block *p_block;
+ u16 igu_id;
+
+ for (igu_id = 0; igu_id < ECORE_MAPPING_MEMORY_SIZE(p_hwfn->p_dev);
+ igu_id++) {
+ p_block = &p_hwfn->hw_info.p_igu_info->entry[igu_id];
+
+ if (!(p_block->status & ECORE_IGU_STATUS_VALID) ||
+ !(p_block->status & ECORE_IGU_STATUS_FREE))
+ continue;
+
+ if (!!(p_block->status & ECORE_IGU_STATUS_PF) ==
+ b_is_pf)
+ return p_block;
+ }
+
+ return OSAL_NULL;
+}
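
A small illustrative use of the new lookup, picking the next free PF status block (if any) out of the IGU CAM shadow:

	struct ecore_igu_block *p_blk = ecore_get_igu_free_sb(p_hwfn, true);

	if (p_blk != OSAL_NULL)
		DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
			   "Next free PF SB is IGU index 0x%04x\n",
			   p_blk->igu_sb_id);
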
+
+static u16 ecore_get_pf_igu_sb_id(struct ecore_hwfn *p_hwfn,
+ u16 vector_id)
+{
+ struct ecore_igu_block *p_block;
+ u16 igu_id;
+
+ for (igu_id = 0; igu_id < ECORE_MAPPING_MEMORY_SIZE(p_hwfn->p_dev);
+ igu_id++) {
+ p_block = &p_hwfn->hw_info.p_igu_info->entry[igu_id];
+
+ if (!(p_block->status & ECORE_IGU_STATUS_VALID) ||
+ !p_block->is_pf ||
+ p_block->vector_number != vector_id)
+ continue;
+
+ return igu_id;
+ }
+
+ return ECORE_SB_INVALID_IDX;
+}
+
+u16 ecore_get_igu_sb_id(struct ecore_hwfn *p_hwfn, u16 sb_id)
{
u16 igu_sb_id;
@@ -1500,11 +1656,15 @@ static u16 ecore_get_igu_sb_id(struct ecore_hwfn *p_hwfn, u16 sb_id)
if (sb_id == ECORE_SP_SB_ID)
igu_sb_id = p_hwfn->hw_info.p_igu_info->igu_dsb_id;
else if (IS_PF(p_hwfn->p_dev))
- igu_sb_id = sb_id + p_hwfn->hw_info.p_igu_info->igu_base_sb;
+ igu_sb_id = ecore_get_pf_igu_sb_id(p_hwfn, sb_id + 1);
else
igu_sb_id = ecore_vf_get_igu_sb_id(p_hwfn, sb_id);
- if (sb_id == ECORE_SP_SB_ID)
+ if (igu_sb_id == ECORE_SB_INVALID_IDX)
+ DP_NOTICE(p_hwfn, true,
+ "Slowpath SB vector %04x doesn't exist\n",
+ sb_id);
+ else if (sb_id == ECORE_SP_SB_ID)
DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
"Slowpath SB index in IGU is 0x%04x\n", igu_sb_id);
else
@@ -1525,9 +1685,24 @@ enum _ecore_status_t ecore_int_sb_init(struct ecore_hwfn *p_hwfn,
sb_info->igu_sb_id = ecore_get_igu_sb_id(p_hwfn, sb_id);
+ if (sb_info->igu_sb_id == ECORE_SB_INVALID_IDX)
+ return ECORE_INVAL;
+
+ /* Let the igu info reference the client's SB info */
if (sb_id != ECORE_SP_SB_ID) {
- p_hwfn->sbs_info[sb_id] = sb_info;
- p_hwfn->num_sbs++;
+ if (IS_PF(p_hwfn->p_dev)) {
+ struct ecore_igu_info *p_info;
+ struct ecore_igu_block *p_block;
+
+ p_info = p_hwfn->hw_info.p_igu_info;
+ p_block = &p_info->entry[sb_info->igu_sb_id];
+
+ p_block->sb_info = sb_info;
+ p_block->status &= ~ECORE_IGU_STATUS_FREE;
+ p_info->usage.free_cnt--;
+ } else {
+ ecore_vf_set_sb_info(p_hwfn, sb_id, sb_info);
+ }
}
#ifdef ECORE_CONFIG_DIRECT_HWFN
sb_info->p_hwfn = p_hwfn;
@@ -1559,20 +1734,35 @@ enum _ecore_status_t ecore_int_sb_release(struct ecore_hwfn *p_hwfn,
struct ecore_sb_info *sb_info,
u16 sb_id)
{
- if (sb_id == ECORE_SP_SB_ID) {
- DP_ERR(p_hwfn, "Do Not free sp sb using this function");
- return ECORE_INVAL;
- }
+ struct ecore_igu_info *p_info;
+ struct ecore_igu_block *p_block;
+
+ if (sb_info == OSAL_NULL)
+ return ECORE_SUCCESS;
/* zero status block and ack counter */
sb_info->sb_ack = 0;
OSAL_MEMSET(sb_info->sb_virt, 0, sizeof(*sb_info->sb_virt));
- if (p_hwfn->sbs_info[sb_id] != OSAL_NULL) {
- p_hwfn->sbs_info[sb_id] = OSAL_NULL;
- p_hwfn->num_sbs--;
+ if (IS_VF(p_hwfn->p_dev)) {
+ ecore_vf_set_sb_info(p_hwfn, sb_id, OSAL_NULL);
+ return ECORE_SUCCESS;
}
+ p_info = p_hwfn->hw_info.p_igu_info;
+ p_block = &p_info->entry[sb_info->igu_sb_id];
+
+ /* Vector 0 is reserved for the Default SB */
+ if (p_block->vector_number == 0) {
+ DP_ERR(p_hwfn, "Do Not free sp sb using this function");
+ return ECORE_INVAL;
+ }
+
+ /* Lose reference to client's SB info, and fix counters */
+ p_block->sb_info = OSAL_NULL;
+ p_block->status |= ECORE_IGU_STATUS_FREE;
+ p_info->usage.free_cnt++;
+
return ECORE_SUCCESS;
}
@@ -1735,15 +1925,6 @@ ecore_int_igu_enable(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
enum ecore_int_mode int_mode)
{
enum _ecore_status_t rc = ECORE_SUCCESS;
- u32 tmp;
-
- /* @@@tmp - Starting with MFW 8.2.1.0 we've started hitting AVS stop
- * attentions. Since we're waiting for BRCM answer regarding this
- * attention, in the meanwhile we simply mask it.
- */
- tmp = ecore_rd(p_hwfn, p_ptt, MISC_REG_AEU_ENABLE4_IGU_OUT_0);
- tmp &= ~0x800;
- ecore_wr(p_hwfn, p_ptt, MISC_REG_AEU_ENABLE4_IGU_OUT_0, tmp);
ecore_int_igu_enable_attn(p_hwfn, p_ptt);
@@ -1778,11 +1959,13 @@ void ecore_int_igu_disable_int(struct ecore_hwfn *p_hwfn,
#define IGU_CLEANUP_SLEEP_LENGTH (1000)
static void ecore_int_igu_cleanup_sb(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u32 sb_id, bool cleanup_set, u16 opaque_fid)
+ struct ecore_ptt *p_ptt,
+ u32 igu_sb_id,
+ bool cleanup_set,
+ u16 opaque_fid)
{
u32 cmd_ctrl = 0, val = 0, sb_bit = 0, sb_bit_addr = 0, data = 0;
- u32 pxp_addr = IGU_CMD_INT_ACK_BASE + sb_id;
+ u32 pxp_addr = IGU_CMD_INT_ACK_BASE + igu_sb_id;
u32 sleep_cnt = IGU_CLEANUP_SLEEP_LENGTH;
u8 type = 0; /* FIXME MichalS type??? */
@@ -1813,8 +1996,8 @@ static void ecore_int_igu_cleanup_sb(struct ecore_hwfn *p_hwfn,
OSAL_MMIOWB(p_hwfn->p_dev);
/* calculate where to read the status bit from */
- sb_bit = 1 << (sb_id % 32);
- sb_bit_addr = sb_id / 32 * sizeof(u32);
+ sb_bit = 1 << (igu_sb_id % 32);
+ sb_bit_addr = igu_sb_id / 32 * sizeof(u32);
sb_bit_addr += IGU_REG_CLEANUP_STATUS_0 + (0x80 * type);
@@ -1829,21 +2012,28 @@ static void ecore_int_igu_cleanup_sb(struct ecore_hwfn *p_hwfn,
if (!sleep_cnt)
DP_NOTICE(p_hwfn, true,
"Timeout waiting for clear status 0x%08x [for sb %d]\n",
- val, sb_id);
+ val, igu_sb_id);
}
void ecore_int_igu_init_pure_rt_single(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
- u32 sb_id, u16 opaque, bool b_set)
+ u16 igu_sb_id, u16 opaque, bool b_set)
{
+ struct ecore_igu_block *p_block;
int pi, i;
+ p_block = &p_hwfn->hw_info.p_igu_info->entry[igu_sb_id];
+ DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
+ "Cleaning SB [%04x]: func_id= %d is_pf = %d vector_num = 0x%0x\n",
+ igu_sb_id, p_block->function_id, p_block->is_pf,
+ p_block->vector_number);
+
/* Set */
if (b_set)
- ecore_int_igu_cleanup_sb(p_hwfn, p_ptt, sb_id, 1, opaque);
+ ecore_int_igu_cleanup_sb(p_hwfn, p_ptt, igu_sb_id, 1, opaque);
/* Clear */
- ecore_int_igu_cleanup_sb(p_hwfn, p_ptt, sb_id, 0, opaque);
+ ecore_int_igu_cleanup_sb(p_hwfn, p_ptt, igu_sb_id, 0, opaque);
/* Wait for the IGU SB to cleanup */
for (i = 0; i < IGU_CLEANUP_SLEEP_LENGTH; i++) {
@@ -1851,8 +2041,8 @@ void ecore_int_igu_init_pure_rt_single(struct ecore_hwfn *p_hwfn,
val = ecore_rd(p_hwfn, p_ptt,
IGU_REG_WRITE_DONE_PENDING +
- ((sb_id / 32) * 4));
- if (val & (1 << (sb_id % 32)))
+ ((igu_sb_id / 32) * 4));
+ if (val & (1 << (igu_sb_id % 32)))
OSAL_UDELAY(10);
else
break;
@@ -1860,21 +2050,22 @@ void ecore_int_igu_init_pure_rt_single(struct ecore_hwfn *p_hwfn,
if (i == IGU_CLEANUP_SLEEP_LENGTH)
DP_NOTICE(p_hwfn, true,
"Failed SB[0x%08x] still appearing in WRITE_DONE_PENDING\n",
- sb_id);
+ igu_sb_id);
/* Clear the CAU for the SB */
for (pi = 0; pi < 12; pi++)
ecore_wr(p_hwfn, p_ptt,
- CAU_REG_PI_MEMORY + (sb_id * 12 + pi) * 4, 0);
+ CAU_REG_PI_MEMORY + (igu_sb_id * 12 + pi) * 4, 0);
}
void ecore_int_igu_init_pure_rt(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
bool b_set, bool b_slowpath)
{
- u32 igu_base_sb = p_hwfn->hw_info.p_igu_info->igu_base_sb;
- u32 igu_sb_cnt = p_hwfn->hw_info.p_igu_info->igu_sb_cnt;
- u32 sb_id = 0, val = 0;
+ struct ecore_igu_info *p_info = p_hwfn->hw_info.p_igu_info;
+ struct ecore_igu_block *p_block;
+ u16 igu_sb_id = 0;
+ u32 val = 0;
/* @@@TBD MichalK temporary... should be moved to init-tool... */
val = ecore_rd(p_hwfn, p_ptt, IGU_REG_BLOCK_CONFIGURATION);
@@ -1883,53 +2074,204 @@ void ecore_int_igu_init_pure_rt(struct ecore_hwfn *p_hwfn,
ecore_wr(p_hwfn, p_ptt, IGU_REG_BLOCK_CONFIGURATION, val);
/* end temporary */
- DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
- "IGU cleaning SBs [%d,...,%d]\n",
- igu_base_sb, igu_base_sb + igu_sb_cnt - 1);
+ for (igu_sb_id = 0;
+ igu_sb_id < ECORE_MAPPING_MEMORY_SIZE(p_hwfn->p_dev);
+ igu_sb_id++) {
+ p_block = &p_info->entry[igu_sb_id];
- for (sb_id = igu_base_sb; sb_id < igu_base_sb + igu_sb_cnt; sb_id++)
- ecore_int_igu_init_pure_rt_single(p_hwfn, p_ptt, sb_id,
+ if (!(p_block->status & ECORE_IGU_STATUS_VALID) ||
+ !p_block->is_pf ||
+ (p_block->status & ECORE_IGU_STATUS_DSB))
+ continue;
+
+ ecore_int_igu_init_pure_rt_single(p_hwfn, p_ptt, igu_sb_id,
p_hwfn->hw_info.opaque_fid,
b_set);
+ }
- if (!b_slowpath)
- return;
+ if (b_slowpath)
+ ecore_int_igu_init_pure_rt_single(p_hwfn, p_ptt,
+ p_info->igu_dsb_id,
+ p_hwfn->hw_info.opaque_fid,
+ b_set);
+}
- sb_id = p_hwfn->hw_info.p_igu_info->igu_dsb_id;
- DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
- "IGU cleaning slowpath SB [%d]\n", sb_id);
- ecore_int_igu_init_pure_rt_single(p_hwfn, p_ptt, sb_id,
- p_hwfn->hw_info.opaque_fid, b_set);
+int ecore_int_igu_reset_cam(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ struct ecore_igu_info *p_info = p_hwfn->hw_info.p_igu_info;
+ struct ecore_igu_block *p_block;
+ int pf_sbs, vf_sbs;
+ u16 igu_sb_id;
+ u32 val, rval;
+
+ if (!RESC_NUM(p_hwfn, ECORE_SB)) {
+ /* We're using an old MFW - have to prevent any switching
+ * of SBs between PF and VFs as later driver wouldn't be
+ * able to tell which belongs to which.
+ */
+ p_info->b_allow_pf_vf_change = false;
+ } else {
+ /* Use the numbers the MFW has provided -
+ * don't forget MFW accounts for the default SB as well.
+ */
+ p_info->b_allow_pf_vf_change = true;
+
+ if (p_info->usage.cnt != RESC_NUM(p_hwfn, ECORE_SB) - 1) {
+ DP_INFO(p_hwfn,
+ "MFW notifies of 0x%04x PF SBs; IGU indicates of only 0x%04x\n",
+ RESC_NUM(p_hwfn, ECORE_SB) - 1,
+ p_info->usage.cnt);
+ p_info->usage.cnt = RESC_NUM(p_hwfn, ECORE_SB) - 1;
+ }
+
+ /* TODO - how do we learn about VF SBs from MFW? */
+ if (IS_PF_SRIOV(p_hwfn)) {
+ u16 vfs = p_hwfn->p_dev->p_iov_info->total_vfs;
+
+ if (vfs != p_info->usage.iov_cnt)
+ DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
+ "0x%04x VF SBs in IGU CAM != PCI configuration 0x%04x\n",
+ p_info->usage.iov_cnt, vfs);
+
+ /* At this point we know the total number of SBs in the
+ * IGU and the number of PF SBs, so we can validate that
+ * enough are left for the VFs.
+ */
+ if (vfs > p_info->usage.free_cnt +
+ p_info->usage.free_cnt_iov -
+ p_info->usage.cnt) {
+ DP_NOTICE(p_hwfn, true,
+ "Not enough SBs for VFs - 0x%04x SBs, from which %04x PFs and %04x are required\n",
+ p_info->usage.free_cnt +
+ p_info->usage.free_cnt_iov,
+ p_info->usage.cnt, vfs);
+ return ECORE_INVAL;
+ }
+ }
+ }
+
+ /* Cap the number of VFs SBs by the number of VFs */
+ if (IS_PF_SRIOV(p_hwfn))
+ p_info->usage.iov_cnt = p_hwfn->p_dev->p_iov_info->total_vfs;
+
+ /* Mark all SBs as free, now in the right PF/VFs division */
+ p_info->usage.free_cnt = p_info->usage.cnt;
+ p_info->usage.free_cnt_iov = p_info->usage.iov_cnt;
+ p_info->usage.orig = p_info->usage.cnt;
+ p_info->usage.iov_orig = p_info->usage.iov_cnt;
+
+ /* We now proceed to re-configure the IGU cam to reflect the initial
+ * configuration. We can start with the Default SB.
+ */
+ pf_sbs = p_info->usage.cnt;
+ vf_sbs = p_info->usage.iov_cnt;
+
+ for (igu_sb_id = p_info->igu_dsb_id;
+ igu_sb_id < ECORE_MAPPING_MEMORY_SIZE(p_hwfn->p_dev);
+ igu_sb_id++) {
+ p_block = &p_info->entry[igu_sb_id];
+ val = 0;
+
+ if (!(p_block->status & ECORE_IGU_STATUS_VALID))
+ continue;
+
+ if (p_block->status & ECORE_IGU_STATUS_DSB) {
+ p_block->function_id = p_hwfn->rel_pf_id;
+ p_block->is_pf = 1;
+ p_block->vector_number = 0;
+ p_block->status = ECORE_IGU_STATUS_VALID |
+ ECORE_IGU_STATUS_PF |
+ ECORE_IGU_STATUS_DSB;
+ } else if (pf_sbs) {
+ pf_sbs--;
+ p_block->function_id = p_hwfn->rel_pf_id;
+ p_block->is_pf = 1;
+ p_block->vector_number = p_info->usage.cnt - pf_sbs;
+ p_block->status = ECORE_IGU_STATUS_VALID |
+ ECORE_IGU_STATUS_PF |
+ ECORE_IGU_STATUS_FREE;
+ } else if (vf_sbs) {
+ p_block->function_id =
+ p_hwfn->p_dev->p_iov_info->first_vf_in_pf +
+ p_info->usage.iov_cnt - vf_sbs;
+ p_block->is_pf = 0;
+ p_block->vector_number = 0;
+ p_block->status = ECORE_IGU_STATUS_VALID |
+ ECORE_IGU_STATUS_FREE;
+ vf_sbs--;
+ } else {
+ p_block->function_id = 0;
+ p_block->is_pf = 0;
+ p_block->vector_number = 0;
+ }
+
+ SET_FIELD(val, IGU_MAPPING_LINE_FUNCTION_NUMBER,
+ p_block->function_id);
+ SET_FIELD(val, IGU_MAPPING_LINE_PF_VALID, p_block->is_pf);
+ SET_FIELD(val, IGU_MAPPING_LINE_VECTOR_NUMBER,
+ p_block->vector_number);
+
+ /* VF entries would be enabled when the VF is initialized */
+ SET_FIELD(val, IGU_MAPPING_LINE_VALID, p_block->is_pf);
+
+ rval = ecore_rd(p_hwfn, p_ptt,
+ IGU_REG_MAPPING_MEMORY +
+ sizeof(u32) * igu_sb_id);
+
+ if (rval != val) {
+ ecore_wr(p_hwfn, p_ptt,
+ IGU_REG_MAPPING_MEMORY +
+ sizeof(u32) * igu_sb_id,
+ val);
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
+ "IGU reset: [SB 0x%04x] func_id = %d is_pf = %d vector_num = 0x%x [%08x -> %08x]\n",
+ igu_sb_id, p_block->function_id,
+ p_block->is_pf, p_block->vector_number,
+ rval, val);
+ }
+ }
+
+ return 0;
+}
+
+int ecore_int_igu_reset_cam_default(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ struct ecore_sb_cnt_info *p_cnt = &p_hwfn->hw_info.p_igu_info->usage;
+
+ /* Return all the usage indications to default prior to the reset;
+ * the reset expects the non-orig counters to reflect the initial
+ * status of the SBs, and re-calculates the originals based on those.
+ */
+ p_cnt->cnt = p_cnt->orig;
+ p_cnt->free_cnt = p_cnt->orig;
+ p_cnt->iov_cnt = p_cnt->iov_orig;
+ p_cnt->free_cnt_iov = p_cnt->iov_orig;
+ p_cnt->orig = 0;
+ p_cnt->iov_orig = 0;
+
+ /* TODO - we probably need to re-configure the CAU as well... */
+ return ecore_int_igu_reset_cam(p_hwfn, p_ptt);
}
-static u32 ecore_int_igu_read_cam_block(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt, u16 sb_id)
+static void ecore_int_igu_read_cam_block(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u16 igu_sb_id)
{
u32 val = ecore_rd(p_hwfn, p_ptt,
- IGU_REG_MAPPING_MEMORY + sizeof(u32) * sb_id);
+ IGU_REG_MAPPING_MEMORY + sizeof(u32) * igu_sb_id);
struct ecore_igu_block *p_block;
- p_block = &p_hwfn->hw_info.p_igu_info->igu_map.igu_blocks[sb_id];
-
- /* stop scanning when hit first invalid PF entry */
- if (!GET_FIELD(val, IGU_MAPPING_LINE_VALID) &&
- GET_FIELD(val, IGU_MAPPING_LINE_PF_VALID))
- goto out;
+ p_block = &p_hwfn->hw_info.p_igu_info->entry[igu_sb_id];
/* Fill the block information */
- p_block->status = ECORE_IGU_STATUS_VALID;
p_block->function_id = GET_FIELD(val, IGU_MAPPING_LINE_FUNCTION_NUMBER);
p_block->is_pf = GET_FIELD(val, IGU_MAPPING_LINE_PF_VALID);
p_block->vector_number = GET_FIELD(val, IGU_MAPPING_LINE_VECTOR_NUMBER);
- DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
- "IGU_BLOCK: [SB 0x%04x, Value in CAM 0x%08x] func_id = %d"
- " is_pf = %d vector_num = 0x%x\n",
- sb_id, val, p_block->function_id, p_block->is_pf,
- p_block->vector_number);
-
-out:
- return val;
+ p_block->igu_sb_id = igu_sb_id;
}
enum _ecore_status_t ecore_int_igu_read_cam(struct ecore_hwfn *p_hwfn,
@@ -1937,140 +2279,217 @@ enum _ecore_status_t ecore_int_igu_read_cam(struct ecore_hwfn *p_hwfn,
{
struct ecore_igu_info *p_igu_info;
struct ecore_igu_block *p_block;
- u32 min_vf = 0, max_vf = 0, val;
- u16 sb_id, last_iov_sb_id = 0;
- u16 prev_sb_id = 0xFF;
+ u32 min_vf = 0, max_vf = 0;
+ u16 igu_sb_id;
- p_hwfn->hw_info.p_igu_info = OSAL_ALLOC(p_hwfn->p_dev,
- GFP_KERNEL,
- sizeof(*p_igu_info));
+ p_hwfn->hw_info.p_igu_info = OSAL_ZALLOC(p_hwfn->p_dev,
+ GFP_KERNEL,
+ sizeof(*p_igu_info));
if (!p_hwfn->hw_info.p_igu_info)
return ECORE_NOMEM;
-
- OSAL_MEMSET(p_hwfn->hw_info.p_igu_info, 0, sizeof(*p_igu_info));
-
p_igu_info = p_hwfn->hw_info.p_igu_info;
- /* Initialize base sb / sb cnt for PFs and VFs */
- p_igu_info->igu_base_sb = 0xffff;
- p_igu_info->igu_sb_cnt = 0;
- p_igu_info->igu_dsb_id = 0xffff;
- p_igu_info->igu_base_sb_iov = 0xffff;
+	/* Distinguish between an existent and a non-existent default SB */
+ p_igu_info->igu_dsb_id = ECORE_SB_INVALID_IDX;
+ /* Find the range of VF ids whose SB belong to this PF */
if (p_hwfn->p_dev->p_iov_info) {
struct ecore_hw_sriov_info *p_iov = p_hwfn->p_dev->p_iov_info;
min_vf = p_iov->first_vf_in_pf;
max_vf = p_iov->first_vf_in_pf + p_iov->total_vfs;
}
- for (sb_id = 0;
- sb_id < ECORE_MAPPING_MEMORY_SIZE(p_hwfn->p_dev);
- sb_id++) {
- p_block = &p_igu_info->igu_map.igu_blocks[sb_id];
- val = ecore_int_igu_read_cam_block(p_hwfn, p_ptt, sb_id);
- if (!GET_FIELD(val, IGU_MAPPING_LINE_VALID) &&
- GET_FIELD(val, IGU_MAPPING_LINE_PF_VALID))
- break;
- if (p_block->is_pf) {
- if (p_block->function_id == p_hwfn->rel_pf_id) {
- p_block->status |= ECORE_IGU_STATUS_PF;
-
- if (p_block->vector_number == 0) {
- if (p_igu_info->igu_dsb_id == 0xffff)
- p_igu_info->igu_dsb_id = sb_id;
- } else {
- if (p_igu_info->igu_base_sb == 0xffff) {
- p_igu_info->igu_base_sb = sb_id;
- } else if (prev_sb_id != sb_id - 1) {
- DP_NOTICE(p_hwfn->p_dev, false,
- "consecutive igu"
- " vectors for HWFN"
- " %x broken",
- p_hwfn->rel_pf_id);
- break;
- }
- prev_sb_id = sb_id;
- /* we don't count the default */
- (p_igu_info->igu_sb_cnt)++;
- }
- }
- } else {
- if ((p_block->function_id >= min_vf) &&
- (p_block->function_id < max_vf)) {
- /* Available for VFs of this PF */
- if (p_igu_info->igu_base_sb_iov == 0xffff) {
- p_igu_info->igu_base_sb_iov = sb_id;
- } else if (last_iov_sb_id != sb_id - 1) {
- if (!val)
- DP_VERBOSE(p_hwfn->p_dev,
- ECORE_MSG_INTR,
- "First uninited IGU"
- " CAM entry at"
- " index 0x%04x\n",
- sb_id);
- else
- DP_NOTICE(p_hwfn->p_dev, false,
- "Consecutive igu"
- " vectors for HWFN"
- " %x vfs is broken"
- " [jumps from %04x"
- " to %04x]\n",
- p_hwfn->rel_pf_id,
- last_iov_sb_id,
- sb_id);
- break;
- }
- p_block->status |= ECORE_IGU_STATUS_FREE;
- p_hwfn->hw_info.p_igu_info->free_blks++;
- last_iov_sb_id = sb_id;
- }
+ for (igu_sb_id = 0;
+ igu_sb_id < ECORE_MAPPING_MEMORY_SIZE(p_hwfn->p_dev);
+ igu_sb_id++) {
+ /* Read current entry; Notice it might not belong to this PF */
+ ecore_int_igu_read_cam_block(p_hwfn, p_ptt, igu_sb_id);
+ p_block = &p_igu_info->entry[igu_sb_id];
+
+ if ((p_block->is_pf) &&
+ (p_block->function_id == p_hwfn->rel_pf_id)) {
+ p_block->status = ECORE_IGU_STATUS_PF |
+ ECORE_IGU_STATUS_VALID |
+ ECORE_IGU_STATUS_FREE;
+
+ if (p_igu_info->igu_dsb_id != ECORE_SB_INVALID_IDX)
+ p_igu_info->usage.cnt++;
+ } else if (!(p_block->is_pf) &&
+ (p_block->function_id >= min_vf) &&
+ (p_block->function_id < max_vf)) {
+ /* Available for VFs of this PF */
+ p_block->status = ECORE_IGU_STATUS_VALID |
+ ECORE_IGU_STATUS_FREE;
+
+ if (p_igu_info->igu_dsb_id != ECORE_SB_INVALID_IDX)
+ p_igu_info->usage.iov_cnt++;
+ }
+
+		/* Mark the first entry belonging to the PF or its VFs
+ * as the default SB [we'll reset IGU prior to first usage].
+ */
+ if ((p_block->status & ECORE_IGU_STATUS_VALID) &&
+ (p_igu_info->igu_dsb_id == ECORE_SB_INVALID_IDX)) {
+ p_igu_info->igu_dsb_id = igu_sb_id;
+ p_block->status |= ECORE_IGU_STATUS_DSB;
}
+
+ /* While this isn't suitable for all clients, limit number
+ * of prints by having each PF print only its entries with the
+ * exception of PF0 which would print everything.
+ */
+ if ((p_block->status & ECORE_IGU_STATUS_VALID) ||
+ (p_hwfn->abs_pf_id == 0))
+ DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
+ "IGU_BLOCK: [SB 0x%04x] func_id = %d is_pf = %d vector_num = 0x%x\n",
+ igu_sb_id, p_block->function_id,
+ p_block->is_pf, p_block->vector_number);
+ }
+
+ if (p_igu_info->igu_dsb_id == ECORE_SB_INVALID_IDX) {
+ DP_NOTICE(p_hwfn, true,
+ "IGU CAM returned invalid values igu_dsb_id=0x%x\n",
+ p_igu_info->igu_dsb_id);
+ return ECORE_INVAL;
}
- /* There's a possibility the igu_sb_cnt_iov doesn't properly reflect
- * the number of VF SBs [especially for first VF on engine, as we can't
- * diffrentiate between empty entries and its entries].
- * Since we don't really support more SBs than VFs today, prevent any
- * such configuration by sanitizing the number of SBs to equal the
- * number of VFs.
+ /* All non default SB are considered free at this point */
+ p_igu_info->usage.free_cnt = p_igu_info->usage.cnt;
+ p_igu_info->usage.free_cnt_iov = p_igu_info->usage.iov_cnt;
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
+ "igu_dsb_id=0x%x, num Free SBs - PF: %04x VF: %04x [might change after resource allocation]\n",
+ p_igu_info->igu_dsb_id, p_igu_info->usage.cnt,
+ p_igu_info->usage.iov_cnt);
+
+ return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t
+ecore_int_igu_relocate_sb(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ u16 sb_id, bool b_to_vf)
+{
+ struct ecore_igu_info *p_info = p_hwfn->hw_info.p_igu_info;
+ struct ecore_igu_block *p_block = OSAL_NULL;
+ u16 igu_sb_id = 0, vf_num = 0;
+ u32 val = 0;
+
+ if (IS_VF(p_hwfn->p_dev) || !IS_PF_SRIOV(p_hwfn))
+ return ECORE_INVAL;
+
+ if (sb_id == ECORE_SP_SB_ID)
+ return ECORE_INVAL;
+
+ if (!p_info->b_allow_pf_vf_change) {
+ DP_INFO(p_hwfn, "Can't relocate SBs as MFW is too old.\n");
+ return ECORE_INVAL;
+ }
+
+ /* If we're moving a SB from PF to VF, the client had to specify
+ * which vector it wants to move.
*/
- if (IS_PF_SRIOV(p_hwfn)) {
- u16 total_vfs = p_hwfn->p_dev->p_iov_info->total_vfs;
-
- if (total_vfs < p_igu_info->free_blks) {
- DP_VERBOSE(p_hwfn, (ECORE_MSG_INTR | ECORE_MSG_IOV),
- "Limiting number of SBs for IOV - %04x --> %04x\n",
- p_igu_info->free_blks,
- p_hwfn->p_dev->p_iov_info->total_vfs);
- p_igu_info->free_blks = total_vfs;
- } else if (total_vfs > p_igu_info->free_blks) {
- DP_NOTICE(p_hwfn, true,
- "IGU has only %04x SBs for VFs while the device has %04x VFs\n",
- p_igu_info->free_blks, total_vfs);
+ if (b_to_vf) {
+ igu_sb_id = ecore_get_pf_igu_sb_id(p_hwfn, sb_id + 1);
+ if (igu_sb_id == ECORE_SB_INVALID_IDX)
return ECORE_INVAL;
- }
}
- p_igu_info->igu_sb_cnt_iov = p_igu_info->free_blks;
+ /* If we're moving a SB from VF to PF, need to validate there isn't
+ * already a line configured for that vector.
+ */
+ if (!b_to_vf) {
+ if (ecore_get_pf_igu_sb_id(p_hwfn, sb_id + 1) !=
+ ECORE_SB_INVALID_IDX)
+ return ECORE_INVAL;
+ }
- DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
- "IGU igu_base_sb=0x%x [IOV 0x%x] igu_sb_cnt=%d [IOV 0x%x] "
- "igu_dsb_id=0x%x\n",
- p_igu_info->igu_base_sb, p_igu_info->igu_base_sb_iov,
- p_igu_info->igu_sb_cnt, p_igu_info->igu_sb_cnt_iov,
- p_igu_info->igu_dsb_id);
-
- if (p_igu_info->igu_base_sb == 0xffff ||
- p_igu_info->igu_dsb_id == 0xffff || p_igu_info->igu_sb_cnt == 0) {
- DP_NOTICE(p_hwfn, true,
- "IGU CAM returned invalid values igu_base_sb=0x%x "
- "igu_sb_cnt=%d igu_dsb_id=0x%x\n",
- p_igu_info->igu_base_sb, p_igu_info->igu_sb_cnt,
- p_igu_info->igu_dsb_id);
+ /* We need to validate that the SB can actually be relocated.
+ * This would also handle the previous case where we've explicitly
+ * stated which IGU SB needs to move.
+ */
+ for (; igu_sb_id < ECORE_MAPPING_MEMORY_SIZE(p_hwfn->p_dev);
+ igu_sb_id++) {
+ p_block = &p_info->entry[igu_sb_id];
+
+ if (!(p_block->status & ECORE_IGU_STATUS_VALID) ||
+ !(p_block->status & ECORE_IGU_STATUS_FREE) ||
+ (!!(p_block->status & ECORE_IGU_STATUS_PF) != b_to_vf)) {
+ if (b_to_vf)
+ return ECORE_INVAL;
+ else
+ continue;
+ }
+
+ break;
+ }
+
+ if (igu_sb_id == ECORE_MAPPING_MEMORY_SIZE(p_hwfn->p_dev)) {
+ DP_VERBOSE(p_hwfn, (ECORE_MSG_INTR | ECORE_MSG_IOV),
+ "Failed to find a free SB to move\n");
return ECORE_INVAL;
}
+ /* At this point, p_block points to the SB we want to relocate */
+ if (b_to_vf) {
+ p_block->status &= ~ECORE_IGU_STATUS_PF;
+
+ /* It doesn't matter which VF number we choose, since we're
+ * going to disable the line; But let's keep it in range.
+ */
+ vf_num = (u16)p_hwfn->p_dev->p_iov_info->first_vf_in_pf;
+
+ p_block->function_id = (u8)vf_num;
+ p_block->is_pf = 0;
+ p_block->vector_number = 0;
+
+ p_info->usage.cnt--;
+ p_info->usage.free_cnt--;
+ p_info->usage.iov_cnt++;
+ p_info->usage.free_cnt_iov++;
+
+ /* TODO - if SBs aren't really the limiting factor,
+		 * then it might not be accurate [in the sense that
+		 * we might not need to decrement the feature].
+ */
+ p_hwfn->hw_info.feat_num[ECORE_PF_L2_QUE]--;
+ p_hwfn->hw_info.feat_num[ECORE_VF_L2_QUE]++;
+ } else {
+ p_block->status |= ECORE_IGU_STATUS_PF;
+ p_block->function_id = p_hwfn->rel_pf_id;
+ p_block->is_pf = 1;
+ p_block->vector_number = sb_id + 1;
+
+ p_info->usage.cnt++;
+ p_info->usage.free_cnt++;
+ p_info->usage.iov_cnt--;
+ p_info->usage.free_cnt_iov--;
+
+ p_hwfn->hw_info.feat_num[ECORE_PF_L2_QUE]++;
+ p_hwfn->hw_info.feat_num[ECORE_VF_L2_QUE]--;
+ }
+
+ /* Update the IGU and CAU with the new configuration */
+ SET_FIELD(val, IGU_MAPPING_LINE_FUNCTION_NUMBER,
+ p_block->function_id);
+ SET_FIELD(val, IGU_MAPPING_LINE_PF_VALID, p_block->is_pf);
+ SET_FIELD(val, IGU_MAPPING_LINE_VALID, p_block->is_pf);
+ SET_FIELD(val, IGU_MAPPING_LINE_VECTOR_NUMBER,
+ p_block->vector_number);
+
+ ecore_wr(p_hwfn, p_ptt,
+ IGU_REG_MAPPING_MEMORY + sizeof(u32) * igu_sb_id,
+ val);
+
+ ecore_int_cau_conf_sb(p_hwfn, p_ptt, 0,
+ igu_sb_id, vf_num,
+ p_block->is_pf ? 0 : 1);
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_INTR,
+ "Relocation: [SB 0x%04x] func_id = %d is_pf = %d vector_num = 0x%x\n",
+ igu_sb_id, p_block->function_id,
+ p_block->is_pf, p_block->vector_number);
+
return ECORE_SUCCESS;
}
@@ -2170,14 +2589,13 @@ void ecore_int_setup(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
void ecore_int_get_num_sbs(struct ecore_hwfn *p_hwfn,
struct ecore_sb_cnt_info *p_sb_cnt_info)
{
- struct ecore_igu_info *info = p_hwfn->hw_info.p_igu_info;
+ struct ecore_igu_info *p_igu_info = p_hwfn->hw_info.p_igu_info;
- if (!info || !p_sb_cnt_info)
+ if (!p_igu_info || !p_sb_cnt_info)
return;
- p_sb_cnt_info->sb_cnt = info->igu_sb_cnt;
- p_sb_cnt_info->sb_iov_cnt = info->igu_sb_cnt_iov;
- p_sb_cnt_info->sb_free_blk = info->free_blks;
+ OSAL_MEMCPY(p_sb_cnt_info, &p_igu_info->usage,
+ sizeof(*p_sb_cnt_info));
}
void ecore_int_disable_post_isr_release(struct ecore_dev *p_dev)
@@ -2249,10 +2667,11 @@ enum _ecore_status_t ecore_int_get_sb_dbg(struct ecore_hwfn *p_hwfn,
p_info->igu_cons = ecore_rd(p_hwfn, p_ptt,
IGU_REG_CONSUMER_MEM + sbid * 4);
- for (i = 0; i < PIS_PER_SB; i++)
+ for (i = 0; i < PIS_PER_SB_E4; i++)
p_info->pi[i] = (u16)ecore_rd(p_hwfn, p_ptt,
CAU_REG_PI_MEMORY +
- sbid * 4 * PIS_PER_SB + i * 4);
+ sbid * 4 * PIS_PER_SB_E4 +
+ i * 4);
return ECORE_SUCCESS;
}
diff --git a/drivers/net/qede/base/ecore_int.h b/drivers/net/qede/base/ecore_int.h
index 067ed605..563051c3 100644
--- a/drivers/net/qede/base/ecore_int.h
+++ b/drivers/net/qede/base/ecore_int.h
@@ -19,33 +19,78 @@
#define ECORE_SB_EVENT_MASK 0x0003
#define SB_ALIGNED_SIZE(p_hwfn) \
- ALIGNED_TYPE_SIZE(struct status_block, p_hwfn)
+ ALIGNED_TYPE_SIZE(struct status_block_e4, p_hwfn)
+
+#define ECORE_SB_INVALID_IDX 0xffff
struct ecore_igu_block {
u8 status;
#define ECORE_IGU_STATUS_FREE 0x01
#define ECORE_IGU_STATUS_VALID 0x02
#define ECORE_IGU_STATUS_PF 0x04
+#define ECORE_IGU_STATUS_DSB 0x08
u8 vector_number;
u8 function_id;
u8 is_pf;
-};
-struct ecore_igu_map {
- struct ecore_igu_block igu_blocks[MAX_TOT_SB_PER_PATH];
+ /* Index inside IGU [meant for back reference] */
+ u16 igu_sb_id;
+
+ struct ecore_sb_info *sb_info;
};
struct ecore_igu_info {
- struct ecore_igu_map igu_map;
+ struct ecore_igu_block entry[MAX_TOT_SB_PER_PATH];
u16 igu_dsb_id;
- u16 igu_base_sb;
- u16 igu_base_sb_iov;
- u16 igu_sb_cnt;
- u16 igu_sb_cnt_iov;
- u16 free_blks;
+
+ /* The numbers can shift when using APIs to switch SBs between PF and
+ * VF.
+ */
+ struct ecore_sb_cnt_info usage;
+
+ /* Determine whether we can shift SBs between VFs and PFs */
+ bool b_allow_pf_vf_change;
};
+/**
+ * @brief - Make sure the IGU CAM reflects the resources provided by MFW
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int ecore_int_igu_reset_cam(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt);
+
+/**
+ * @brief - Make sure IGU CAM reflects the default resources once again,
+ * starting with a 'dirty' SW database.
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int ecore_int_igu_reset_cam_default(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt);
+
+/**
+ * @brief Translate the weakly-defined client sb-id into an IGU sb-id
+ *
+ * @param p_hwfn
+ * @param sb_id - user provided sb_id
+ *
+ * @return an index inside IGU CAM where the SB resides
+ */
+u16 ecore_get_igu_sb_id(struct ecore_hwfn *p_hwfn, u16 sb_id);
+
+/**
+ * @brief return a pointer to an unused valid SB
+ *
+ * @param p_hwfn
+ * @param b_is_pf - true iff we want a SB belonging to a PF
+ *
+ * @return point to an igu_block, OSAL_NULL if none is available
+ */
+struct ecore_igu_block *
+ecore_get_igu_free_sb(struct ecore_hwfn *p_hwfn, bool b_is_pf);
/* TODO Names of function may change... */
void ecore_int_igu_init_pure_rt(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
@@ -125,9 +170,11 @@ u16 ecore_int_get_sp_sb_id(struct ecore_hwfn *p_hwfn);
* @param opaque - opaque fid of the sb owner.
* @param cleanup_set - set(1) / clear(0)
*/
-void ecore_int_igu_init_pure_rt_single(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u32 sb_id, u16 opaque, bool b_set);
+void ecore_int_igu_init_pure_rt_single(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u16 sb_id,
+ u16 opaque,
+ bool b_set);
/**
* @brief ecore_int_cau_conf - configure cau for a given status
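
/* Editor's illustrative sketch -- not part of this patch. It shows how the
 * per-entry status flags and ecore_get_igu_free_sb() added above could be
 * used to claim a free PF status block from the CAM shadow; the helper name
 * is an assumption of the example, and inclusion of the ecore headers is
 * assumed.
 */
static u16 example_claim_pf_sb(struct ecore_hwfn *p_hwfn)
{
	struct ecore_igu_block *p_block;

	/* b_is_pf == true: only entries marked ECORE_IGU_STATUS_PF qualify */
	p_block = ecore_get_igu_free_sb(p_hwfn, true);
	if (p_block == OSAL_NULL)
		return ECORE_SB_INVALID_IDX;

	/* The igu_sb_id back-reference makes the IGU line addressable;
	 * the caller would then bind its ecore_sb_info to this entry.
	 */
	return p_block->igu_sb_id;
}
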
diff --git a/drivers/net/qede/base/ecore_int_api.h b/drivers/net/qede/base/ecore_int_api.h
index 799fbe82..24cdf5ed 100644
--- a/drivers/net/qede/base/ecore_int_api.h
+++ b/drivers/net/qede/base/ecore_int_api.h
@@ -26,7 +26,7 @@ enum ecore_int_mode {
#endif
struct ecore_sb_info {
- struct status_block *sb_virt;
+ struct status_block_e4 *sb_virt;
dma_addr_t sb_phys;
u32 sb_ack; /* Last given ack */
u16 igu_sb_id;
@@ -44,13 +44,19 @@ struct ecore_sb_info {
struct ecore_sb_info_dbg {
u32 igu_prod;
u32 igu_cons;
- u16 pi[PIS_PER_SB];
+ u16 pi[PIS_PER_SB_E4];
};
struct ecore_sb_cnt_info {
- int sb_cnt;
- int sb_iov_cnt;
- int sb_free_blk;
+ /* Original, current, and free SBs for PF */
+ int orig;
+ int cnt;
+ int free_cnt;
+
+	/* Original, current and free SBs for child VFs */
+ int iov_orig;
+ int iov_cnt;
+ int free_cnt_iov;
};
static OSAL_INLINE u16 ecore_sb_update_sb_idx(struct ecore_sb_info *sb_info)
@@ -61,7 +67,7 @@ static OSAL_INLINE u16 ecore_sb_update_sb_idx(struct ecore_sb_info *sb_info)
/* barrier(); status block is written to by the chip */
/* FIXME: need some sort of barrier. */
prod = OSAL_LE32_TO_CPU(sb_info->sb_virt->prod_index) &
- STATUS_BLOCK_PROD_INDEX_MASK;
+ STATUS_BLOCK_E4_PROD_INDEX_MASK;
if (sb_info->sb_ack != prod) {
sb_info->sb_ack = prod;
rc |= ECORE_SB_IDX;
@@ -173,17 +179,17 @@ enum ecore_coalescing_fsm {
*
* @param p_hwfn
* @param p_ptt
- * @param igu_sb_id
+ * @param p_sb
* @param pi_index
* @param state
* @param timeset
*/
-void ecore_int_cau_conf_pi(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u16 igu_sb_id,
- u32 pi_index,
- enum ecore_coalescing_fsm coalescing_fsm,
- u8 timeset);
+void ecore_int_cau_conf_pi(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ struct ecore_sb_info *p_sb,
+ u32 pi_index,
+ enum ecore_coalescing_fsm coalescing_fsm,
+ u8 timeset);
/**
*
@@ -219,6 +225,7 @@ void ecore_int_igu_disable_int(struct ecore_hwfn *p_hwfn,
u64 ecore_int_igu_read_sisr_reg(struct ecore_hwfn *p_hwfn);
#define ECORE_SP_SB_ID 0xffff
+
/**
* @brief ecore_int_sb_init - Initializes the sb_info structure.
*
@@ -324,4 +331,18 @@ enum _ecore_status_t ecore_int_get_sb_dbg(struct ecore_hwfn *p_hwfn,
struct ecore_sb_info *p_sb,
struct ecore_sb_info_dbg *p_info);
+/**
+ * @brief - Move a free Status block between PF and child VF
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param sb_id - The PF fastpath vector to be moved [re-assigned if claiming
+ * from VF, given-up if moving to VF]
+ * @param b_to_vf - PF->VF == true, VF->PF == false
+ *
+ * @return ECORE_SUCCESS if SB successfully moved.
+ */
+enum _ecore_status_t
+ecore_int_igu_relocate_sb(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ u16 sb_id, bool b_to_vf);
#endif
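
/* Editor's illustrative sketch -- not part of this patch. It shows how a
 * caller might hand one of its free PF fastpath vectors to a child VF via
 * ecore_int_igu_relocate_sb() declared above. The wrapper name and the error
 * handling are assumptions of the example; the ecore headers are assumed to
 * be included.
 */
static enum _ecore_status_t example_give_sb_to_vf(struct ecore_hwfn *p_hwfn,
						  u16 pf_vector)
{
	struct ecore_ptt *p_ptt = ecore_ptt_acquire(p_hwfn);
	enum _ecore_status_t rc;

	if (!p_ptt)
		return ECORE_AGAIN;

	/* b_to_vf == true: the PF gives up vector 'pf_vector' */
	rc = ecore_int_igu_relocate_sb(p_hwfn, p_ptt, pf_vector, true);

	ecore_ptt_release(p_hwfn, p_ptt);
	return rc;
}
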
diff --git a/drivers/net/qede/base/ecore_iov_api.h b/drivers/net/qede/base/ecore_iov_api.h
index 50cb3f2b..218ef50b 100644
--- a/drivers/net/qede/base/ecore_iov_api.h
+++ b/drivers/net/qede/base/ecore_iov_api.h
@@ -345,21 +345,13 @@ ecore_iov_get_public_vf_info(struct ecore_hwfn *p_hwfn,
u16 vfid, bool b_enabled_only);
/**
- * @brief Set pending events bitmap for given @vfid
- *
- * @param p_hwfn
- * @param vfid
- */
-void ecore_iov_pf_add_pending_events(struct ecore_hwfn *p_hwfn, u8 vfid);
-
-/**
- * @brief Copy pending events bitmap in @events and clear
- * original copy of events
+ * @brief Fills a bitmask of all VFs which have pending unhandled
+ * messages.
*
* @param p_hwfn
*/
-void ecore_iov_pf_get_and_clear_pending_events(struct ecore_hwfn *p_hwfn,
- u64 *events);
+void ecore_iov_pf_get_pending_events(struct ecore_hwfn *p_hwfn,
+ u64 *events);
/**
* @brief Copy VF's message to PF's buffer
@@ -693,25 +685,60 @@ bool ecore_iov_is_vf_started(struct ecore_hwfn *p_hwfn,
* @return - rate in Mbps
*/
int ecore_iov_get_vf_min_rate(struct ecore_hwfn *p_hwfn, int vfid);
+
#endif
/**
+ * @brief ecore_pf_configure_vf_queue_coalesce - PF configure coalesce
+ * @brief ecore_iov_pf_configure_vf_queue_coalesce - PF configures coalescing
+ * parameters of a VF's Rx and Tx queues.
+ * While the API allows setting coalescing per-qid, all queues sharing a SB
+ * should be in the same range [i.e., either 0-0x7f, 0x80-0xff or 0x100-0x1ff],
+ * otherwise the configuration would break.
+ *
+ * @param p_hwfn
+ * @param rx_coal - Rx Coalesce value in micro seconds.
+ * @param tx_coal - TX Coalesce value in micro seconds.
+ * @param vf_id
+ * @param qid
+ *
+ * @return int
+ **/
+enum _ecore_status_t
+ecore_iov_pf_configure_vf_queue_coalesce(struct ecore_hwfn *p_hwfn,
+ u16 rx_coal, u16 tx_coal,
+ u16 vf_id, u16 qid);
+
+/**
* @brief - Given a VF index, return index of next [including that] active VF.
*
* @param p_hwfn
* @param rel_vf_id
*
- * @return E4_MAX_NUM_VFS in case no further active VFs, otherwise index.
+ * @return MAX_NUM_VFS_E4 in case no further active VFs, otherwise index.
*/
u16 ecore_iov_get_next_active_vf(struct ecore_hwfn *p_hwfn, u16 rel_vf_id);
void ecore_iov_bulletin_set_udp_ports(struct ecore_hwfn *p_hwfn, int vfid,
u16 vxlan_port, u16 geneve_port);
+
+#ifdef CONFIG_ECORE_SW_CHANNEL
+/**
+ * @brief Set whether PF should communicate with VF using SW/HW channel
+ * Needs to be called for an enabled VF before acquire is over
+ * [latest good point for doing that is OSAL_IOV_VF_ACQUIRE()]
+ *
+ * @param p_hwfn
+ * @param vfid - relative vf index
+ * @param b_is_hw - true iff PF is to use HW channel for communication
+ */
+void ecore_iov_set_vf_hw_channel(struct ecore_hwfn *p_hwfn, int vfid,
+ bool b_is_hw);
+#endif
#endif /* CONFIG_ECORE_SRIOV */
#define ecore_for_each_vf(_p_hwfn, _i) \
for (_i = ecore_iov_get_next_active_vf(_p_hwfn, 0); \
- _i < E4_MAX_NUM_VFS; \
+ _i < MAX_NUM_VFS_E4; \
_i = ecore_iov_get_next_active_vf(_p_hwfn, _i + 1))
#endif
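
/* Editor's illustrative sketch -- not part of this patch. It shows a PF
 * setting identical Rx/Tx coalescing on one of its VF queues through the
 * API declared above; the 50 usec value and the wrapper name are assumptions
 * of the example. Per the comment above, all queues sharing a SB must stay
 * in the same coalescing range.
 */
static enum _ecore_status_t example_set_vf_coalesce(struct ecore_hwfn *p_hwfn,
						    u16 vf_id, u16 qid)
{
	u16 coal_usec = 50;	/* example value, in micro seconds */

	return ecore_iov_pf_configure_vf_queue_coalesce(p_hwfn,
							coal_usec, coal_usec,
							vf_id, qid);
}
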
diff --git a/drivers/net/qede/base/ecore_iro.h b/drivers/net/qede/base/ecore_iro.h
index b4bfe89f..360d7f88 100644
--- a/drivers/net/qede/base/ecore_iro.h
+++ b/drivers/net/qede/base/ecore_iro.h
@@ -193,5 +193,13 @@
#define TSTORM_ROCE_EVENTS_STAT_OFFSET(roce_pf_id) (IRO[48].base + \
((roce_pf_id) * IRO[48].m1))
#define TSTORM_ROCE_EVENTS_STAT_SIZE (IRO[48].size)
+/* DCQCN Received Statistics */
+#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_OFFSET(roce_pf_id) (IRO[49].base + \
+ ((roce_pf_id) * IRO[49].m1))
+#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_SIZE (IRO[49].size)
+/* DCQCN Sent Statistics */
+#define PSTORM_ROCE_DCQCN_SENT_STATS_OFFSET(roce_pf_id) (IRO[50].base + \
+ ((roce_pf_id) * IRO[50].m1))
+#define PSTORM_ROCE_DCQCN_SENT_STATS_SIZE (IRO[50].size)
#endif /* __IRO_H__ */
diff --git a/drivers/net/qede/base/ecore_iro_values.h b/drivers/net/qede/base/ecore_iro_values.h
index bc8df8f8..41532eeb 100644
--- a/drivers/net/qede/base/ecore_iro_values.h
+++ b/drivers/net/qede/base/ecore_iro_values.h
@@ -9,13 +9,13 @@
#ifndef __IRO_VALUES_H__
#define __IRO_VALUES_H__
-static const struct iro iro_arr[49] = {
+static const struct iro iro_arr[51] = {
/* YSTORM_FLOW_CONTROL_MODE_OFFSET */
{ 0x0, 0x0, 0x0, 0x0, 0x8},
/* TSTORM_PORT_STAT_OFFSET(port_id) */
{ 0x4cb0, 0x80, 0x0, 0x0, 0x80},
/* TSTORM_LL2_PORT_STAT_OFFSET(port_id) */
- { 0x6518, 0x20, 0x0, 0x0, 0x20},
+ { 0x6508, 0x20, 0x0, 0x0, 0x20},
/* USTORM_VF_PF_CHANNEL_READY_OFFSET(vf_id) */
{ 0xb00, 0x8, 0x0, 0x0, 0x4},
/* USTORM_FLR_FINAL_ACK_OFFSET(pf_id) */
@@ -29,9 +29,9 @@ static const struct iro iro_arr[49] = {
/* XSTORM_INTEG_TEST_DATA_OFFSET */
{ 0x4c40, 0x0, 0x0, 0x0, 0x78},
/* YSTORM_INTEG_TEST_DATA_OFFSET */
- { 0x3df0, 0x0, 0x0, 0x0, 0x78},
+ { 0x3e10, 0x0, 0x0, 0x0, 0x78},
/* PSTORM_INTEG_TEST_DATA_OFFSET */
- { 0x29b0, 0x0, 0x0, 0x0, 0x78},
+ { 0x2b50, 0x0, 0x0, 0x0, 0x78},
/* TSTORM_INTEG_TEST_DATA_OFFSET */
{ 0x4c38, 0x0, 0x0, 0x0, 0x78},
/* MSTORM_INTEG_TEST_DATA_OFFSET */
@@ -41,11 +41,11 @@ static const struct iro iro_arr[49] = {
/* TSTORM_LL2_RX_PRODS_OFFSET(core_rx_queue_id) */
{ 0xa28, 0x8, 0x0, 0x0, 0x8},
/* CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) */
- { 0x61f8, 0x10, 0x0, 0x0, 0x10},
+ { 0x61e8, 0x10, 0x0, 0x0, 0x10},
/* CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) */
- { 0xbd20, 0x30, 0x0, 0x0, 0x30},
+ { 0xb820, 0x30, 0x0, 0x0, 0x30},
/* CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_tx_stats_id) */
- { 0x95b8, 0x30, 0x0, 0x0, 0x30},
+ { 0x96b8, 0x30, 0x0, 0x0, 0x30},
/* MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) */
{ 0x4b60, 0x80, 0x0, 0x0, 0x40},
/* MSTORM_ETH_PF_PRODS_OFFSET(queue_id) */
@@ -59,11 +59,11 @@ static const struct iro iro_arr[49] = {
/* USTORM_QUEUE_STAT_OFFSET(stat_counter_id) */
{ 0x8150, 0x40, 0x0, 0x0, 0x30},
/* USTORM_ETH_PF_STAT_OFFSET(pf_id) */
- { 0xec70, 0x60, 0x0, 0x0, 0x60},
+ { 0xe770, 0x60, 0x0, 0x0, 0x60},
/* PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) */
- { 0x2b48, 0x80, 0x0, 0x0, 0x38},
+ { 0x2ce8, 0x80, 0x0, 0x0, 0x38},
/* PSTORM_ETH_PF_STAT_OFFSET(pf_id) */
- { 0xf1b0, 0x78, 0x0, 0x0, 0x78},
+ { 0xf2b0, 0x78, 0x0, 0x0, 0x78},
/* PSTORM_CTL_FRAME_ETHTYPE_OFFSET(ethType_id) */
{ 0x1f8, 0x4, 0x0, 0x0, 0x4},
/* TSTORM_ETH_PRS_INPUT_OFFSET */
@@ -81,33 +81,37 @@ static const struct iro iro_arr[49] = {
/* TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) */
{ 0x0, 0x8, 0x0, 0x0, 0x8},
/* TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id,bdq_id) */
- { 0x200, 0x10, 0x8, 0x0, 0x8},
+ { 0x200, 0x18, 0x8, 0x0, 0x8},
/* MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id,bdq_id) */
- { 0xb78, 0x10, 0x8, 0x0, 0x2},
+ { 0xb78, 0x18, 0x8, 0x0, 0x2},
/* TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) */
- { 0xd9a8, 0x38, 0x0, 0x0, 0x24},
+ { 0xd878, 0x50, 0x0, 0x0, 0x3c},
/* MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) */
- { 0x12988, 0x10, 0x0, 0x0, 0x8},
+ { 0x12908, 0x18, 0x0, 0x0, 0x10},
/* USTORM_ISCSI_RX_STATS_OFFSET(pf_id) */
- { 0x11fa0, 0x38, 0x0, 0x0, 0x18},
+ { 0x11aa8, 0x40, 0x0, 0x0, 0x18},
/* XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) */
- { 0xa8c0, 0x38, 0x0, 0x0, 0x10},
+ { 0xa580, 0x50, 0x0, 0x0, 0x20},
/* YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) */
- { 0x86f8, 0x30, 0x0, 0x0, 0x18},
+ { 0x86f8, 0x40, 0x0, 0x0, 0x28},
/* PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) */
- { 0x101f8, 0x10, 0x0, 0x0, 0x10},
+ { 0x102f8, 0x18, 0x0, 0x0, 0x10},
/* TSTORM_FCOE_RX_STATS_OFFSET(pf_id) */
{ 0xde28, 0x48, 0x0, 0x0, 0x38},
/* PSTORM_FCOE_TX_STATS_OFFSET(pf_id) */
- { 0x10660, 0x20, 0x0, 0x0, 0x20},
+ { 0x10760, 0x20, 0x0, 0x0, 0x20},
/* PSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) */
- { 0x2b80, 0x80, 0x0, 0x0, 0x10},
+ { 0x2d20, 0x80, 0x0, 0x0, 0x10},
/* TSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) */
{ 0x5020, 0x10, 0x0, 0x0, 0x10},
/* XSTORM_IWARP_RXMIT_STATS_OFFSET(pf_id) */
{ 0xc9b0, 0x30, 0x0, 0x0, 0x10},
/* TSTORM_ROCE_EVENTS_STAT_OFFSET(roce_pf_id) */
{ 0xeec0, 0x10, 0x0, 0x0, 0x10},
+/* YSTORM_ROCE_DCQCN_RECEIVED_STATS_OFFSET(roce_pf_id) */
+ { 0xa398, 0x10, 0x0, 0x0, 0x10},
+/* PSTORM_ROCE_DCQCN_SENT_STATS_OFFSET(roce_pf_id) */
+ { 0x13100, 0x8, 0x0, 0x0, 0x8},
};
#endif /* __IRO_VALUES_H__ */
diff --git a/drivers/net/qede/base/ecore_l2.c b/drivers/net/qede/base/ecore_l2.c
index e58b8fa0..e3afc8a3 100644
--- a/drivers/net/qede/base/ecore_l2.c
+++ b/drivers/net/qede/base/ecore_l2.c
@@ -173,16 +173,19 @@ static void ecore_eth_queue_qid_usage_del(struct ecore_hwfn *p_hwfn,
void ecore_eth_queue_cid_release(struct ecore_hwfn *p_hwfn,
struct ecore_queue_cid *p_cid)
{
- /* For VF-queues, stuff is a bit complicated as:
- * - They always maintain the qid_usage on their own.
- * - In legacy mode, they also maintain their CIDs.
- */
+ bool b_legacy_vf = !!(p_cid->vf_legacy &
+ ECORE_QCID_LEGACY_VF_CID);
- /* VFs' CIDs are 0-based in PF-view, and uninitialized on VF */
- if (IS_PF(p_hwfn->p_dev) && !p_cid->b_legacy_vf)
+ /* VFs' CIDs are 0-based in PF-view, and uninitialized on VF.
+ * For legacy vf-queues, the CID doesn't go through here.
+ */
+ if (IS_PF(p_hwfn->p_dev) && !b_legacy_vf)
_ecore_cxt_release_cid(p_hwfn, p_cid->cid, p_cid->vfid);
- if (!p_cid->b_legacy_vf)
+
+ /* VFs maintain the index inside queue-zone on their own */
+ if (p_cid->vfid == ECORE_QUEUE_CID_PF)
ecore_eth_queue_qid_usage_del(p_hwfn, p_cid);
+
OSAL_VFREE(p_hwfn->p_dev, p_cid);
}
@@ -193,6 +196,7 @@ static struct ecore_queue_cid *
_ecore_eth_queue_to_cid(struct ecore_hwfn *p_hwfn,
u16 opaque_fid, u32 cid,
struct ecore_queue_start_common_params *p_params,
+ bool b_is_rx,
struct ecore_queue_cid_vf_params *p_vf_params)
{
struct ecore_queue_cid *p_cid;
@@ -204,14 +208,21 @@ _ecore_eth_queue_to_cid(struct ecore_hwfn *p_hwfn,
p_cid->opaque_fid = opaque_fid;
p_cid->cid = cid;
- p_cid->rel = *p_params;
p_cid->p_owner = p_hwfn;
+ /* Fill in parameters */
+ p_cid->rel.vport_id = p_params->vport_id;
+ p_cid->rel.queue_id = p_params->queue_id;
+ p_cid->rel.stats_id = p_params->stats_id;
+ p_cid->sb_igu_id = p_params->p_sb->igu_sb_id;
+ p_cid->b_is_rx = b_is_rx;
+ p_cid->sb_idx = p_params->sb_idx;
+
/* Fill-in bits related to VFs' queues if information was provided */
if (p_vf_params != OSAL_NULL) {
p_cid->vfid = p_vf_params->vfid;
p_cid->vf_qid = p_vf_params->vf_qid;
- p_cid->b_legacy_vf = p_vf_params->b_legacy;
+ p_cid->vf_legacy = p_vf_params->vf_legacy;
} else {
p_cid->vfid = ECORE_QUEUE_CID_PF;
}
@@ -224,7 +235,7 @@ _ecore_eth_queue_to_cid(struct ecore_hwfn *p_hwfn,
}
/* Calculate the engine-absolute indices of the resources.
- * The would guarantee they're valid later on.
+ * This would guarantee they're valid later on.
* In some cases [SBs] we already have the right values.
*/
rc = ecore_fw_vport(p_hwfn, p_cid->rel.vport_id, &p_cid->abs.vport_id);
@@ -248,10 +259,6 @@ _ecore_eth_queue_to_cid(struct ecore_hwfn *p_hwfn,
p_cid->abs.stats_id = p_cid->rel.stats_id;
}
- /* SBs relevant information was already provided as absolute */
- p_cid->abs.sb = p_cid->rel.sb;
- p_cid->abs.sb_idx = p_cid->rel.sb_idx;
-
out:
/* VF-images have provided the qid_usage_idx on their own.
* Otherwise, we need to allocate a unique one.
@@ -270,7 +277,7 @@ out:
p_cid->rel.queue_id, p_cid->qid_usage_idx,
p_cid->abs.queue_id,
p_cid->rel.stats_id, p_cid->abs.stats_id,
- p_cid->abs.sb, p_cid->abs.sb_idx);
+ p_cid->sb_igu_id, p_cid->sb_idx);
return p_cid;
@@ -282,6 +289,7 @@ fail:
struct ecore_queue_cid *
ecore_eth_queue_to_cid(struct ecore_hwfn *p_hwfn, u16 opaque_fid,
struct ecore_queue_start_common_params *p_params,
+ bool b_is_rx,
struct ecore_queue_cid_vf_params *p_vf_params)
{
struct ecore_queue_cid *p_cid;
@@ -296,7 +304,8 @@ ecore_eth_queue_to_cid(struct ecore_hwfn *p_hwfn, u16 opaque_fid,
if (p_vf_params) {
vfid = p_vf_params->vfid;
- if (p_vf_params->b_legacy) {
+ if (p_vf_params->vf_legacy &
+ ECORE_QCID_LEGACY_VF_CID) {
b_legacy_vf = true;
cid = p_vf_params->vf_qid;
}
@@ -315,7 +324,7 @@ ecore_eth_queue_to_cid(struct ecore_hwfn *p_hwfn, u16 opaque_fid,
}
p_cid = _ecore_eth_queue_to_cid(p_hwfn, opaque_fid, cid,
- p_params, p_vf_params);
+ p_params, b_is_rx, p_vf_params);
if ((p_cid == OSAL_NULL) && IS_PF(p_hwfn->p_dev) && !b_legacy_vf)
_ecore_cxt_release_cid(p_hwfn, cid, vfid);
@@ -324,9 +333,11 @@ ecore_eth_queue_to_cid(struct ecore_hwfn *p_hwfn, u16 opaque_fid,
static struct ecore_queue_cid *
ecore_eth_queue_to_cid_pf(struct ecore_hwfn *p_hwfn, u16 opaque_fid,
+ bool b_is_rx,
struct ecore_queue_start_common_params *p_params)
{
- return ecore_eth_queue_to_cid(p_hwfn, opaque_fid, p_params, OSAL_NULL);
+ return ecore_eth_queue_to_cid(p_hwfn, opaque_fid, p_params, b_is_rx,
+ OSAL_NULL);
}
enum _ecore_status_t
@@ -336,6 +347,7 @@ ecore_sp_eth_vport_start(struct ecore_hwfn *p_hwfn,
struct vport_start_ramrod_data *p_ramrod = OSAL_NULL;
struct ecore_spq_entry *p_ent = OSAL_NULL;
struct ecore_sp_init_data init_data;
+ struct eth_vport_tpa_param *p_tpa;
u16 rx_mode = 0, tx_err = 0;
u8 abs_vport_id = 0;
enum _ecore_status_t rc = ECORE_NOTIMPL;
@@ -360,8 +372,8 @@ ecore_sp_eth_vport_start(struct ecore_hwfn *p_hwfn,
p_ramrod->vport_id = abs_vport_id;
p_ramrod->mtu = OSAL_CPU_TO_LE16(p_params->mtu);
- p_ramrod->inner_vlan_removal_en = p_params->remove_inner_vlan;
p_ramrod->handle_ptp_pkts = p_params->handle_ptp_pkts;
+ p_ramrod->inner_vlan_removal_en = p_params->remove_inner_vlan;
p_ramrod->drop_ttl0_en = p_params->drop_ttl0;
p_ramrod->untagged = p_params->only_untagged;
p_ramrod->zero_placement_offset = p_params->zero_placement_offset;
@@ -396,22 +408,22 @@ ecore_sp_eth_vport_start(struct ecore_hwfn *p_hwfn,
p_ramrod->tx_err_behav.values = OSAL_CPU_TO_LE16(tx_err);
/* TPA related fields */
- OSAL_MEMSET(&p_ramrod->tpa_param, 0,
- sizeof(struct eth_vport_tpa_param));
- p_ramrod->tpa_param.max_buff_num = p_params->max_buffers_per_cqe;
+ p_tpa = &p_ramrod->tpa_param;
+ OSAL_MEMSET(p_tpa, 0, sizeof(struct eth_vport_tpa_param));
+ p_tpa->max_buff_num = p_params->max_buffers_per_cqe;
switch (p_params->tpa_mode) {
case ECORE_TPA_MODE_GRO:
- p_ramrod->tpa_param.tpa_max_aggs_num = ETH_TPA_MAX_AGGS_NUM;
- p_ramrod->tpa_param.tpa_max_size = (u16)-1;
- p_ramrod->tpa_param.tpa_min_size_to_cont = p_params->mtu / 2;
- p_ramrod->tpa_param.tpa_min_size_to_start = p_params->mtu / 2;
- p_ramrod->tpa_param.tpa_ipv4_en_flg = 1;
- p_ramrod->tpa_param.tpa_ipv6_en_flg = 1;
- p_ramrod->tpa_param.tpa_ipv4_tunn_en_flg = 1;
- p_ramrod->tpa_param.tpa_ipv6_tunn_en_flg = 1;
- p_ramrod->tpa_param.tpa_pkt_split_flg = 1;
- p_ramrod->tpa_param.tpa_gro_consistent_flg = 1;
+ p_tpa->tpa_max_aggs_num = ETH_TPA_MAX_AGGS_NUM;
+ p_tpa->tpa_max_size = (u16)-1;
+ p_tpa->tpa_min_size_to_cont = p_params->mtu / 2;
+ p_tpa->tpa_min_size_to_start = p_params->mtu / 2;
+ p_tpa->tpa_ipv4_en_flg = 1;
+ p_tpa->tpa_ipv6_en_flg = 1;
+ p_tpa->tpa_ipv4_tunn_en_flg = 1;
+ p_tpa->tpa_ipv6_tunn_en_flg = 1;
+ p_tpa->tpa_pkt_split_flg = 1;
+ p_tpa->tpa_gro_consistent_flg = 1;
break;
default:
break;
@@ -427,8 +439,7 @@ ecore_sp_eth_vport_start(struct ecore_hwfn *p_hwfn,
p_ramrod->ctl_frame_ethtype_check_en = !!p_params->check_ethtype;
/* Software Function ID in hwfn (PFs are 0 - 15, VFs are 16 - 135) */
- p_ramrod->sw_fid = ecore_concrete_to_sw_fid(p_hwfn->p_dev,
- p_params->concrete_fid);
+ p_ramrod->sw_fid = ecore_concrete_to_sw_fid(p_params->concrete_fid);
return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL);
}
@@ -454,6 +465,7 @@ ecore_sp_vport_update_rss(struct ecore_hwfn *p_hwfn,
struct ecore_rss_params *p_rss)
{
struct eth_vport_rss_config *p_config;
+ u16 capabilities = 0;
int i, table_size;
enum _ecore_status_t rc = ECORE_SUCCESS;
@@ -480,26 +492,26 @@ ecore_sp_vport_update_rss(struct ecore_hwfn *p_hwfn,
p_config->capabilities = 0;
- SET_FIELD(p_config->capabilities,
+ SET_FIELD(capabilities,
ETH_VPORT_RSS_CONFIG_IPV4_CAPABILITY,
!!(p_rss->rss_caps & ECORE_RSS_IPV4));
- SET_FIELD(p_config->capabilities,
+ SET_FIELD(capabilities,
ETH_VPORT_RSS_CONFIG_IPV6_CAPABILITY,
!!(p_rss->rss_caps & ECORE_RSS_IPV6));
- SET_FIELD(p_config->capabilities,
+ SET_FIELD(capabilities,
ETH_VPORT_RSS_CONFIG_IPV4_TCP_CAPABILITY,
!!(p_rss->rss_caps & ECORE_RSS_IPV4_TCP));
- SET_FIELD(p_config->capabilities,
+ SET_FIELD(capabilities,
ETH_VPORT_RSS_CONFIG_IPV6_TCP_CAPABILITY,
!!(p_rss->rss_caps & ECORE_RSS_IPV6_TCP));
- SET_FIELD(p_config->capabilities,
+ SET_FIELD(capabilities,
ETH_VPORT_RSS_CONFIG_IPV4_UDP_CAPABILITY,
!!(p_rss->rss_caps & ECORE_RSS_IPV4_UDP));
- SET_FIELD(p_config->capabilities,
+ SET_FIELD(capabilities,
ETH_VPORT_RSS_CONFIG_IPV6_UDP_CAPABILITY,
!!(p_rss->rss_caps & ECORE_RSS_IPV6_UDP));
p_config->tbl_size = p_rss->rss_table_size_log;
- p_config->capabilities = OSAL_CPU_TO_LE16(p_config->capabilities);
+ p_config->capabilities = OSAL_CPU_TO_LE16(capabilities);
DP_VERBOSE(p_hwfn, ECORE_MSG_IFUP,
"update rss flag %d, rss_mode = %d, update_caps = %d, capabilities = %d, update_ind = %d, update_rss_key = %d\n",
@@ -627,11 +639,11 @@ ecore_sp_update_accept_mode(struct ecore_hwfn *p_hwfn,
}
static void
-ecore_sp_vport_update_sge_tpa(struct ecore_hwfn *p_hwfn,
- struct vport_update_ramrod_data *p_ramrod,
+ecore_sp_vport_update_sge_tpa(struct vport_update_ramrod_data *p_ramrod,
struct ecore_sge_tpa_params *p_params)
{
struct eth_vport_tpa_param *p_tpa;
+ u16 val;
if (!p_params) {
p_ramrod->common.update_tpa_param_flg = 0;
@@ -653,14 +665,16 @@ ecore_sp_vport_update_sge_tpa(struct ecore_hwfn *p_hwfn,
p_tpa->tpa_hdr_data_split_flg = p_params->tpa_hdr_data_split_flg;
p_tpa->tpa_gro_consistent_flg = p_params->tpa_gro_consistent_flg;
p_tpa->tpa_max_aggs_num = p_params->tpa_max_aggs_num;
- p_tpa->tpa_max_size = p_params->tpa_max_size;
- p_tpa->tpa_min_size_to_start = p_params->tpa_min_size_to_start;
- p_tpa->tpa_min_size_to_cont = p_params->tpa_min_size_to_cont;
+ val = p_params->tpa_max_size;
+ p_tpa->tpa_max_size = OSAL_CPU_TO_LE16(val);
+ val = p_params->tpa_min_size_to_start;
+ p_tpa->tpa_min_size_to_start = OSAL_CPU_TO_LE16(val);
+ val = p_params->tpa_min_size_to_cont;
+ p_tpa->tpa_min_size_to_cont = OSAL_CPU_TO_LE16(val);
}
static void
-ecore_sp_update_mcast_bin(struct ecore_hwfn *p_hwfn,
- struct vport_update_ramrod_data *p_ramrod,
+ecore_sp_update_mcast_bin(struct vport_update_ramrod_data *p_ramrod,
struct ecore_sp_vport_update_params *p_params)
{
int i;
@@ -769,11 +783,10 @@ ecore_sp_vport_update(struct ecore_hwfn *p_hwfn,
}
/* Update mcast bins for VFs, PF doesn't use this functionality */
- ecore_sp_update_mcast_bin(p_hwfn, p_ramrod, p_params);
+ ecore_sp_update_mcast_bin(p_ramrod, p_params);
ecore_sp_update_accept_mode(p_hwfn, p_ramrod, p_params->accept_flags);
- ecore_sp_vport_update_sge_tpa(p_hwfn, p_ramrod,
- p_params->sge_tpa_params);
+ ecore_sp_vport_update_sge_tpa(p_ramrod, p_params->sge_tpa_params);
if (p_params->mtu) {
p_ramrod->common.update_mtu_flg = 1;
p_ramrod->common.mtu = OSAL_CPU_TO_LE16(p_params->mtu);
@@ -897,7 +910,7 @@ ecore_eth_rxq_start_ramrod(struct ecore_hwfn *p_hwfn,
DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
"opaque_fid=0x%x, cid=0x%x, rx_qzone=0x%x, vport_id=0x%x, sb_id=0x%x\n",
p_cid->opaque_fid, p_cid->cid, p_cid->abs.queue_id,
- p_cid->abs.vport_id, p_cid->abs.sb);
+ p_cid->abs.vport_id, p_cid->sb_igu_id);
/* Get SPQ entry */
OSAL_MEMSET(&init_data, 0, sizeof(init_data));
@@ -913,8 +926,8 @@ ecore_eth_rxq_start_ramrod(struct ecore_hwfn *p_hwfn,
p_ramrod = &p_ent->ramrod.rx_queue_start;
- p_ramrod->sb_id = OSAL_CPU_TO_LE16(p_cid->abs.sb);
- p_ramrod->sb_index = p_cid->abs.sb_idx;
+ p_ramrod->sb_id = OSAL_CPU_TO_LE16(p_cid->sb_igu_id);
+ p_ramrod->sb_index = p_cid->sb_idx;
p_ramrod->vport_id = p_cid->abs.vport_id;
p_ramrod->stats_counter_id = p_cid->abs.stats_id;
p_ramrod->rx_queue_id = OSAL_CPU_TO_LE16(p_cid->abs.queue_id);
@@ -928,12 +941,15 @@ ecore_eth_rxq_start_ramrod(struct ecore_hwfn *p_hwfn,
DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr);
if (p_cid->vfid != ECORE_QUEUE_CID_PF) {
+ bool b_legacy_vf = !!(p_cid->vf_legacy &
+ ECORE_QCID_LEGACY_VF_RX_PROD);
+
p_ramrod->vf_rx_prod_index = p_cid->vf_qid;
DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
"Queue%s is meant for VF rxq[%02x]\n",
- !!p_cid->b_legacy_vf ? " [legacy]" : "",
+ b_legacy_vf ? " [legacy]" : "",
p_cid->vf_qid);
- p_ramrod->vf_rx_prod_use_zone_a = !!p_cid->b_legacy_vf;
+ p_ramrod->vf_rx_prod_use_zone_a = b_legacy_vf;
}
return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL);
@@ -979,7 +995,7 @@ ecore_eth_rx_queue_start(struct ecore_hwfn *p_hwfn,
enum _ecore_status_t rc;
/* Allocate a CID for the queue */
- p_cid = ecore_eth_queue_to_cid_pf(p_hwfn, opaque_fid, p_params);
+ p_cid = ecore_eth_queue_to_cid_pf(p_hwfn, opaque_fid, true, p_params);
if (p_cid == OSAL_NULL)
return ECORE_NOMEM;
@@ -1146,8 +1162,8 @@ ecore_eth_txq_start_ramrod(struct ecore_hwfn *p_hwfn,
p_ramrod = &p_ent->ramrod.tx_queue_start;
p_ramrod->vport_id = p_cid->abs.vport_id;
- p_ramrod->sb_id = OSAL_CPU_TO_LE16(p_cid->abs.sb);
- p_ramrod->sb_index = p_cid->abs.sb_idx;
+ p_ramrod->sb_id = OSAL_CPU_TO_LE16(p_cid->sb_igu_id);
+ p_ramrod->sb_index = p_cid->sb_idx;
p_ramrod->stats_counter_id = p_cid->abs.stats_id;
p_ramrod->queue_zone_id = OSAL_CPU_TO_LE16(p_cid->abs.queue_id);
@@ -1195,7 +1211,7 @@ ecore_eth_tx_queue_start(struct ecore_hwfn *p_hwfn, u16 opaque_fid,
struct ecore_queue_cid *p_cid;
enum _ecore_status_t rc;
- p_cid = ecore_eth_queue_to_cid_pf(p_hwfn, opaque_fid, p_params);
+ p_cid = ecore_eth_queue_to_cid_pf(p_hwfn, opaque_fid, false, p_params);
if (p_cid == OSAL_NULL)
return ECORE_INVAL;
@@ -1494,8 +1510,7 @@ ecore_sp_eth_filter_ucast(struct ecore_hwfn *p_hwfn,
* Note: crc32_length MUST be aligned to 8
* Return:
******************************************************************************/
-static u32 ecore_calc_crc32c(u8 *crc32_packet,
- u32 crc32_length, u32 crc32_seed, u8 complement)
+static u32 ecore_calc_crc32c(u8 *crc32_packet, u32 crc32_length, u32 crc32_seed)
{
u32 byte = 0, bit = 0, crc32_result = crc32_seed;
u8 msb = 0, current_byte = 0;
@@ -1520,25 +1535,23 @@ static u32 ecore_calc_crc32c(u8 *crc32_packet,
return crc32_result;
}
-static u32 ecore_crc32c_le(u32 seed, u8 *mac, u32 len)
+static u32 ecore_crc32c_le(u32 seed, u8 *mac)
{
u32 packet_buf[2] = { 0 };
OSAL_MEMCPY((u8 *)(&packet_buf[0]), &mac[0], 6);
- return ecore_calc_crc32c((u8 *)packet_buf, 8, seed, 0);
+ return ecore_calc_crc32c((u8 *)packet_buf, 8, seed);
}
u8 ecore_mcast_bin_from_mac(u8 *mac)
{
- u32 crc = ecore_crc32c_le(ETH_MULTICAST_BIN_FROM_MAC_SEED,
- mac, ETH_ALEN);
+ u32 crc = ecore_crc32c_le(ETH_MULTICAST_BIN_FROM_MAC_SEED, mac);
return crc & 0xff;
}
static enum _ecore_status_t
ecore_sp_eth_filter_mcast(struct ecore_hwfn *p_hwfn,
- u16 opaque_fid,
struct ecore_filter_mcast *p_filter_cmd,
enum spq_mode comp_mode,
struct ecore_spq_comp_cb *p_comp_data)
@@ -1633,16 +1646,13 @@ ecore_filter_mcast_cmd(struct ecore_dev *p_dev,
for_each_hwfn(p_dev, i) {
struct ecore_hwfn *p_hwfn = &p_dev->hwfns[i];
- u16 opaque_fid;
if (IS_VF(p_dev)) {
ecore_vf_pf_filter_mcast(p_hwfn, p_filter_cmd);
continue;
}
- opaque_fid = p_hwfn->hw_info.opaque_fid;
rc = ecore_sp_eth_filter_mcast(p_hwfn,
- opaque_fid,
p_filter_cmd,
comp_mode, p_comp_data);
if (rc != ECORE_SUCCESS)
@@ -1732,8 +1742,7 @@ static void __ecore_get_vport_pstats(struct ecore_hwfn *p_hwfn,
static void __ecore_get_vport_tstats(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
- struct ecore_eth_stats *p_stats,
- u16 statistics_bin)
+ struct ecore_eth_stats *p_stats)
{
struct tstorm_per_port_stat tstats;
u32 tstats_addr, tstats_len;
@@ -1945,7 +1954,7 @@ void __ecore_get_vport_stats(struct ecore_hwfn *p_hwfn,
{
__ecore_get_vport_mstats(p_hwfn, p_ptt, stats, statistics_bin);
__ecore_get_vport_ustats(p_hwfn, p_ptt, stats, statistics_bin);
- __ecore_get_vport_tstats(p_hwfn, p_ptt, stats, statistics_bin);
+ __ecore_get_vport_tstats(p_hwfn, p_ptt, stats);
__ecore_get_vport_pstats(p_hwfn, p_ptt, stats, statistics_bin);
#ifndef ASIC_ONLY
@@ -1970,6 +1979,7 @@ static void _ecore_get_vport_stats(struct ecore_dev *p_dev,
struct ecore_hwfn *p_hwfn = &p_dev->hwfns[i];
struct ecore_ptt *p_ptt = IS_PF(p_dev) ?
ecore_ptt_acquire(p_hwfn) : OSAL_NULL;
+ bool b_get_port_stats;
if (IS_PF(p_dev)) {
/* The main vport index is relative first */
@@ -1984,8 +1994,9 @@ static void _ecore_get_vport_stats(struct ecore_dev *p_dev,
continue;
}
+ b_get_port_stats = IS_PF(p_dev) && IS_LEAD_HWFN(p_hwfn);
__ecore_get_vport_stats(p_hwfn, p_ptt, stats, fw_vport,
- IS_PF(p_dev) ? true : false);
+ b_get_port_stats);
out:
if (IS_PF(p_dev) && p_ptt)
@@ -2061,12 +2072,16 @@ void ecore_arfs_mode_configure(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
struct ecore_arfs_config_params *p_cfg_params)
{
+ if (OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS, &p_hwfn->p_dev->mf_bits))
+ return;
+
if (p_cfg_params->arfs_enable) {
- ecore_set_rfs_mode_enable(p_hwfn, p_ptt, p_hwfn->rel_pf_id,
- p_cfg_params->tcp,
- p_cfg_params->udp,
- p_cfg_params->ipv4,
- p_cfg_params->ipv6);
+ ecore_gft_config(p_hwfn, p_ptt, p_hwfn->rel_pf_id,
+ p_cfg_params->tcp,
+ p_cfg_params->udp,
+ p_cfg_params->ipv4,
+ p_cfg_params->ipv6,
+ GFT_PROFILE_TYPE_4_TUPLE);
DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
"tcp = %s, udp = %s, ipv4 = %s, ipv6 =%s\n",
p_cfg_params->tcp ? "Enable" : "Disable",
@@ -2074,7 +2089,7 @@ void ecore_arfs_mode_configure(struct ecore_hwfn *p_hwfn,
p_cfg_params->ipv4 ? "Enable" : "Disable",
p_cfg_params->ipv6 ? "Enable" : "Disable");
} else {
- ecore_set_rfs_mode_disable(p_hwfn, p_ptt, p_hwfn->rel_pf_id);
+ ecore_gft_disable(p_hwfn, p_ptt, p_hwfn->rel_pf_id);
}
DP_VERBOSE(p_hwfn, ECORE_MSG_SP, "Configured ARFS mode : %s\n",
p_cfg_params->arfs_enable ? "Enable" : "Disable");
@@ -2082,7 +2097,6 @@ void ecore_arfs_mode_configure(struct ecore_hwfn *p_hwfn,
enum _ecore_status_t
ecore_configure_rfs_ntuple_filter(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
struct ecore_spq_comp_cb *p_cb,
dma_addr_t p_addr, u16 length,
u16 qid, u8 vport_id,
@@ -2126,9 +2140,17 @@ ecore_configure_rfs_ntuple_filter(struct ecore_hwfn *p_hwfn,
DMA_REGPAIR_LE(p_ramrod->pkt_hdr_addr, p_addr);
p_ramrod->pkt_hdr_length = OSAL_CPU_TO_LE16(length);
- p_ramrod->rx_qid_or_action_icid = OSAL_CPU_TO_LE16(abs_rx_q_id);
+
+ p_ramrod->action_icid_valid = 0;
+ p_ramrod->action_icid = 0;
+
+ p_ramrod->rx_qid_valid = 1;
+ p_ramrod->rx_qid = OSAL_CPU_TO_LE16(abs_rx_q_id);
+
+ p_ramrod->flow_id_valid = 0;
+ p_ramrod->flow_id = 0;
+
p_ramrod->vport_id = abs_vport_id;
- p_ramrod->filter_type = RFS_FILTER_TYPE;
p_ramrod->filter_action = b_is_add ? GFT_ADD_FILTER
: GFT_DELETE_FILTER;
@@ -2140,3 +2162,108 @@ ecore_configure_rfs_ntuple_filter(struct ecore_hwfn *p_hwfn,
return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL);
}
+
+int ecore_get_rxq_coalesce(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ struct ecore_queue_cid *p_cid,
+ u16 *p_rx_coal)
+{
+ u32 coalesce, address, is_valid;
+ struct cau_sb_entry sb_entry;
+ u8 timer_res;
+ enum _ecore_status_t rc;
+
+ rc = ecore_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
+ p_cid->sb_igu_id * sizeof(u64),
+ (u64)(osal_uintptr_t)&sb_entry, 2, 0);
+ if (rc != ECORE_SUCCESS) {
+ DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
+ return rc;
+ }
+
+ timer_res = GET_FIELD(sb_entry.params, CAU_SB_ENTRY_TIMER_RES0);
+
+ address = BAR0_MAP_REG_USDM_RAM +
+ USTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id);
+ coalesce = ecore_rd(p_hwfn, p_ptt, address);
+
+ is_valid = GET_FIELD(coalesce, COALESCING_TIMESET_VALID);
+ if (!is_valid)
+ return ECORE_INVAL;
+
+ coalesce = GET_FIELD(coalesce, COALESCING_TIMESET_TIMESET);
+ *p_rx_coal = (u16)(coalesce << timer_res);
+
+ return ECORE_SUCCESS;
+}
+
+int ecore_get_txq_coalesce(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ struct ecore_queue_cid *p_cid,
+ u16 *p_tx_coal)
+{
+ u32 coalesce, address, is_valid;
+ struct cau_sb_entry sb_entry;
+ u8 timer_res;
+ enum _ecore_status_t rc;
+
+ rc = ecore_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
+ p_cid->sb_igu_id * sizeof(u64),
+ (u64)(osal_uintptr_t)&sb_entry, 2, 0);
+ if (rc != ECORE_SUCCESS) {
+ DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
+ return rc;
+ }
+
+ timer_res = GET_FIELD(sb_entry.params, CAU_SB_ENTRY_TIMER_RES1);
+
+ address = BAR0_MAP_REG_XSDM_RAM +
+ XSTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id);
+ coalesce = ecore_rd(p_hwfn, p_ptt, address);
+
+ is_valid = GET_FIELD(coalesce, COALESCING_TIMESET_VALID);
+ if (!is_valid)
+ return ECORE_INVAL;
+
+ coalesce = GET_FIELD(coalesce, COALESCING_TIMESET_TIMESET);
+ *p_tx_coal = (u16)(coalesce << timer_res);
+
+ return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t
+ecore_get_queue_coalesce(struct ecore_hwfn *p_hwfn, u16 *p_coal,
+ void *handle)
+{
+ struct ecore_queue_cid *p_cid = (struct ecore_queue_cid *)handle;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
+ struct ecore_ptt *p_ptt;
+
+ if (IS_VF(p_hwfn->p_dev)) {
+ rc = ecore_vf_pf_get_coalesce(p_hwfn, p_coal, p_cid);
+ if (rc != ECORE_SUCCESS)
+ DP_NOTICE(p_hwfn, false,
+				  "Unable to read queue coalescing\n");
+
+ return rc;
+ }
+
+ p_ptt = ecore_ptt_acquire(p_hwfn);
+ if (!p_ptt)
+ return ECORE_AGAIN;
+
+ if (p_cid->b_is_rx) {
+ rc = ecore_get_rxq_coalesce(p_hwfn, p_ptt, p_cid, p_coal);
+ if (rc != ECORE_SUCCESS)
+ goto out;
+ } else {
+ rc = ecore_get_txq_coalesce(p_hwfn, p_ptt, p_cid, p_coal);
+ if (rc != ECORE_SUCCESS)
+ goto out;
+ }
+
+out:
+ ecore_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
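
/* Editor's illustrative sketch -- not part of this patch. It shows how a
 * client might read back a queue's coalescing through the unified helper
 * added above; the wrapper name is an assumption. The 'handle' is the opaque
 * queue cid pointer, and the returned value is the HW timeset shifted by the
 * CAU timer resolution, i.e. timeset << timer_res micro-seconds.
 */
static void example_print_coalesce(struct ecore_hwfn *p_hwfn,
				   void *queue_handle)
{
	u16 coal_usec = 0;

	/* Resolves Rx vs. Tx internally from the queue cid's b_is_rx flag */
	if (ecore_get_queue_coalesce(p_hwfn, &coal_usec,
				     queue_handle) == ECORE_SUCCESS)
		DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
			   "queue coalescing = %u usec\n", coal_usec);
}
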
diff --git a/drivers/net/qede/base/ecore_l2.h b/drivers/net/qede/base/ecore_l2.h
index 7fe4cbcb..f4212cf2 100644
--- a/drivers/net/qede/base/ecore_l2.h
+++ b/drivers/net/qede/base/ecore_l2.h
@@ -18,7 +18,16 @@
#define MAX_QUEUES_PER_QZONE (sizeof(unsigned long) * 8)
#define ECORE_QUEUE_CID_PF (0xff)
-/* Additional parameters required for initialization of the queue_cid
+/* Almost identical to the ecore_queue_start_common_params,
+ * but here we maintain the SB index in IGU CAM.
+ */
+struct ecore_queue_cid_params {
+ u8 vport_id;
+ u16 queue_id;
+ u8 stats_id;
+};
+
+ /* Additional parameters required for initialization of the queue_cid
* and are relevant only for a PF initializing one for its VFs.
*/
struct ecore_queue_cid_vf_params {
@@ -34,7 +43,7 @@ struct ecore_queue_cid_vf_params {
* - Producers would be placed in a different place.
* - Makes assumptions regarding the CIDs.
*/
- bool b_legacy;
+ u8 vf_legacy;
/* For VFs, this index arrives via TLV to diffrentiate between
* different queues opened on the same qzone, and is passed
@@ -44,16 +53,19 @@ struct ecore_queue_cid_vf_params {
};
struct ecore_queue_cid {
- /* 'Relative' is a relative term ;-). Usually the indices [not counting
- * SBs] would be PF-relative, but there are some cases where that isn't
- * the case - specifically for a PF configuring its VF indices it's
- * possible some fields [E.g., stats-id] in 'rel' would already be abs.
- */
- struct ecore_queue_start_common_params rel;
- struct ecore_queue_start_common_params abs;
+ /* For stats-id, the `rel' is actually absolute as well */
+ struct ecore_queue_cid_params rel;
+ struct ecore_queue_cid_params abs;
+
+ /* These have no 'relative' meaning */
+ u16 sb_igu_id;
+ u8 sb_idx;
+
u32 cid;
u16 opaque_fid;
+ bool b_is_rx;
+
/* VFs queues are mapped differently, so we need to know the
* relative queue associated with them [0-based].
* Notice this is relevant on the *PF* queue-cid of its VF's queues,
@@ -69,7 +81,9 @@ struct ecore_queue_cid {
u8 qid_usage_idx;
/* Legacy VFs might have Rx producer located elsewhere */
- bool b_legacy_vf;
+ u8 vf_legacy;
+#define ECORE_QCID_LEGACY_VF_RX_PROD (1 << 0)
+#define ECORE_QCID_LEGACY_VF_CID (1 << 1)
struct ecore_hwfn *p_owner;
};
@@ -84,6 +98,7 @@ void ecore_eth_queue_cid_release(struct ecore_hwfn *p_hwfn,
struct ecore_queue_cid *
ecore_eth_queue_to_cid(struct ecore_hwfn *p_hwfn, u16 opaque_fid,
struct ecore_queue_start_common_params *p_params,
+ bool b_is_rx,
struct ecore_queue_cid_vf_params *p_vf_params);
enum _ecore_status_t
@@ -129,31 +144,24 @@ ecore_eth_txq_start_ramrod(struct ecore_hwfn *p_hwfn,
u8 ecore_mcast_bin_from_mac(u8 *mac);
-/**
- * @brief - ecore_configure_rfs_ntuple_filter
- *
- * This ramrod should be used to add or remove arfs hw filter
- *
- * @params p_hwfn
- * @params p_ptt
- * @params p_cb Used for ECORE_SPQ_MODE_CB,where client would initialize
- it with cookie and callback function address, if not
- using this mode then client must pass NULL.
- * @params p_addr p_addr is an actual packet header that needs to be
- * filter. It has to mapped with IO to read prior to
- * calling this, [contains 4 tuples- src ip, dest ip,
- * src port, dest port].
- * @params length length of p_addr header up to past the transport header.
- * @params qid receive packet will be directed to this queue.
- * @params vport_id
- * @params b_is_add flag to add or remove filter.
- *
- */
-enum _ecore_status_t
-ecore_configure_rfs_ntuple_filter(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- struct ecore_spq_comp_cb *p_cb,
- dma_addr_t p_addr, u16 length,
- u16 qid, u8 vport_id,
- bool b_is_add);
+enum _ecore_status_t ecore_set_rxq_coalesce(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u16 coalesce,
+ struct ecore_queue_cid *p_cid);
+
+enum _ecore_status_t ecore_set_txq_coalesce(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u16 coalesce,
+ struct ecore_queue_cid *p_cid);
+
+enum _ecore_status_t ecore_get_rxq_coalesce(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ struct ecore_queue_cid *p_cid,
+ u16 *p_hw_coal);
+
+enum _ecore_status_t ecore_get_txq_coalesce(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ struct ecore_queue_cid *p_cid,
+ u16 *p_hw_coal);
+
#endif
diff --git a/drivers/net/qede/base/ecore_l2_api.h b/drivers/net/qede/base/ecore_l2_api.h
index d09f3c4a..ed9837bf 100644
--- a/drivers/net/qede/base/ecore_l2_api.h
+++ b/drivers/net/qede/base/ecore_l2_api.h
@@ -11,6 +11,7 @@
#include "ecore_status.h"
#include "ecore_sp_api.h"
+#include "ecore_int_api.h"
#ifndef __EXTRACT__LINUX__
enum ecore_rss_caps {
@@ -35,8 +36,7 @@ struct ecore_queue_start_common_params {
/* Relative, but relevant only for PFs */
u8 stats_id;
- /* These are always absolute */
- u16 sb;
+ struct ecore_sb_info *p_sb;
u8 sb_idx;
};
@@ -436,4 +436,30 @@ void ecore_reset_vport_stats(struct ecore_dev *p_dev);
void ecore_arfs_mode_configure(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
struct ecore_arfs_config_params *p_cfg_params);
+
+/**
+ * @brief - ecore_configure_rfs_ntuple_filter
+ *
+ * This ramrod should be used to add or remove arfs hw filter
+ *
+ * @params p_hwfn
+ * @params p_cb Used for ECORE_SPQ_MODE_CB, where the client would
+ *              initialize it with a cookie and callback function address;
+ *              if not using this mode the client must pass NULL.
+ * @params p_addr The actual packet header that needs to be filtered. It
+ *              has to be IO-mapped for reading prior to calling this
+ *              [contains the 4 tuples: src ip, dest ip, src port,
+ *              dest port].
+ * @params length Length of the p_addr header, up to past the transport header.
+ * @params qid Received packets will be directed to this queue.
+ * @params vport_id
+ * @params b_is_add flag to add or remove filter.
+ *
+ */
+enum _ecore_status_t
+ecore_configure_rfs_ntuple_filter(struct ecore_hwfn *p_hwfn,
+ struct ecore_spq_comp_cb *p_cb,
+ dma_addr_t p_addr, u16 length,
+ u16 qid, u8 vport_id,
+ bool b_is_add);
#endif
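
/* Editor's illustrative sketch -- not part of this patch. It shows adding a
 * 4-tuple ARFS filter through the relocated prototype above; the wrapper
 * name and the DMA-mapped header buffer (hdr_phys/hdr_len) are assumptions
 * of the example. A NULL callback means ECORE_SPQ_MODE_CB is not used.
 */
static enum _ecore_status_t example_add_ntuple(struct ecore_hwfn *p_hwfn,
					       dma_addr_t hdr_phys,
					       u16 hdr_len,
					       u16 rx_qid, u8 vport_id)
{
	return ecore_configure_rfs_ntuple_filter(p_hwfn, OSAL_NULL /* p_cb */,
						 hdr_phys, hdr_len,
						 rx_qid, vport_id,
						 true /* add */);
}
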
diff --git a/drivers/net/qede/base/ecore_mcp.c b/drivers/net/qede/base/ecore_mcp.c
index 88c5ceb0..8edd2e96 100644
--- a/drivers/net/qede/base/ecore_mcp.c
+++ b/drivers/net/qede/base/ecore_mcp.c
@@ -20,6 +20,8 @@
#include "ecore_gtt_reg_addr.h"
#include "ecore_iro.h"
#include "ecore_dcbx.h"
+#include "ecore_sp_commands.h"
+#include "ecore_cxt.h"
#define CHIP_MCP_RESP_ITER_US 10
#define EMUL_MCP_RESP_ITER_US (1000 * 1000)
@@ -43,9 +45,9 @@
OFFSETOF(struct public_drv_mb, _field))
#define PDA_COMP (((FW_MAJOR_VERSION) + (FW_MINOR_VERSION << 8)) << \
- DRV_ID_PDA_COMP_VER_SHIFT)
+ DRV_ID_PDA_COMP_VER_OFFSET)
-#define MCP_BYTES_PER_MBIT_SHIFT 17
+#define MCP_BYTES_PER_MBIT_OFFSET 17
#ifndef ASIC_ONLY
static int loaded;
@@ -96,13 +98,81 @@ void ecore_mcp_read_mb(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
}
}
+struct ecore_mcp_cmd_elem {
+ osal_list_entry_t list;
+ struct ecore_mcp_mb_params *p_mb_params;
+ u16 expected_seq_num;
+ bool b_is_completed;
+};
+
+/* Must be called while cmd_lock is acquired */
+static struct ecore_mcp_cmd_elem *
+ecore_mcp_cmd_add_elem(struct ecore_hwfn *p_hwfn,
+ struct ecore_mcp_mb_params *p_mb_params,
+ u16 expected_seq_num)
+{
+ struct ecore_mcp_cmd_elem *p_cmd_elem = OSAL_NULL;
+
+ p_cmd_elem = OSAL_ZALLOC(p_hwfn->p_dev, GFP_ATOMIC,
+ sizeof(*p_cmd_elem));
+ if (!p_cmd_elem) {
+ DP_NOTICE(p_hwfn, false,
+ "Failed to allocate `struct ecore_mcp_cmd_elem'\n");
+ goto out;
+ }
+
+ p_cmd_elem->p_mb_params = p_mb_params;
+ p_cmd_elem->expected_seq_num = expected_seq_num;
+ OSAL_LIST_PUSH_HEAD(&p_cmd_elem->list, &p_hwfn->mcp_info->cmd_list);
+out:
+ return p_cmd_elem;
+}
+
+/* Must be called while cmd_lock is acquired */
+static void ecore_mcp_cmd_del_elem(struct ecore_hwfn *p_hwfn,
+ struct ecore_mcp_cmd_elem *p_cmd_elem)
+{
+ OSAL_LIST_REMOVE_ENTRY(&p_cmd_elem->list, &p_hwfn->mcp_info->cmd_list);
+ OSAL_FREE(p_hwfn->p_dev, p_cmd_elem);
+}
+
+/* Must be called while cmd_lock is acquired */
+static struct ecore_mcp_cmd_elem *
+ecore_mcp_cmd_get_elem(struct ecore_hwfn *p_hwfn, u16 seq_num)
+{
+ struct ecore_mcp_cmd_elem *p_cmd_elem = OSAL_NULL;
+
+ OSAL_LIST_FOR_EACH_ENTRY(p_cmd_elem, &p_hwfn->mcp_info->cmd_list, list,
+ struct ecore_mcp_cmd_elem) {
+ if (p_cmd_elem->expected_seq_num == seq_num)
+ return p_cmd_elem;
+ }
+
+ return OSAL_NULL;
+}
+
enum _ecore_status_t ecore_mcp_free(struct ecore_hwfn *p_hwfn)
{
if (p_hwfn->mcp_info) {
+ struct ecore_mcp_cmd_elem *p_cmd_elem = OSAL_NULL, *p_tmp;
+
OSAL_FREE(p_hwfn->p_dev, p_hwfn->mcp_info->mfw_mb_cur);
OSAL_FREE(p_hwfn->p_dev, p_hwfn->mcp_info->mfw_mb_shadow);
- OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->mcp_info->lock);
+
+ OSAL_SPIN_LOCK(&p_hwfn->mcp_info->cmd_lock);
+ OSAL_LIST_FOR_EACH_ENTRY_SAFE(p_cmd_elem, p_tmp,
+ &p_hwfn->mcp_info->cmd_list, list,
+ struct ecore_mcp_cmd_elem) {
+ ecore_mcp_cmd_del_elem(p_hwfn, p_cmd_elem);
+ }
+ OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->cmd_lock);
+
+#ifdef CONFIG_ECORE_LOCK_ALLOC
+ OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->mcp_info->cmd_lock);
+ OSAL_SPIN_LOCK_DEALLOC(&p_hwfn->mcp_info->link_lock);
+#endif
}
+
OSAL_FREE(p_hwfn->p_dev, p_hwfn->mcp_info);
return ECORE_SUCCESS;
@@ -157,8 +227,7 @@ static enum _ecore_status_t ecore_load_mcp_offsets(struct ecore_hwfn *p_hwfn,
p_info->drv_pulse_seq = DRV_MB_RD(p_hwfn, p_ptt, drv_pulse_mb) &
DRV_PULSE_SEQ_MASK;
- p_info->mcp_hist = (u16)ecore_rd(p_hwfn, p_ptt,
- MISCS_REG_GENERIC_POR_0);
+ p_info->mcp_hist = ecore_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0);
return ECORE_SUCCESS;
}
@@ -176,6 +245,16 @@ enum _ecore_status_t ecore_mcp_cmd_init(struct ecore_hwfn *p_hwfn,
goto err;
p_info = p_hwfn->mcp_info;
+ /* Initialize the MFW spinlocks */
+#ifdef CONFIG_ECORE_LOCK_ALLOC
+ OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_info->cmd_lock);
+ OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_info->link_lock);
+#endif
+ OSAL_SPIN_LOCK_INIT(&p_info->cmd_lock);
+ OSAL_SPIN_LOCK_INIT(&p_info->link_lock);
+
+ OSAL_LIST_INIT(&p_info->cmd_list);
+
if (ecore_load_mcp_offsets(p_hwfn, p_ptt) != ECORE_SUCCESS) {
DP_NOTICE(p_hwfn, false, "MCP is not initialized\n");
/* Do not free mcp_info here, since public_base indicate that
@@ -190,10 +269,6 @@ enum _ecore_status_t ecore_mcp_cmd_init(struct ecore_hwfn *p_hwfn,
if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr)
goto err;
- /* Initialize the MFW spinlock */
- OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_info->lock);
- OSAL_SPIN_LOCK_INIT(&p_info->lock);
-
return ECORE_SUCCESS;
err:
@@ -202,58 +277,28 @@ err:
return ECORE_NOMEM;
}
-/* Locks the MFW mailbox of a PF to ensure a single access.
- * The lock is achieved in most cases by holding a spinlock, causing other
- * threads to wait till a previous access is done.
- * In some cases (currently when a [UN]LOAD_REQ commands are sent), the single
- * access is achieved by setting a blocking flag, which will fail other
- * competing contexts to send their mailboxes.
- */
-static enum _ecore_status_t ecore_mcp_mb_lock(struct ecore_hwfn *p_hwfn,
- u32 cmd)
-{
- OSAL_SPIN_LOCK(&p_hwfn->mcp_info->lock);
-
- /* The spinlock shouldn't be acquired when the mailbox command is
- * [UN]LOAD_REQ, since the engine is locked by the MFW, and a parallel
- * pending [UN]LOAD_REQ command of another PF together with a spinlock
- * (i.e. interrupts are disabled) - can lead to a deadlock.
- * It is assumed that for a single PF, no other mailbox commands can be
- * sent from another context while sending LOAD_REQ, and that any
- * parallel commands to UNLOAD_REQ can be cancelled.
- */
- if (cmd == DRV_MSG_CODE_LOAD_DONE || cmd == DRV_MSG_CODE_UNLOAD_DONE)
- p_hwfn->mcp_info->block_mb_sending = false;
+static void ecore_mcp_reread_offsets(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ u32 generic_por_0 = ecore_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0);
- if (p_hwfn->mcp_info->block_mb_sending) {
- DP_NOTICE(p_hwfn, false,
- "Trying to send a MFW mailbox command [0x%x]"
- " in parallel to [UN]LOAD_REQ. Aborting.\n",
- cmd);
- OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->lock);
- return ECORE_BUSY;
- }
+ /* Use MCP history register to check if MCP reset occurred between init
+ * time and now.
+ */
+ if (p_hwfn->mcp_info->mcp_hist != generic_por_0) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
+ "Rereading MCP offsets [mcp_hist 0x%08x, generic_por_0 0x%08x]\n",
+ p_hwfn->mcp_info->mcp_hist, generic_por_0);
- if (cmd == DRV_MSG_CODE_LOAD_REQ || cmd == DRV_MSG_CODE_UNLOAD_REQ) {
- p_hwfn->mcp_info->block_mb_sending = true;
- OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->lock);
+ ecore_load_mcp_offsets(p_hwfn, p_ptt);
+ ecore_mcp_cmd_port_init(p_hwfn, p_ptt);
}
-
- return ECORE_SUCCESS;
-}
-
-static void ecore_mcp_mb_unlock(struct ecore_hwfn *p_hwfn, u32 cmd)
-{
- if (cmd != DRV_MSG_CODE_LOAD_REQ && cmd != DRV_MSG_CODE_UNLOAD_REQ)
- OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->lock);
}
enum _ecore_status_t ecore_mcp_reset(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt)
{
- u32 seq = ++p_hwfn->mcp_info->drv_mb_seq;
- u32 delay = CHIP_MCP_RESP_ITER_US;
- u32 org_mcp_reset_seq, cnt = 0;
+ u32 org_mcp_reset_seq, seq, delay = CHIP_MCP_RESP_ITER_US, cnt = 0;
enum _ecore_status_t rc = ECORE_SUCCESS;
#ifndef ASIC_ONLY
@@ -261,15 +306,20 @@ enum _ecore_status_t ecore_mcp_reset(struct ecore_hwfn *p_hwfn,
delay = EMUL_MCP_RESP_ITER_US;
#endif
- /* Ensure that only a single thread is accessing the mailbox at a
- * certain time.
- */
- rc = ecore_mcp_mb_lock(p_hwfn, DRV_MSG_CODE_MCP_RESET);
- if (rc != ECORE_SUCCESS)
- return rc;
+ if (p_hwfn->mcp_info->b_block_cmd) {
+ DP_NOTICE(p_hwfn, false,
+ "The MFW is not responsive. Avoid sending MCP_RESET mailbox command.\n");
+ return ECORE_ABORTED;
+ }
+
+ /* Ensure that only a single thread is accessing the mailbox */
+ OSAL_SPIN_LOCK(&p_hwfn->mcp_info->cmd_lock);
- /* Set drv command along with the updated sequence */
org_mcp_reset_seq = ecore_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0);
+
+ /* Set drv command along with the updated sequence */
+ ecore_mcp_reread_offsets(p_hwfn, p_ptt);
+ seq = ++p_hwfn->mcp_info->drv_mb_seq;
DRV_MB_WR(p_hwfn, p_ptt, drv_mb_header, (DRV_MSG_CODE_MCP_RESET | seq));
do {
@@ -289,73 +339,238 @@ enum _ecore_status_t ecore_mcp_reset(struct ecore_hwfn *p_hwfn,
rc = ECORE_AGAIN;
}
- ecore_mcp_mb_unlock(p_hwfn, DRV_MSG_CODE_MCP_RESET);
+ OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->cmd_lock);
return rc;
}
-static enum _ecore_status_t ecore_do_mcp_cmd(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u32 cmd, u32 param,
- u32 *o_mcp_resp,
- u32 *o_mcp_param)
+/* Must be called while cmd_lock is acquired */
+static bool ecore_mcp_has_pending_cmd(struct ecore_hwfn *p_hwfn)
{
- u32 delay = CHIP_MCP_RESP_ITER_US;
- u32 max_retries = ECORE_DRV_MB_MAX_RETRIES;
- u32 seq, cnt = 1, actual_mb_seq;
+ struct ecore_mcp_cmd_elem *p_cmd_elem = OSAL_NULL;
+
+ /* There is at most one pending command at any given time, and if it
+ * exists - it is placed at the HEAD of the list.
+ */
+ if (!OSAL_LIST_IS_EMPTY(&p_hwfn->mcp_info->cmd_list)) {
+ p_cmd_elem = OSAL_LIST_FIRST_ENTRY(&p_hwfn->mcp_info->cmd_list,
+ struct ecore_mcp_cmd_elem,
+ list);
+ return !p_cmd_elem->b_is_completed;
+ }
+
+ return false;
+}
+
+/* Must be called while cmd_lock is acquired */
+static enum _ecore_status_t
+ecore_mcp_update_pending_cmd(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
+{
+ struct ecore_mcp_mb_params *p_mb_params;
+ struct ecore_mcp_cmd_elem *p_cmd_elem;
+ u32 mcp_resp;
+ u16 seq_num;
+
+ mcp_resp = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_header);
+ seq_num = (u16)(mcp_resp & FW_MSG_SEQ_NUMBER_MASK);
+
+ /* Return if no new non-handled response has been received */
+ if (seq_num != p_hwfn->mcp_info->drv_mb_seq)
+ return ECORE_AGAIN;
+
+ p_cmd_elem = ecore_mcp_cmd_get_elem(p_hwfn, seq_num);
+ if (!p_cmd_elem) {
+ DP_ERR(p_hwfn,
+ "Failed to find a pending mailbox cmd that expects sequence number %d\n",
+ seq_num);
+ return ECORE_UNKNOWN_ERROR;
+ }
+
+ p_mb_params = p_cmd_elem->p_mb_params;
+
+ /* Get the MFW response along with the sequence number */
+ p_mb_params->mcp_resp = mcp_resp;
+
+ /* Get the MFW param */
+ p_mb_params->mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param);
+
+ /* Get the union data */
+ if (p_mb_params->p_data_dst != OSAL_NULL &&
+ p_mb_params->data_dst_size) {
+ u32 union_data_addr = p_hwfn->mcp_info->drv_mb_addr +
+ OFFSETOF(struct public_drv_mb,
+ union_data);
+ ecore_memcpy_from(p_hwfn, p_ptt, p_mb_params->p_data_dst,
+ union_data_addr, p_mb_params->data_dst_size);
+ }
+
+ p_cmd_elem->b_is_completed = true;
+
+ return ECORE_SUCCESS;
+}
+
+/* Must be called while cmd_lock is acquired */
+static void __ecore_mcp_cmd_and_union(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ struct ecore_mcp_mb_params *p_mb_params,
+ u16 seq_num)
+{
+ union drv_union_data union_data;
+ u32 union_data_addr;
+
+ /* Set the union data */
+ union_data_addr = p_hwfn->mcp_info->drv_mb_addr +
+ OFFSETOF(struct public_drv_mb, union_data);
+ OSAL_MEM_ZERO(&union_data, sizeof(union_data));
+ if (p_mb_params->p_data_src != OSAL_NULL && p_mb_params->data_src_size)
+ OSAL_MEMCPY(&union_data, p_mb_params->p_data_src,
+ p_mb_params->data_src_size);
+ ecore_memcpy_to(p_hwfn, p_ptt, union_data_addr, &union_data,
+ sizeof(union_data));
+
+ /* Set the drv param */
+ DRV_MB_WR(p_hwfn, p_ptt, drv_mb_param, p_mb_params->param);
+
+ /* Set the drv command along with the sequence number */
+ DRV_MB_WR(p_hwfn, p_ptt, drv_mb_header, (p_mb_params->cmd | seq_num));
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
+ "MFW mailbox: command 0x%08x param 0x%08x\n",
+ (p_mb_params->cmd | seq_num), p_mb_params->param);
+}
+
+static void ecore_mcp_cmd_set_blocking(struct ecore_hwfn *p_hwfn,
+ bool block_cmd)
+{
+ p_hwfn->mcp_info->b_block_cmd = block_cmd;
+
+ DP_INFO(p_hwfn, "%s sending of mailbox commands to the MFW\n",
+ block_cmd ? "Block" : "Unblock");
+}
+
+void ecore_mcp_print_cpu_info(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ u32 cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2;
+
+ cpu_mode = ecore_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+ cpu_state = ecore_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
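+ /* Sample the program counter three times to show whether the MCP is advancing or stuck */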
+ cpu_pc_0 = ecore_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+ OSAL_UDELAY(CHIP_MCP_RESP_ITER_US);
+ cpu_pc_1 = ecore_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+ OSAL_UDELAY(CHIP_MCP_RESP_ITER_US);
+ cpu_pc_2 = ecore_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+
+ DP_NOTICE(p_hwfn, false,
+ "MCP CPU info: mode 0x%08x, state 0x%08x, pc {0x%08x, 0x%08x, 0x%08x}\n",
+ cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2);
+}
+
+static enum _ecore_status_t
+_ecore_mcp_cmd_and_union(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_mcp_mb_params *p_mb_params,
+ u32 max_retries, u32 delay)
+{
+ struct ecore_mcp_cmd_elem *p_cmd_elem;
+ u32 cnt = 0;
+ u16 seq_num;
enum _ecore_status_t rc = ECORE_SUCCESS;
-#ifndef ASIC_ONLY
- if (CHIP_REV_IS_EMUL(p_hwfn->p_dev))
- delay = EMUL_MCP_RESP_ITER_US;
- /* There is a built-in delay of 100usec in each MFW response read */
- if (CHIP_REV_IS_FPGA(p_hwfn->p_dev))
- max_retries /= 10;
-#endif
+ /* Wait until the mailbox is non-occupied */
+ do {
+ /* Exit the loop if there is no pending command, or if the
+ * pending command is completed during this iteration.
+ * The spinlock stays locked until the command is sent.
+ */
- /* Get actual driver mailbox sequence */
- actual_mb_seq = DRV_MB_RD(p_hwfn, p_ptt, drv_mb_header) &
- DRV_MSG_SEQ_NUMBER_MASK;
+ OSAL_SPIN_LOCK(&p_hwfn->mcp_info->cmd_lock);
- /* Use MCP history register to check if MCP reset occurred between
- * init time and now.
- */
- if (p_hwfn->mcp_info->mcp_hist !=
- ecore_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0)) {
- DP_VERBOSE(p_hwfn, ECORE_MSG_SP, "Rereading MCP offsets\n");
- ecore_load_mcp_offsets(p_hwfn, p_ptt);
- ecore_mcp_cmd_port_init(p_hwfn, p_ptt);
+ if (!ecore_mcp_has_pending_cmd(p_hwfn))
+ break;
+
+ rc = ecore_mcp_update_pending_cmd(p_hwfn, p_ptt);
+ if (rc == ECORE_SUCCESS)
+ break;
+ else if (rc != ECORE_AGAIN)
+ goto err;
+
+ OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->cmd_lock);
+ OSAL_UDELAY(delay);
+ OSAL_MFW_CMD_PREEMPT(p_hwfn);
+ } while (++cnt < max_retries);
+
+ if (cnt >= max_retries) {
+ DP_NOTICE(p_hwfn, false,
+ "The MFW mailbox is occupied by an uncompleted command. Failed to send command 0x%08x [param 0x%08x].\n",
+ p_mb_params->cmd, p_mb_params->param);
+ return ECORE_AGAIN;
}
- seq = ++p_hwfn->mcp_info->drv_mb_seq;
- /* Set drv param */
- DRV_MB_WR(p_hwfn, p_ptt, drv_mb_param, param);
+ /* Send the mailbox command */
+ ecore_mcp_reread_offsets(p_hwfn, p_ptt);
+ seq_num = ++p_hwfn->mcp_info->drv_mb_seq;
+ p_cmd_elem = ecore_mcp_cmd_add_elem(p_hwfn, p_mb_params, seq_num);
+ if (!p_cmd_elem) {
+ rc = ECORE_NOMEM;
+ goto err;
+ }
- /* Set drv command along with the updated sequence */
- DRV_MB_WR(p_hwfn, p_ptt, drv_mb_header, (cmd | seq));
+ __ecore_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, seq_num);
+ OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->cmd_lock);
+ /* Wait for the MFW response */
do {
- /* Wait for MFW response */
+ /* Exit the loop if the command is already completed, or if the
+ * command is completed during this iteration.
+ * The spinlock stays locked until the list element is removed.
+ */
+
OSAL_UDELAY(delay);
- *o_mcp_resp = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_header);
+ OSAL_SPIN_LOCK(&p_hwfn->mcp_info->cmd_lock);
- /* Give the FW up to 5 second (500*10ms) */
- } while ((seq != (*o_mcp_resp & FW_MSG_SEQ_NUMBER_MASK)) &&
- (cnt++ < max_retries));
+ if (p_cmd_elem->b_is_completed)
+ break;
- /* Is this a reply to our command? */
- if (seq == (*o_mcp_resp & FW_MSG_SEQ_NUMBER_MASK)) {
- *o_mcp_resp &= FW_MSG_CODE_MASK;
- /* Get the MCP param */
- *o_mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param);
- } else {
- /* FW BUG! */
- DP_ERR(p_hwfn, "MFW failed to respond [cmd 0x%x param 0x%x]\n",
- cmd, param);
- *o_mcp_resp = 0;
- rc = ECORE_AGAIN;
+ rc = ecore_mcp_update_pending_cmd(p_hwfn, p_ptt);
+ if (rc == ECORE_SUCCESS)
+ break;
+ else if (rc != ECORE_AGAIN)
+ goto err;
+
+ OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->cmd_lock);
+ OSAL_MFW_CMD_PREEMPT(p_hwfn);
+ } while (++cnt < max_retries);
+
+ if (cnt >= max_retries) {
+ DP_NOTICE(p_hwfn, false,
+ "The MFW failed to respond to command 0x%08x [param 0x%08x].\n",
+ p_mb_params->cmd, p_mb_params->param);
+ ecore_mcp_print_cpu_info(p_hwfn, p_ptt);
+
+ OSAL_SPIN_LOCK(&p_hwfn->mcp_info->cmd_lock);
+ ecore_mcp_cmd_del_elem(p_hwfn, p_cmd_elem);
+ OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->cmd_lock);
+
+ ecore_mcp_cmd_set_blocking(p_hwfn, true);
ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_MFW_RESP_FAIL);
+ return ECORE_AGAIN;
}
+
+ ecore_mcp_cmd_del_elem(p_hwfn, p_cmd_elem);
+ OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->cmd_lock);
+
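+ /* 'delay' is in usec, so cnt * delay is the total wait; print it as ms with a usec remainder */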
+ DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
+ "MFW mailbox: response 0x%08x param 0x%08x [after %d.%03d ms]\n",
+ p_mb_params->mcp_resp, p_mb_params->mcp_param,
+ (cnt * delay) / 1000, (cnt * delay) % 1000);
+
+ /* Clear the sequence number from the MFW response */
+ p_mb_params->mcp_resp &= FW_MSG_CODE_MASK;
+
+ return ECORE_SUCCESS;
+
+err:
+ OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->cmd_lock);
return rc;
}
@@ -364,9 +579,17 @@ ecore_mcp_cmd_and_union(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
struct ecore_mcp_mb_params *p_mb_params)
{
- union drv_union_data union_data;
- u32 union_data_addr;
- enum _ecore_status_t rc;
+ osal_size_t union_data_size = sizeof(union drv_union_data);
+ u32 max_retries = ECORE_DRV_MB_MAX_RETRIES;
+ u32 delay = CHIP_MCP_RESP_ITER_US;
+
+#ifndef ASIC_ONLY
+ if (CHIP_REV_IS_EMUL(p_hwfn->p_dev))
+ delay = EMUL_MCP_RESP_ITER_US;
+ /* There is a built-in delay of 100usec in each MFW response read */
+ if (CHIP_REV_IS_FPGA(p_hwfn->p_dev))
+ max_retries /= 10;
+#endif
/* MCP not initialized */
if (!ecore_mcp_is_init(p_hwfn)) {
@@ -374,44 +597,24 @@ ecore_mcp_cmd_and_union(struct ecore_hwfn *p_hwfn,
return ECORE_BUSY;
}
- if (p_mb_params->data_src_size > sizeof(union_data) ||
- p_mb_params->data_dst_size > sizeof(union_data)) {
+ if (p_mb_params->data_src_size > union_data_size ||
+ p_mb_params->data_dst_size > union_data_size) {
DP_ERR(p_hwfn,
"The provided size is larger than the union data size [src_size %u, dst_size %u, union_data_size %zu]\n",
p_mb_params->data_src_size, p_mb_params->data_dst_size,
- sizeof(union_data));
+ union_data_size);
return ECORE_INVAL;
}
- union_data_addr = p_hwfn->mcp_info->drv_mb_addr +
- OFFSETOF(struct public_drv_mb, union_data);
-
- /* Ensure that only a single thread is accessing the mailbox at a
- * certain time.
- */
- rc = ecore_mcp_mb_lock(p_hwfn, p_mb_params->cmd);
- if (rc != ECORE_SUCCESS)
- return rc;
-
- OSAL_MEM_ZERO(&union_data, sizeof(union_data));
- if (p_mb_params->p_data_src != OSAL_NULL && p_mb_params->data_src_size)
- OSAL_MEMCPY(&union_data, p_mb_params->p_data_src,
- p_mb_params->data_src_size);
- ecore_memcpy_to(p_hwfn, p_ptt, union_data_addr, &union_data,
- sizeof(union_data));
-
- rc = ecore_do_mcp_cmd(p_hwfn, p_ptt, p_mb_params->cmd,
- p_mb_params->param, &p_mb_params->mcp_resp,
- &p_mb_params->mcp_param);
-
- if (p_mb_params->p_data_dst != OSAL_NULL &&
- p_mb_params->data_dst_size)
- ecore_memcpy_from(p_hwfn, p_ptt, p_mb_params->p_data_dst,
- union_data_addr, p_mb_params->data_dst_size);
-
- ecore_mcp_mb_unlock(p_hwfn, p_mb_params->cmd);
+ if (p_hwfn->mcp_info->b_block_cmd) {
+ DP_NOTICE(p_hwfn, false,
+ "The MFW is not responsive. Avoid sending mailbox command 0x%08x [param 0x%08x].\n",
+ p_mb_params->cmd, p_mb_params->param);
+ return ECORE_ABORTED;
+ }
- return rc;
+ return _ecore_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, max_retries,
+ delay);
}
enum _ecore_status_t ecore_mcp_cmd(struct ecore_hwfn *p_hwfn,
@@ -520,7 +723,7 @@ static void ecore_mcp_mf_workaround(struct ecore_hwfn *p_hwfn,
load_phase = FW_MSG_CODE_DRV_LOAD_FUNCTION;
/* On CMT, always tell that it's engine */
- if (p_hwfn->p_dev->num_hwfns > 1)
+ if (ECORE_IS_CMT(p_hwfn->p_dev))
load_phase = FW_MSG_CODE_DRV_LOAD_ENGINE;
*p_load_code = load_phase;
@@ -534,11 +737,28 @@ static void ecore_mcp_mf_workaround(struct ecore_hwfn *p_hwfn,
}
#endif
-static bool ecore_mcp_can_force_load(u8 drv_role, u8 exist_drv_role)
+static bool
+ecore_mcp_can_force_load(u8 drv_role, u8 exist_drv_role,
+ enum ecore_override_force_load override_force_load)
{
- return (drv_role == DRV_ROLE_OS &&
- exist_drv_role == DRV_ROLE_PREBOOT) ||
- (drv_role == DRV_ROLE_KDUMP && exist_drv_role == DRV_ROLE_OS);
+ bool can_force_load = false;
+
+ switch (override_force_load) {
+ case ECORE_OVERRIDE_FORCE_LOAD_ALWAYS:
+ can_force_load = true;
+ break;
+ case ECORE_OVERRIDE_FORCE_LOAD_NEVER:
+ can_force_load = false;
+ break;
+ default:
+ can_force_load = (drv_role == DRV_ROLE_OS &&
+ exist_drv_role == DRV_ROLE_PREBOOT) ||
+ (drv_role == DRV_ROLE_KDUMP &&
+ exist_drv_role == DRV_ROLE_OS);
+ break;
+ }
+
+ return can_force_load;
}
static enum _ecore_status_t ecore_mcp_cancel_load_req(struct ecore_hwfn *p_hwfn,
@@ -631,18 +851,16 @@ __ecore_mcp_load_req(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
load_req.drv_ver_0 = p_in_params->drv_ver_0;
load_req.drv_ver_1 = p_in_params->drv_ver_1;
load_req.fw_ver = p_in_params->fw_ver;
- ECORE_MFW_SET_FIELD(load_req.misc0, LOAD_REQ_ROLE,
- p_in_params->drv_role);
- ECORE_MFW_SET_FIELD(load_req.misc0, LOAD_REQ_LOCK_TO,
- p_in_params->timeout_val);
- ECORE_MFW_SET_FIELD(load_req.misc0, LOAD_REQ_FORCE,
- p_in_params->force_cmd);
- ECORE_MFW_SET_FIELD(load_req.misc0, LOAD_REQ_FLAGS0,
- p_in_params->avoid_eng_reset);
+ SET_MFW_FIELD(load_req.misc0, LOAD_REQ_ROLE, p_in_params->drv_role);
+ SET_MFW_FIELD(load_req.misc0, LOAD_REQ_LOCK_TO,
+ p_in_params->timeout_val);
+ SET_MFW_FIELD(load_req.misc0, LOAD_REQ_FORCE, p_in_params->force_cmd);
+ SET_MFW_FIELD(load_req.misc0, LOAD_REQ_FLAGS0,
+ p_in_params->avoid_eng_reset);
hsi_ver = (p_in_params->hsi_ver == ECORE_LOAD_REQ_HSI_VER_DEFAULT) ?
DRV_ID_MCP_HSI_VER_CURRENT :
- (p_in_params->hsi_ver << DRV_ID_MCP_HSI_VER_SHIFT);
+ (p_in_params->hsi_ver << DRV_ID_MCP_HSI_VER_OFFSET);
OSAL_MEM_ZERO(&mb_params, sizeof(mb_params));
mb_params.cmd = DRV_MSG_CODE_LOAD_REQ;
@@ -655,22 +873,20 @@ __ecore_mcp_load_req(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
"Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n",
mb_params.param,
- ECORE_MFW_GET_FIELD(mb_params.param, DRV_ID_DRV_INIT_HW),
- ECORE_MFW_GET_FIELD(mb_params.param, DRV_ID_DRV_TYPE),
- ECORE_MFW_GET_FIELD(mb_params.param, DRV_ID_MCP_HSI_VER),
- ECORE_MFW_GET_FIELD(mb_params.param, DRV_ID_PDA_COMP_VER));
+ GET_MFW_FIELD(mb_params.param, DRV_ID_DRV_INIT_HW),
+ GET_MFW_FIELD(mb_params.param, DRV_ID_DRV_TYPE),
+ GET_MFW_FIELD(mb_params.param, DRV_ID_MCP_HSI_VER),
+ GET_MFW_FIELD(mb_params.param, DRV_ID_PDA_COMP_VER));
if (p_in_params->hsi_ver != ECORE_LOAD_REQ_HSI_VER_1)
DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
"Load Request: drv_ver 0x%08x_0x%08x, fw_ver 0x%08x, misc0 0x%08x [role %d, timeout %d, force %d, flags0 0x%x]\n",
load_req.drv_ver_0, load_req.drv_ver_1,
load_req.fw_ver, load_req.misc0,
- ECORE_MFW_GET_FIELD(load_req.misc0, LOAD_REQ_ROLE),
- ECORE_MFW_GET_FIELD(load_req.misc0,
- LOAD_REQ_LOCK_TO),
- ECORE_MFW_GET_FIELD(load_req.misc0, LOAD_REQ_FORCE),
- ECORE_MFW_GET_FIELD(load_req.misc0,
- LOAD_REQ_FLAGS0));
+ GET_MFW_FIELD(load_req.misc0, LOAD_REQ_ROLE),
+ GET_MFW_FIELD(load_req.misc0, LOAD_REQ_LOCK_TO),
+ GET_MFW_FIELD(load_req.misc0, LOAD_REQ_FORCE),
+ GET_MFW_FIELD(load_req.misc0, LOAD_REQ_FLAGS0));
rc = ecore_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
if (rc != ECORE_SUCCESS) {
@@ -689,29 +905,27 @@ __ecore_mcp_load_req(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
"Load Response: exist_drv_ver 0x%08x_0x%08x, exist_fw_ver 0x%08x, misc0 0x%08x [exist_role %d, mfw_hsi %d, flags0 0x%x]\n",
load_rsp.drv_ver_0, load_rsp.drv_ver_1,
load_rsp.fw_ver, load_rsp.misc0,
- ECORE_MFW_GET_FIELD(load_rsp.misc0, LOAD_RSP_ROLE),
- ECORE_MFW_GET_FIELD(load_rsp.misc0, LOAD_RSP_HSI),
- ECORE_MFW_GET_FIELD(load_rsp.misc0,
- LOAD_RSP_FLAGS0));
+ GET_MFW_FIELD(load_rsp.misc0, LOAD_RSP_ROLE),
+ GET_MFW_FIELD(load_rsp.misc0, LOAD_RSP_HSI),
+ GET_MFW_FIELD(load_rsp.misc0, LOAD_RSP_FLAGS0));
p_out_params->exist_drv_ver_0 = load_rsp.drv_ver_0;
p_out_params->exist_drv_ver_1 = load_rsp.drv_ver_1;
p_out_params->exist_fw_ver = load_rsp.fw_ver;
p_out_params->exist_drv_role =
- ECORE_MFW_GET_FIELD(load_rsp.misc0, LOAD_RSP_ROLE);
+ GET_MFW_FIELD(load_rsp.misc0, LOAD_RSP_ROLE);
p_out_params->mfw_hsi_ver =
- ECORE_MFW_GET_FIELD(load_rsp.misc0, LOAD_RSP_HSI);
+ GET_MFW_FIELD(load_rsp.misc0, LOAD_RSP_HSI);
p_out_params->drv_exists =
- ECORE_MFW_GET_FIELD(load_rsp.misc0, LOAD_RSP_FLAGS0) &
+ GET_MFW_FIELD(load_rsp.misc0, LOAD_RSP_FLAGS0) &
LOAD_RSP_FLAGS0_DRV_EXISTS;
}
return ECORE_SUCCESS;
}
-static enum _ecore_status_t eocre_get_mfw_drv_role(struct ecore_hwfn *p_hwfn,
- enum ecore_drv_role drv_role,
- u8 *p_mfw_drv_role)
+static void ecore_get_mfw_drv_role(enum ecore_drv_role drv_role,
+ u8 *p_mfw_drv_role)
{
switch (drv_role) {
case ECORE_DRV_ROLE_OS:
@@ -720,12 +934,7 @@ static enum _ecore_status_t eocre_get_mfw_drv_role(struct ecore_hwfn *p_hwfn,
case ECORE_DRV_ROLE_KDUMP:
*p_mfw_drv_role = DRV_ROLE_KDUMP;
break;
- default:
- DP_ERR(p_hwfn, "Unexpected driver role %d\n", drv_role);
- return ECORE_INVAL;
}
-
- return ECORE_SUCCESS;
}
enum ecore_load_req_force {
@@ -734,10 +943,8 @@ enum ecore_load_req_force {
ECORE_LOAD_REQ_FORCE_ALL,
};
-static enum _ecore_status_t
-ecore_get_mfw_force_cmd(struct ecore_hwfn *p_hwfn,
- enum ecore_load_req_force force_cmd,
- u8 *p_mfw_force_cmd)
+static void ecore_get_mfw_force_cmd(enum ecore_load_req_force force_cmd,
+ u8 *p_mfw_force_cmd)
{
switch (force_cmd) {
case ECORE_LOAD_REQ_FORCE_NONE:
@@ -749,12 +956,7 @@ ecore_get_mfw_force_cmd(struct ecore_hwfn *p_hwfn,
case ECORE_LOAD_REQ_FORCE_ALL:
*p_mfw_force_cmd = LOAD_REQ_FORCE_ALL;
break;
- default:
- DP_ERR(p_hwfn, "Unexpected force value %d\n", force_cmd);
- return ECORE_INVAL;
}
-
- return ECORE_SUCCESS;
}
enum _ecore_status_t ecore_mcp_load_req(struct ecore_hwfn *p_hwfn,
@@ -763,7 +965,7 @@ enum _ecore_status_t ecore_mcp_load_req(struct ecore_hwfn *p_hwfn,
{
struct ecore_load_req_out_params out_params;
struct ecore_load_req_in_params in_params;
- u8 mfw_drv_role, mfw_force_cmd;
+ u8 mfw_drv_role = 0, mfw_force_cmd;
enum _ecore_status_t rc;
#ifndef ASIC_ONLY
@@ -778,17 +980,10 @@ enum _ecore_status_t ecore_mcp_load_req(struct ecore_hwfn *p_hwfn,
in_params.drv_ver_0 = ECORE_VERSION;
in_params.drv_ver_1 = ecore_get_config_bitmap();
in_params.fw_ver = STORM_FW_VERSION;
- rc = eocre_get_mfw_drv_role(p_hwfn, p_params->drv_role, &mfw_drv_role);
- if (rc != ECORE_SUCCESS)
- return rc;
-
+ ecore_get_mfw_drv_role(p_params->drv_role, &mfw_drv_role);
in_params.drv_role = mfw_drv_role;
in_params.timeout_val = p_params->timeout_val;
- rc = ecore_get_mfw_force_cmd(p_hwfn, ECORE_LOAD_REQ_FORCE_NONE,
- &mfw_force_cmd);
- if (rc != ECORE_SUCCESS)
- return rc;
-
+ ecore_get_mfw_force_cmd(ECORE_LOAD_REQ_FORCE_NONE, &mfw_force_cmd);
in_params.force_cmd = mfw_force_cmd;
in_params.avoid_eng_reset = p_params->avoid_eng_reset;
@@ -805,9 +1000,6 @@ enum _ecore_status_t ecore_mcp_load_req(struct ecore_hwfn *p_hwfn,
DP_INFO(p_hwfn,
"MFW refused a load request due to HSI > 1. Resending with HSI = 1.\n");
- /* The previous load request set the mailbox blocking */
- p_hwfn->mcp_info->block_mb_sending = false;
-
in_params.hsi_ver = ECORE_LOAD_REQ_HSI_VER_1;
OSAL_MEM_ZERO(&out_params, sizeof(out_params));
rc = __ecore_mcp_load_req(p_hwfn, p_ptt, &in_params,
@@ -816,23 +1008,20 @@ enum _ecore_status_t ecore_mcp_load_req(struct ecore_hwfn *p_hwfn,
return rc;
} else if (out_params.load_code ==
FW_MSG_CODE_DRV_LOAD_REFUSED_REQUIRES_FORCE) {
- /* The previous load request set the mailbox blocking */
- p_hwfn->mcp_info->block_mb_sending = false;
-
if (ecore_mcp_can_force_load(in_params.drv_role,
- out_params.exist_drv_role)) {
+ out_params.exist_drv_role,
+ p_params->override_force_load)) {
DP_INFO(p_hwfn,
- "A force load is required [existing: role %d, fw_ver 0x%08x, drv_ver 0x%08x_0x%08x]. Sending a force load request.\n",
+ "A force load is required [{role, fw_ver, drv_ver}: loading={%d, 0x%08x, 0x%08x_%08x}, existing={%d, 0x%08x, 0x%08x_%08x}]\n",
+ in_params.drv_role, in_params.fw_ver,
+ in_params.drv_ver_0, in_params.drv_ver_1,
out_params.exist_drv_role,
out_params.exist_fw_ver,
out_params.exist_drv_ver_0,
out_params.exist_drv_ver_1);
- rc = ecore_get_mfw_force_cmd(p_hwfn,
- ECORE_LOAD_REQ_FORCE_ALL,
- &mfw_force_cmd);
- if (rc != ECORE_SUCCESS)
- return rc;
+ ecore_get_mfw_force_cmd(ECORE_LOAD_REQ_FORCE_ALL,
+ &mfw_force_cmd);
in_params.force_cmd = mfw_force_cmd;
OSAL_MEM_ZERO(&out_params, sizeof(out_params));
@@ -842,7 +1031,9 @@ enum _ecore_status_t ecore_mcp_load_req(struct ecore_hwfn *p_hwfn,
return rc;
} else {
DP_NOTICE(p_hwfn, false,
- "A force load is required [existing: role %d, fw_ver 0x%08x, drv_ver 0x%08x_0x%08x]. Avoiding to prevent disruption of active PFs.\n",
+ "A force load is required [{role, fw_ver, drv_ver}: loading={%d, 0x%08x, x%08x_0x%08x}, existing={%d, 0x%08x, 0x%08x_0x%08x}] - Avoid\n",
+ in_params.drv_role, in_params.fw_ver,
+ in_params.drv_ver_0, in_params.drv_ver_1,
out_params.exist_drv_role,
out_params.exist_fw_ver,
out_params.exist_drv_ver_0,
@@ -873,19 +1064,11 @@ enum _ecore_status_t ecore_mcp_load_req(struct ecore_hwfn *p_hwfn,
return ECORE_INVAL;
}
break;
- case FW_MSG_CODE_DRV_LOAD_REFUSED_PDA:
- case FW_MSG_CODE_DRV_LOAD_REFUSED_DIAG:
- case FW_MSG_CODE_DRV_LOAD_REFUSED_HSI:
- case FW_MSG_CODE_DRV_LOAD_REFUSED_REJECT:
- DP_NOTICE(p_hwfn, false,
- "MFW refused a load request [resp 0x%08x]. Aborting.\n",
- out_params.load_code);
- return ECORE_BUSY;
default:
DP_NOTICE(p_hwfn, false,
- "Unexpected response to load request [resp 0x%08x]. Aborting.\n",
+ "Unexpected refusal to load request [resp 0x%08x]. Aborting.\n",
out_params.load_code);
- break;
+ return ECORE_BUSY;
}
p_params->load_code = out_params.load_code;
@@ -907,8 +1090,6 @@ enum _ecore_status_t ecore_mcp_load_done(struct ecore_hwfn *p_hwfn,
return rc;
}
-#define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR (1 << 0)
-
/* Check if there is a DID mismatch between nvm-cfg/efuse */
if (param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR)
DP_NOTICE(p_hwfn, false,
@@ -1029,12 +1210,60 @@ static void ecore_mcp_handle_transceiver_change(struct ecore_hwfn *p_hwfn,
OFFSETOF(struct public_port,
transceiver_data)));
- transceiver_state = GET_FIELD(transceiver_state, ETH_TRANSCEIVER_STATE);
+ transceiver_state = GET_MFW_FIELD(transceiver_state,
+ ETH_TRANSCEIVER_STATE);
if (transceiver_state == ETH_TRANSCEIVER_STATE_PRESENT)
DP_NOTICE(p_hwfn, false, "Transceiver is present.\n");
else
DP_NOTICE(p_hwfn, false, "Transceiver is unplugged.\n");
+
+ OSAL_TRANSCEIVER_UPDATE(p_hwfn);
+}
+
+static void ecore_mcp_read_eee_config(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ struct ecore_mcp_link_state *p_link)
+{
+ u32 eee_status, val;
+
+ p_link->eee_adv_caps = 0;
+ p_link->eee_lp_adv_caps = 0;
+ eee_status = ecore_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr +
+ OFFSETOF(struct public_port, eee_status));
+ p_link->eee_active = !!(eee_status & EEE_ACTIVE_BIT);
+ val = (eee_status & EEE_LD_ADV_STATUS_MASK) >> EEE_LD_ADV_STATUS_OFFSET;
+ if (val & EEE_1G_ADV)
+ p_link->eee_adv_caps |= ECORE_EEE_1G_ADV;
+ if (val & EEE_10G_ADV)
+ p_link->eee_adv_caps |= ECORE_EEE_10G_ADV;
+ val = (eee_status & EEE_LP_ADV_STATUS_MASK) >> EEE_LP_ADV_STATUS_OFFSET;
+ if (val & EEE_1G_ADV)
+ p_link->eee_lp_adv_caps |= ECORE_EEE_1G_ADV;
+ if (val & EEE_10G_ADV)
+ p_link->eee_lp_adv_caps |= ECORE_EEE_10G_ADV;
+}
+
+static u32 ecore_mcp_get_shmem_func(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ struct public_func *p_data,
+ int pfid)
+{
+ u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
+ PUBLIC_FUNC);
+ u32 mfw_path_offsize = ecore_rd(p_hwfn, p_ptt, addr);
+ u32 func_addr = SECTION_ADDR(mfw_path_offsize, pfid);
+ u32 i, size;
+
+ OSAL_MEM_ZERO(p_data, sizeof(*p_data));
+
+ size = OSAL_MIN_T(u32, sizeof(*p_data),
+ SECTION_SIZE(mfw_path_offsize));
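+ /* Copy the function's shmem section to p_data, one dword at a time */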
+ for (i = 0; i < size / sizeof(u32); i++)
+ ((u32 *)p_data)[i] = ecore_rd(p_hwfn, p_ptt,
+ func_addr + (i << 2));
+
+ return size;
}
static void ecore_mcp_handle_link_change(struct ecore_hwfn *p_hwfn,
@@ -1045,6 +1274,9 @@ static void ecore_mcp_handle_link_change(struct ecore_hwfn *p_hwfn,
u8 max_bw, min_bw;
u32 status = 0;
+ /* Prevent SW/attentions from doing this at the same time */
+ OSAL_SPIN_LOCK(&p_hwfn->mcp_info->link_lock);
+
p_link = &p_hwfn->mcp_info->link_output;
OSAL_MEMSET(p_link, 0, sizeof(*p_link));
if (!b_reset) {
@@ -1060,13 +1292,27 @@ static void ecore_mcp_handle_link_change(struct ecore_hwfn *p_hwfn,
} else {
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
"Resetting link indications\n");
- return;
+ goto out;
}
- if (p_hwfn->b_drv_link_init)
- p_link->link_up = !!(status & LINK_STATUS_LINK_UP);
- else
+ if (p_hwfn->b_drv_link_init) {
+ /* Link indication with modern MFW arrives as per-PF
+ * indication.
+ */
+ if (p_hwfn->mcp_info->capabilities &
+ FW_MB_PARAM_FEATURE_SUPPORT_VLINK) {
+ struct public_func shmem_info;
+
+ ecore_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
+ MCP_PF_ID(p_hwfn));
+ p_link->link_up = !!(shmem_info.status &
+ FUNC_STATUS_VIRTUAL_LINK_UP);
+ } else {
+ p_link->link_up = !!(status & LINK_STATUS_LINK_UP);
+ }
+ } else {
p_link->link_up = false;
+ }
p_link->full_duplex = true;
switch ((status & LINK_STATUS_SPEED_AND_DUPLEX_MASK)) {
@@ -1113,10 +1359,10 @@ static void ecore_mcp_handle_link_change(struct ecore_hwfn *p_hwfn,
__ecore_configure_pf_max_bandwidth(p_hwfn, p_ptt,
p_link, max_bw);
- /* Mintz bandwidth configuration */
+ /* Min bandwidth configuration */
__ecore_configure_pf_min_bandwidth(p_hwfn, p_ptt,
p_link, min_bw);
- ecore_configure_vp_wfq_on_link_change(p_hwfn->p_dev,
+ ecore_configure_vp_wfq_on_link_change(p_hwfn->p_dev, p_ptt,
p_link->min_pf_rate);
p_link->an = !!(status & LINK_STATUS_AUTO_NEGOTIATE_ENABLED);
@@ -1171,7 +1417,12 @@ static void ecore_mcp_handle_link_change(struct ecore_hwfn *p_hwfn,
p_link->sfp_tx_fault = !!(status & LINK_STATUS_SFP_TX_FAULT);
+ if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE)
+ ecore_mcp_read_eee_config(p_hwfn, p_ptt, p_link);
+
OSAL_LINK_UPDATE(p_hwfn);
+out:
+ OSAL_SPIN_UNLOCK(&p_hwfn->mcp_info->link_lock);
}
enum _ecore_status_t ecore_mcp_set_link(struct ecore_hwfn *p_hwfn,
@@ -1198,12 +1449,32 @@ enum _ecore_status_t ecore_mcp_set_link(struct ecore_hwfn *p_hwfn,
phy_cfg.pause |= (params->pause.forced_tx) ? ETH_PAUSE_TX : 0;
phy_cfg.adv_speed = params->speed.advertised_speeds;
phy_cfg.loopback_mode = params->loopback_mode;
+
+ /* There are MFWs that share this capability regardless of whether
+ * this is feasible or not. And given that at the very least adv_caps
+ * would be set internally by ecore, we want to make sure LFA would
+ * still work.
+ */
+ if ((p_hwfn->mcp_info->capabilities &
+ FW_MB_PARAM_FEATURE_SUPPORT_EEE) &&
+ params->eee.enable) {
+ phy_cfg.eee_cfg |= EEE_CFG_EEE_ENABLED;
+ if (params->eee.tx_lpi_enable)
+ phy_cfg.eee_cfg |= EEE_CFG_TX_LPI;
+ if (params->eee.adv_caps & ECORE_EEE_1G_ADV)
+ phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_1G;
+ if (params->eee.adv_caps & ECORE_EEE_10G_ADV)
+ phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_10G;
+ phy_cfg.eee_cfg |= (params->eee.tx_lpi_timer <<
+ EEE_TX_TIMER_USEC_OFFSET) &
+ EEE_TX_TIMER_USEC_MASK;
+ }
+
p_hwfn->b_drv_link_init = b_up;
if (b_up)
DP_VERBOSE(p_hwfn, ECORE_MSG_LINK,
- "Configuring Link: Speed 0x%08x, Pause 0x%08x,"
- " adv_speed 0x%08x, loopback 0x%08x\n",
+ "Configuring Link: Speed 0x%08x, Pause 0x%08x, adv_speed 0x%08x, loopback 0x%08x\n",
phy_cfg.speed, phy_cfg.pause, phy_cfg.adv_speed,
phy_cfg.loopback_mode);
else
@@ -1221,11 +1492,15 @@ enum _ecore_status_t ecore_mcp_set_link(struct ecore_hwfn *p_hwfn,
return rc;
}
- /* Reset the link status if needed */
- if (!b_up)
- ecore_mcp_handle_link_change(p_hwfn, p_ptt, true);
+ /* Mimic link-change attention, done for several reasons:
+ * - On reset, there's no guarantee MFW would trigger
+ * an attention.
+ * - On initialization, older MFWs might not indicate link change
+ * during LFA, so we'll never get an UP indication.
+ */
+ ecore_mcp_handle_link_change(p_hwfn, p_ptt, !b_up);
- return rc;
+ return ECORE_SUCCESS;
}
u32 ecore_get_process_kill_counter(struct ecore_hwfn *p_hwfn,
@@ -1300,7 +1575,8 @@ static void ecore_mcp_send_protocol_stats(struct ecore_hwfn *p_hwfn,
hsi_param = DRV_MSG_CODE_STATS_TYPE_LAN;
break;
default:
- DP_INFO(p_hwfn, "Invalid protocol type %d\n", type);
+ DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
+ "Invalid protocol type %d\n", type);
return;
}
@@ -1331,7 +1607,7 @@ static void ecore_read_pf_bandwidth(struct ecore_hwfn *p_hwfn,
*/
p_info->bandwidth_min = (p_shmem_info->config &
FUNC_MF_CFG_MIN_BW_MASK) >>
- FUNC_MF_CFG_MIN_BW_SHIFT;
+ FUNC_MF_CFG_MIN_BW_OFFSET;
if (p_info->bandwidth_min < 1 || p_info->bandwidth_min > 100) {
DP_INFO(p_hwfn,
"bandwidth minimum out of bounds [%02x]. Set to 1\n",
@@ -1341,7 +1617,7 @@ static void ecore_read_pf_bandwidth(struct ecore_hwfn *p_hwfn,
p_info->bandwidth_max = (p_shmem_info->config &
FUNC_MF_CFG_MAX_BW_MASK) >>
- FUNC_MF_CFG_MAX_BW_SHIFT;
+ FUNC_MF_CFG_MAX_BW_OFFSET;
if (p_info->bandwidth_max < 1 || p_info->bandwidth_max > 100) {
DP_INFO(p_hwfn,
"bandwidth maximum out of bounds [%02x]. Set to 100\n",
@@ -1350,28 +1626,6 @@ static void ecore_read_pf_bandwidth(struct ecore_hwfn *p_hwfn,
}
}
-static u32 ecore_mcp_get_shmem_func(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- struct public_func *p_data,
- int pfid)
-{
- u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
- PUBLIC_FUNC);
- u32 mfw_path_offsize = ecore_rd(p_hwfn, p_ptt, addr);
- u32 func_addr = SECTION_ADDR(mfw_path_offsize, pfid);
- u32 i, size;
-
- OSAL_MEM_ZERO(p_data, sizeof(*p_data));
-
- size = OSAL_MIN_T(u32, sizeof(*p_data),
- SECTION_SIZE(mfw_path_offsize));
- for (i = 0; i < size / sizeof(u32); i++)
- ((u32 *)p_data)[i] = ecore_rd(p_hwfn, p_ptt,
- func_addr + (i << 2));
-
- return size;
-}
-
static void
ecore_mcp_update_bw(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
{
@@ -1394,8 +1648,7 @@ ecore_mcp_update_bw(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
&param);
}
-static void ecore_mcp_handle_fan_failure(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt)
+static void ecore_mcp_handle_fan_failure(struct ecore_hwfn *p_hwfn)
{
/* A single notification should be sent to upper driver in CMT mode */
if (p_hwfn != ECORE_LEADING_HWFN(p_hwfn->p_dev))
@@ -1436,11 +1689,16 @@ ecore_mcp_mdump_cmd(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
return rc;
p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp;
+
if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) {
- DP_NOTICE(p_hwfn, false,
- "MFW claims that the mdump command is illegal [mdump_cmd 0x%x]\n",
- p_mdump_cmd_params->cmd);
- rc = ECORE_INVAL;
+ DP_INFO(p_hwfn,
+ "The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n",
+ p_mdump_cmd_params->cmd);
+ rc = ECORE_NOTIMPL;
+ } else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+ DP_INFO(p_hwfn,
+ "The mdump command is not supported by the MFW\n");
+ rc = ECORE_NOTIMPL;
}
return rc;
@@ -1498,16 +1756,10 @@ ecore_mcp_mdump_get_config(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
if (rc != ECORE_SUCCESS)
return rc;
- if (mdump_cmd_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
- DP_INFO(p_hwfn,
- "The mdump command is not supported by the MFW\n");
- return ECORE_NOTIMPL;
- }
-
if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
- DP_NOTICE(p_hwfn, false,
- "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n",
- mdump_cmd_params.mcp_resp);
+ DP_INFO(p_hwfn,
+ "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n",
+ mdump_cmd_params.mcp_resp);
rc = ECORE_UNKNOWN_ERROR;
}
@@ -1568,17 +1820,71 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
}
+enum _ecore_status_t
+ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_mdump_retain_data *p_mdump_retain)
+{
+ struct ecore_mdump_cmd_params mdump_cmd_params;
+ struct mdump_retain_data_stc mfw_mdump_retain;
+ enum _ecore_status_t rc;
+
+ OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params));
+ mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN;
+ mdump_cmd_params.p_data_dst = &mfw_mdump_retain;
+ mdump_cmd_params.data_dst_size = sizeof(mfw_mdump_retain);
+
+ rc = ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+ if (rc != ECORE_SUCCESS)
+ return rc;
+
+ if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
+ DP_INFO(p_hwfn,
+ "Failed to get the mdump retained data [mcp_resp 0x%x]\n",
+ mdump_cmd_params.mcp_resp);
+ return ECORE_UNKNOWN_ERROR;
+ }
+
+ p_mdump_retain->valid = mfw_mdump_retain.valid;
+ p_mdump_retain->epoch = mfw_mdump_retain.epoch;
+ p_mdump_retain->pf = mfw_mdump_retain.pf;
+ p_mdump_retain->status = mfw_mdump_retain.status;
+
+ return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ struct ecore_mdump_cmd_params mdump_cmd_params;
+
+ OSAL_MEM_ZERO(&mdump_cmd_params, sizeof(mdump_cmd_params));
+ mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_CLR_RETAIN;
+
+ return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+}
+
static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt)
{
+ struct ecore_mdump_retain_data mdump_retain;
+ enum _ecore_status_t rc;
+
/* In CMT mode - no need for more than a single acknowledgment to the
* MFW, and no more than a single notification to the upper driver.
*/
if (p_hwfn != ECORE_LEADING_HWFN(p_hwfn->p_dev))
return;
- DP_NOTICE(p_hwfn, false,
- "Received a critical error notification from the MFW!\n");
+ rc = ecore_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
+ if (rc == ECORE_SUCCESS && mdump_retain.valid) {
+ DP_NOTICE(p_hwfn, false,
+ "The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n",
+ mdump_retain.epoch, mdump_retain.pf,
+ mdump_retain.status);
+ } else {
+ DP_NOTICE(p_hwfn, false,
+ "The MFW notified that a critical error occurred in the device\n");
+ }
if (p_hwfn->p_dev->allow_mdump) {
DP_NOTICE(p_hwfn, false,
@@ -1586,10 +1892,80 @@ static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn,
return;
}
+ DP_NOTICE(p_hwfn, false,
+ "Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n");
ecore_mcp_mdump_ack(p_hwfn, p_ptt);
ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_HW_ATTN);
}
+void
+ecore_mcp_read_ufp_config(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
+{
+ struct public_func shmem_info;
+ u32 port_cfg, val;
+
+ if (!OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
+ return;
+
+ OSAL_MEMSET(&p_hwfn->ufp_info, 0, sizeof(p_hwfn->ufp_info));
+ port_cfg = ecore_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr +
+ OFFSETOF(struct public_port, oem_cfg_port));
+ val = GET_MFW_FIELD(port_cfg, OEM_CFG_CHANNEL_TYPE);
+ if (val != OEM_CFG_CHANNEL_TYPE_STAGGED)
+ DP_NOTICE(p_hwfn, false, "Incorrect UFP Channel type %d\n",
+ val);
+
+ val = GET_MFW_FIELD(port_cfg, OEM_CFG_SCHED_TYPE);
+ if (val == OEM_CFG_SCHED_TYPE_ETS)
+ p_hwfn->ufp_info.mode = ECORE_UFP_MODE_ETS;
+ else if (val == OEM_CFG_SCHED_TYPE_VNIC_BW)
+ p_hwfn->ufp_info.mode = ECORE_UFP_MODE_VNIC_BW;
+ else
+ DP_NOTICE(p_hwfn, false, "Unknown UFP scheduling mode %d\n",
+ val);
+
+ ecore_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
+ MCP_PF_ID(p_hwfn));
+ val = GET_MFW_FIELD(shmem_info.oem_cfg_func, OEM_CFG_FUNC_TC);
+ p_hwfn->ufp_info.tc = (u8)val;
+ val = GET_MFW_FIELD(shmem_info.oem_cfg_func,
+ OEM_CFG_FUNC_HOST_PRI_CTRL);
+ if (val == OEM_CFG_FUNC_HOST_PRI_CTRL_VNIC)
+ p_hwfn->ufp_info.pri_type = ECORE_UFP_PRI_VNIC;
+ else if (val == OEM_CFG_FUNC_HOST_PRI_CTRL_OS)
+ p_hwfn->ufp_info.pri_type = ECORE_UFP_PRI_OS;
+ else
+ DP_NOTICE(p_hwfn, false, "Unknown Host priority control %d\n",
+ val);
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_DCB,
+ "UFP shmem config: mode = %d tc = %d pri_type = %d\n",
+ p_hwfn->ufp_info.mode, p_hwfn->ufp_info.tc,
+ p_hwfn->ufp_info.pri_type);
+}
+
+static enum _ecore_status_t
+ecore_mcp_handle_ufp_event(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
+{
+ ecore_mcp_read_ufp_config(p_hwfn, p_ptt);
+
+ if (p_hwfn->ufp_info.mode == ECORE_UFP_MODE_VNIC_BW) {
+ p_hwfn->qm_info.ooo_tc = p_hwfn->ufp_info.tc;
+ p_hwfn->hw_info.offload_tc = p_hwfn->ufp_info.tc;
+
+ ecore_qm_reconf(p_hwfn, p_ptt);
+ } else {
+ /* Merge UFP TC with the dcbx TC data */
+ ecore_dcbx_mib_update_event(p_hwfn, p_ptt,
+ ECORE_DCBX_OPERATIONAL_MIB);
+ }
+
+ /* update storm FW with negotiation results */
+ ecore_sp_pf_update_ufp(p_hwfn);
+
+ return ECORE_SUCCESS;
+}
+
enum _ecore_status_t ecore_mcp_handle_events(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt)
{
@@ -1632,6 +2008,15 @@ enum _ecore_status_t ecore_mcp_handle_events(struct ecore_hwfn *p_hwfn,
case MFW_DRV_MSG_DCBX_OPERATIONAL_MIB_UPDATED:
ecore_dcbx_mib_update_event(p_hwfn, p_ptt,
ECORE_DCBX_OPERATIONAL_MIB);
+ /* clear the user-config cache */
+ OSAL_MEMSET(&p_hwfn->p_dcbx_info->set, 0,
+ sizeof(struct ecore_dcbx_set));
+ break;
+ case MFW_DRV_MSG_LLDP_RECEIVED_TLVS_UPDATED:
+ ecore_lldp_mib_update_event(p_hwfn, p_ptt);
+ break;
+ case MFW_DRV_MSG_OEM_CFG_UPDATE:
+ ecore_mcp_handle_ufp_event(p_hwfn, p_ptt);
break;
case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
ecore_mcp_handle_transceiver_change(p_hwfn, p_ptt);
@@ -1649,7 +2034,7 @@ enum _ecore_status_t ecore_mcp_handle_events(struct ecore_hwfn *p_hwfn,
ecore_mcp_update_bw(p_hwfn, p_ptt);
break;
case MFW_DRV_MSG_FAILURE_DETECTED:
- ecore_mcp_handle_fan_failure(p_hwfn, p_ptt);
+ ecore_mcp_handle_fan_failure(p_hwfn);
break;
case MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED:
ecore_mcp_handle_critical_error(p_hwfn, p_ptt);
@@ -1732,14 +2117,13 @@ enum _ecore_status_t ecore_mcp_get_mfw_ver(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
}
-enum _ecore_status_t ecore_mcp_get_media_type(struct ecore_dev *p_dev,
+enum _ecore_status_t ecore_mcp_get_media_type(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
u32 *p_media_type)
{
- struct ecore_hwfn *p_hwfn = &p_dev->hwfns[0];
- struct ecore_ptt *p_ptt;
/* TODO - Add support for VFs */
- if (IS_VF(p_dev))
+ if (IS_VF(p_hwfn->p_dev))
return ECORE_INVAL;
if (!ecore_mcp_is_init(p_hwfn)) {
@@ -1747,16 +2131,15 @@ enum _ecore_status_t ecore_mcp_get_media_type(struct ecore_dev *p_dev,
return ECORE_BUSY;
}
- *p_media_type = MEDIA_UNSPECIFIED;
-
- p_ptt = ecore_ptt_acquire(p_hwfn);
- if (!p_ptt)
- return ECORE_BUSY;
-
- *p_media_type = ecore_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr +
- OFFSETOF(struct public_port, media_type));
-
- ecore_ptt_release(p_hwfn, p_ptt);
+ if (!p_ptt) {
+ *p_media_type = MEDIA_UNSPECIFIED;
+ return ECORE_INVAL;
+ } else {
+ *p_media_type = ecore_rd(p_hwfn, p_ptt,
+ p_hwfn->mcp_info->port_addr +
+ OFFSETOF(struct public_port,
+ media_type));
+ }
return ECORE_SUCCESS;
}
@@ -1928,42 +2311,6 @@ const struct ecore_mcp_function_info
return &p_hwfn->mcp_info->func_info;
}
-enum _ecore_status_t ecore_mcp_nvm_command(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- struct ecore_mcp_nvm_params *params)
-{
- enum _ecore_status_t rc;
-
- switch (params->type) {
- case ECORE_MCP_NVM_RD:
- rc = ecore_mcp_nvm_rd_cmd(p_hwfn, p_ptt, params->nvm_common.cmd,
- params->nvm_common.offset,
- &params->nvm_common.resp,
- &params->nvm_common.param,
- params->nvm_rd.buf_size,
- params->nvm_rd.buf);
- break;
- case ECORE_MCP_CMD:
- rc = ecore_mcp_cmd(p_hwfn, p_ptt, params->nvm_common.cmd,
- params->nvm_common.offset,
- &params->nvm_common.resp,
- &params->nvm_common.param);
- break;
- case ECORE_MCP_NVM_WR:
- rc = ecore_mcp_nvm_wr_cmd(p_hwfn, p_ptt, params->nvm_common.cmd,
- params->nvm_common.offset,
- &params->nvm_common.resp,
- &params->nvm_common.param,
- params->nvm_wr.buf_size,
- params->nvm_wr.buf);
- break;
- default:
- rc = ECORE_NOTIMPL;
- break;
- }
- return rc;
-}
-
int ecore_mcp_get_personality_cnt(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt, u32 personalities)
{
@@ -2009,8 +2356,8 @@ enum _ecore_status_t ecore_mcp_get_flash_size(struct ecore_hwfn *p_hwfn,
flash_size = ecore_rd(p_hwfn, p_ptt, MCP_REG_NVM_CFG4);
flash_size = (flash_size & MCP_REG_NVM_CFG4_FLASH_SIZE) >>
- MCP_REG_NVM_CFG4_FLASH_SIZE_SHIFT;
- flash_size = (1 << (flash_size + MCP_BYTES_PER_MBIT_SHIFT));
+ MCP_REG_NVM_CFG4_FLASH_SIZE_SHIFT;
+ flash_size = (1 << (flash_size + MCP_BYTES_PER_MBIT_OFFSET));
*p_flash_size = flash_size;
@@ -2035,9 +2382,10 @@ enum _ecore_status_t ecore_start_recovery_process(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
}
-enum _ecore_status_t ecore_mcp_config_vf_msix(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u8 vf_id, u8 num)
+static enum _ecore_status_t
+ecore_mcp_config_vf_msix_bb(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u8 vf_id, u8 num)
{
u32 resp = 0, param = 0, rc_param = 0;
enum _ecore_status_t rc;
@@ -2048,9 +2396,9 @@ enum _ecore_status_t ecore_mcp_config_vf_msix(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
num *= p_hwfn->p_dev->num_hwfns;
- param |= (vf_id << DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_SHIFT) &
+ param |= (vf_id << DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_OFFSET) &
DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_MASK;
- param |= (num << DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT) &
+ param |= (num << DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_OFFSET) &
DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_MASK;
rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_CFG_VF_MSIX, param,
@@ -2069,6 +2417,39 @@ enum _ecore_status_t ecore_mcp_config_vf_msix(struct ecore_hwfn *p_hwfn,
return rc;
}
+static enum _ecore_status_t
+ecore_mcp_config_vf_msix_ah(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u8 num)
+{
+ u32 resp = 0, param = num, rc_param = 0;
+ enum _ecore_status_t rc;
+
+ rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_CFG_PF_VFS_MSIX,
+ param, &resp, &rc_param);
+
+ if (resp != FW_MSG_CODE_DRV_CFG_PF_VFS_MSIX_DONE) {
+ DP_NOTICE(p_hwfn, true, "MFW failed to set MSI-X for VFs\n");
+ rc = ECORE_INVAL;
+ } else {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "Requested 0x%02x MSI-x interrupts for VFs\n",
+ num);
+ }
+
+ return rc;
+}
+
+enum _ecore_status_t ecore_mcp_config_vf_msix(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u8 vf_id, u8 num)
+{
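+ /* BB configures MSI-X per VF; AH sets one value for all of the PF's VFs, so vf_id is unused */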
+ if (ECORE_IS_BB(p_hwfn->p_dev))
+ return ecore_mcp_config_vf_msix_bb(p_hwfn, p_ptt, vf_id, num);
+ else
+ return ecore_mcp_config_vf_msix_ah(p_hwfn, p_ptt, num);
+}
+
enum _ecore_status_t
ecore_mcp_send_drv_version(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
struct ecore_mcp_drv_version *p_ver)
@@ -2106,33 +2487,68 @@ ecore_mcp_send_drv_version(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
return rc;
}
+/* Allow the MCP up to 100 msec (10 retries x 10 msec sleep) to halt */
+#define ECORE_MCP_HALT_SLEEP_MS 10
+#define ECORE_MCP_HALT_MAX_RETRIES 10
+
enum _ecore_status_t ecore_mcp_halt(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt)
{
+ u32 resp = 0, param = 0, cpu_state, cnt = 0;
enum _ecore_status_t rc;
- u32 resp = 0, param = 0;
rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp,
&param);
- if (rc != ECORE_SUCCESS)
+ if (rc != ECORE_SUCCESS) {
DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+ return rc;
+ }
- return rc;
+ do {
+ OSAL_MSLEEP(ECORE_MCP_HALT_SLEEP_MS);
+ cpu_state = ecore_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
+ if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED)
+ break;
+ } while (++cnt < ECORE_MCP_HALT_MAX_RETRIES);
+
+ if (cnt == ECORE_MCP_HALT_MAX_RETRIES) {
+ DP_NOTICE(p_hwfn, false,
+ "Failed to halt the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n",
+ ecore_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE), cpu_state);
+ return ECORE_BUSY;
+ }
+
+ ecore_mcp_cmd_set_blocking(p_hwfn, true);
+
+ return ECORE_SUCCESS;
}
+#define ECORE_MCP_RESUME_SLEEP_MS 10
+
enum _ecore_status_t ecore_mcp_resume(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt)
{
- u32 value, cpu_mode;
+ u32 cpu_mode, cpu_state;
ecore_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff);
- value = ecore_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
- value &= ~MCP_REG_CPU_MODE_SOFT_HALT;
- ecore_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value);
cpu_mode = ecore_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+ cpu_mode &= ~MCP_REG_CPU_MODE_SOFT_HALT;
+ ecore_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, cpu_mode);
+
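+ /* Give the MCP time to leave the halted state before checking */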
+ OSAL_MSLEEP(ECORE_MCP_RESUME_SLEEP_MS);
+ cpu_state = ecore_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
+
+ if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) {
+ DP_NOTICE(p_hwfn, false,
+ "Failed to resume the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n",
+ cpu_mode, cpu_state);
+ return ECORE_BUSY;
+ }
+
+ ecore_mcp_cmd_set_blocking(p_hwfn, false);
- return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -1 : 0;
+ return ECORE_SUCCESS;
}
enum _ecore_status_t
@@ -2140,9 +2556,9 @@ ecore_mcp_ov_update_current_config(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
enum ecore_ov_client client)
{
- enum _ecore_status_t rc;
u32 resp = 0, param = 0;
u32 drv_mb_param;
+ enum _ecore_status_t rc;
switch (client) {
case ECORE_OV_CLIENT_DRV:
@@ -2172,9 +2588,9 @@ ecore_mcp_ov_update_driver_state(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
enum ecore_ov_driver_state drv_state)
{
- enum _ecore_status_t rc;
u32 resp = 0, param = 0;
u32 drv_mb_param;
+ enum _ecore_status_t rc;
switch (drv_state) {
case ECORE_OV_DRIVER_STATE_NOT_LOADED:
@@ -2247,8 +2663,8 @@ enum _ecore_status_t ecore_mcp_mask_parities(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
u32 mask_parities)
{
- enum _ecore_status_t rc;
u32 resp = 0, param = 0;
+ enum _ecore_status_t rc;
rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES,
mask_parities, &resp, &param);
@@ -2270,7 +2686,7 @@ enum _ecore_status_t ecore_mcp_nvm_read(struct ecore_dev *p_dev, u32 addr,
{
struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
u32 bytes_left, offset, bytes_to_copy, buf_size;
- struct ecore_mcp_nvm_params params;
+ u32 nvm_offset, resp, param;
struct ecore_ptt *p_ptt;
enum _ecore_status_t rc = ECORE_SUCCESS;
@@ -2278,22 +2694,29 @@ enum _ecore_status_t ecore_mcp_nvm_read(struct ecore_dev *p_dev, u32 addr,
if (!p_ptt)
return ECORE_BUSY;
- OSAL_MEMSET(&params, 0, sizeof(struct ecore_mcp_nvm_params));
bytes_left = len;
offset = 0;
- params.type = ECORE_MCP_NVM_RD;
- params.nvm_rd.buf_size = &buf_size;
- params.nvm_common.cmd = DRV_MSG_CODE_NVM_READ_NVRAM;
while (bytes_left > 0) {
bytes_to_copy = OSAL_MIN_T(u32, bytes_left,
MCP_DRV_NVM_BUF_LEN);
- params.nvm_common.offset = (addr + offset) |
- (bytes_to_copy << DRV_MB_PARAM_NVM_LEN_SHIFT);
- params.nvm_rd.buf = (u32 *)(p_buf + offset);
- rc = ecore_mcp_nvm_command(p_hwfn, p_ptt, &params);
- if (rc != ECORE_SUCCESS || (params.nvm_common.resp !=
- FW_MSG_CODE_NVM_OK)) {
- DP_NOTICE(p_dev, false, "MCP command rc = %d\n", rc);
+ nvm_offset = (addr + offset) | (bytes_to_copy <<
+ DRV_MB_PARAM_NVM_LEN_OFFSET);
+ rc = ecore_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+ DRV_MSG_CODE_NVM_READ_NVRAM,
+ nvm_offset, &resp, &param, &buf_size,
+ (u32 *)(p_buf + offset));
+ if (rc != ECORE_SUCCESS) {
+ DP_NOTICE(p_dev, false,
+ "ecore_mcp_nvm_rd_cmd() failed, rc = %d\n",
+ rc);
+ resp = FW_MSG_CODE_ERROR;
+ break;
+ }
+
+ if (resp != FW_MSG_CODE_NVM_OK) {
+ DP_NOTICE(p_dev, false,
+ "nvm read failed, resp = 0x%08x\n", resp);
+ rc = ECORE_UNKNOWN_ERROR;
break;
}
@@ -2301,14 +2724,14 @@ enum _ecore_status_t ecore_mcp_nvm_read(struct ecore_dev *p_dev, u32 addr,
* isn't preemptible. Sleep a bit to prevent CPU hogging.
*/
if (bytes_left % 0x1000 <
- (bytes_left - *params.nvm_rd.buf_size) % 0x1000)
+ (bytes_left - buf_size) % 0x1000)
OSAL_MSLEEP(1);
- offset += *params.nvm_rd.buf_size;
- bytes_left -= *params.nvm_rd.buf_size;
+ offset += buf_size;
+ bytes_left -= buf_size;
}
- p_dev->mcp_nvm_resp = params.nvm_common.resp;
+ p_dev->mcp_nvm_resp = resp;
ecore_ptt_release(p_hwfn, p_ptt);
return rc;
@@ -2318,26 +2741,23 @@ enum _ecore_status_t ecore_mcp_phy_read(struct ecore_dev *p_dev, u32 cmd,
u32 addr, u8 *p_buf, u32 len)
{
struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
- struct ecore_mcp_nvm_params params;
struct ecore_ptt *p_ptt;
+ u32 resp, param;
enum _ecore_status_t rc;
p_ptt = ecore_ptt_acquire(p_hwfn);
if (!p_ptt)
return ECORE_BUSY;
- OSAL_MEMSET(&params, 0, sizeof(struct ecore_mcp_nvm_params));
- params.type = ECORE_MCP_NVM_RD;
- params.nvm_rd.buf_size = &len;
- params.nvm_common.cmd = (cmd == ECORE_PHY_CORE_READ) ?
- DRV_MSG_CODE_PHY_CORE_READ : DRV_MSG_CODE_PHY_RAW_READ;
- params.nvm_common.offset = addr;
- params.nvm_rd.buf = (u32 *)p_buf;
- rc = ecore_mcp_nvm_command(p_hwfn, p_ptt, &params);
+ rc = ecore_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+ (cmd == ECORE_PHY_CORE_READ) ?
+ DRV_MSG_CODE_PHY_CORE_READ :
+ DRV_MSG_CODE_PHY_RAW_READ,
+ addr, &resp, &param, &len, (u32 *)p_buf);
if (rc != ECORE_SUCCESS)
DP_NOTICE(p_dev, false, "MCP command rc = %d\n", rc);
- p_dev->mcp_nvm_resp = params.nvm_common.resp;
+ p_dev->mcp_nvm_resp = resp;
ecore_ptt_release(p_hwfn, p_ptt);
return rc;
@@ -2346,14 +2766,12 @@ enum _ecore_status_t ecore_mcp_phy_read(struct ecore_dev *p_dev, u32 cmd,
enum _ecore_status_t ecore_mcp_nvm_resp(struct ecore_dev *p_dev, u8 *p_buf)
{
struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
- struct ecore_mcp_nvm_params params;
struct ecore_ptt *p_ptt;
p_ptt = ecore_ptt_acquire(p_hwfn);
if (!p_ptt)
return ECORE_BUSY;
- OSAL_MEMSET(&params, 0, sizeof(struct ecore_mcp_nvm_params));
OSAL_MEMCPY(p_buf, &p_dev->mcp_nvm_resp, sizeof(p_dev->mcp_nvm_resp));
ecore_ptt_release(p_hwfn, p_ptt);
@@ -2363,19 +2781,16 @@ enum _ecore_status_t ecore_mcp_nvm_resp(struct ecore_dev *p_dev, u8 *p_buf)
enum _ecore_status_t ecore_mcp_nvm_del_file(struct ecore_dev *p_dev, u32 addr)
{
struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
- struct ecore_mcp_nvm_params params;
struct ecore_ptt *p_ptt;
+ u32 resp, param;
enum _ecore_status_t rc;
p_ptt = ecore_ptt_acquire(p_hwfn);
if (!p_ptt)
return ECORE_BUSY;
- OSAL_MEMSET(&params, 0, sizeof(struct ecore_mcp_nvm_params));
- params.type = ECORE_MCP_CMD;
- params.nvm_common.cmd = DRV_MSG_CODE_NVM_DEL_FILE;
- params.nvm_common.offset = addr;
- rc = ecore_mcp_nvm_command(p_hwfn, p_ptt, &params);
- p_dev->mcp_nvm_resp = params.nvm_common.resp;
+ rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_DEL_FILE, addr,
+ &resp, &param);
+ p_dev->mcp_nvm_resp = resp;
ecore_ptt_release(p_hwfn, p_ptt);
return rc;
@@ -2385,19 +2800,16 @@ enum _ecore_status_t ecore_mcp_nvm_put_file_begin(struct ecore_dev *p_dev,
u32 addr)
{
struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
- struct ecore_mcp_nvm_params params;
struct ecore_ptt *p_ptt;
+ u32 resp, param;
enum _ecore_status_t rc;
p_ptt = ecore_ptt_acquire(p_hwfn);
if (!p_ptt)
return ECORE_BUSY;
- OSAL_MEMSET(&params, 0, sizeof(struct ecore_mcp_nvm_params));
- params.type = ECORE_MCP_CMD;
- params.nvm_common.cmd = DRV_MSG_CODE_NVM_PUT_FILE_BEGIN;
- params.nvm_common.offset = addr;
- rc = ecore_mcp_nvm_command(p_hwfn, p_ptt, &params);
- p_dev->mcp_nvm_resp = params.nvm_common.resp;
+ rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_PUT_FILE_BEGIN, addr,
+ &resp, &param);
+ p_dev->mcp_nvm_resp = resp;
ecore_ptt_release(p_hwfn, p_ptt);
return rc;
@@ -2409,37 +2821,58 @@ enum _ecore_status_t ecore_mcp_nvm_put_file_begin(struct ecore_dev *p_dev,
enum _ecore_status_t ecore_mcp_nvm_write(struct ecore_dev *p_dev, u32 cmd,
u32 addr, u8 *p_buf, u32 len)
{
+ u32 buf_idx, buf_size, nvm_cmd, nvm_offset, resp, param;
struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
enum _ecore_status_t rc = ECORE_INVAL;
- struct ecore_mcp_nvm_params params;
struct ecore_ptt *p_ptt;
- u32 buf_idx, buf_size;
p_ptt = ecore_ptt_acquire(p_hwfn);
if (!p_ptt)
return ECORE_BUSY;
- OSAL_MEMSET(&params, 0, sizeof(struct ecore_mcp_nvm_params));
- params.type = ECORE_MCP_NVM_WR;
- if (cmd == ECORE_PUT_FILE_DATA)
- params.nvm_common.cmd = DRV_MSG_CODE_NVM_PUT_FILE_DATA;
- else
- params.nvm_common.cmd = DRV_MSG_CODE_NVM_WRITE_NVRAM;
+ switch (cmd) {
+ case ECORE_PUT_FILE_DATA:
+ nvm_cmd = DRV_MSG_CODE_NVM_PUT_FILE_DATA;
+ break;
+ case ECORE_NVM_WRITE_NVRAM:
+ nvm_cmd = DRV_MSG_CODE_NVM_WRITE_NVRAM;
+ break;
+ case ECORE_EXT_PHY_FW_UPGRADE:
+ nvm_cmd = DRV_MSG_CODE_EXT_PHY_FW_UPGRADE;
+ break;
+ default:
+ DP_NOTICE(p_hwfn, true, "Invalid nvm write command 0x%x\n",
+ cmd);
+ rc = ECORE_INVAL;
+ goto out;
+ }
+
buf_idx = 0;
while (buf_idx < len) {
buf_size = OSAL_MIN_T(u32, (len - buf_idx),
MCP_DRV_NVM_BUF_LEN);
- params.nvm_common.offset = ((buf_size <<
- DRV_MB_PARAM_NVM_LEN_SHIFT)
- | addr) + buf_idx;
- params.nvm_wr.buf_size = buf_size;
- params.nvm_wr.buf = (u32 *)&p_buf[buf_idx];
- rc = ecore_mcp_nvm_command(p_hwfn, p_ptt, &params);
- if (rc != ECORE_SUCCESS ||
- ((params.nvm_common.resp != FW_MSG_CODE_NVM_OK) &&
- (params.nvm_common.resp !=
- FW_MSG_CODE_NVM_PUT_FILE_FINISH_OK)))
- DP_NOTICE(p_dev, false, "MCP command rc = %d\n", rc);
+ nvm_offset = ((buf_size << DRV_MB_PARAM_NVM_LEN_OFFSET) |
+ addr) +
+ buf_idx;
+ rc = ecore_mcp_nvm_wr_cmd(p_hwfn, p_ptt, nvm_cmd, nvm_offset,
+ &resp, &param, buf_size,
+ (u32 *)&p_buf[buf_idx]);
+ if (rc != ECORE_SUCCESS) {
+ DP_NOTICE(p_dev, false,
+ "ecore_mcp_nvm_write() failed, rc = %d\n",
+ rc);
+ resp = FW_MSG_CODE_ERROR;
+ break;
+ }
+
+ if (resp != FW_MSG_CODE_OK &&
+ resp != FW_MSG_CODE_NVM_OK &&
+ resp != FW_MSG_CODE_NVM_PUT_FILE_FINISH_OK) {
+ DP_NOTICE(p_dev, false,
+ "nvm write failed, resp = 0x%08x\n", resp);
+ rc = ECORE_UNKNOWN_ERROR;
+ break;
+ }
/* This can be a lengthy process, and it's possible scheduler
* isn't preemptible. Sleep a bit to prevent CPU hogging.
@@ -2451,7 +2884,8 @@ enum _ecore_status_t ecore_mcp_nvm_write(struct ecore_dev *p_dev, u32 cmd,
buf_idx += buf_size;
}
- p_dev->mcp_nvm_resp = params.nvm_common.resp;
+ p_dev->mcp_nvm_resp = resp;
+out:
ecore_ptt_release(p_hwfn, p_ptt);
return rc;
@@ -2461,25 +2895,21 @@ enum _ecore_status_t ecore_mcp_phy_write(struct ecore_dev *p_dev, u32 cmd,
u32 addr, u8 *p_buf, u32 len)
{
struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
- struct ecore_mcp_nvm_params params;
struct ecore_ptt *p_ptt;
+ u32 resp, param, nvm_cmd;
enum _ecore_status_t rc;
p_ptt = ecore_ptt_acquire(p_hwfn);
if (!p_ptt)
return ECORE_BUSY;
- OSAL_MEMSET(&params, 0, sizeof(struct ecore_mcp_nvm_params));
- params.type = ECORE_MCP_NVM_WR;
- params.nvm_wr.buf_size = len;
- params.nvm_common.cmd = (cmd == ECORE_PHY_CORE_WRITE) ?
- DRV_MSG_CODE_PHY_CORE_WRITE : DRV_MSG_CODE_PHY_RAW_WRITE;
- params.nvm_common.offset = addr;
- params.nvm_wr.buf = (u32 *)p_buf;
- rc = ecore_mcp_nvm_command(p_hwfn, p_ptt, &params);
+ nvm_cmd = (cmd == ECORE_PHY_CORE_WRITE) ? DRV_MSG_CODE_PHY_CORE_WRITE :
+ DRV_MSG_CODE_PHY_RAW_WRITE;
+ rc = ecore_mcp_nvm_wr_cmd(p_hwfn, p_ptt, nvm_cmd, addr,
+ &resp, &param, len, (u32 *)p_buf);
if (rc != ECORE_SUCCESS)
DP_NOTICE(p_dev, false, "MCP command rc = %d\n", rc);
- p_dev->mcp_nvm_resp = params.nvm_common.resp;
+ p_dev->mcp_nvm_resp = resp;
ecore_ptt_release(p_hwfn, p_ptt);
return rc;
@@ -2489,20 +2919,17 @@ enum _ecore_status_t ecore_mcp_nvm_set_secure_mode(struct ecore_dev *p_dev,
u32 addr)
{
struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
- struct ecore_mcp_nvm_params params;
struct ecore_ptt *p_ptt;
+ u32 resp, param;
enum _ecore_status_t rc;
p_ptt = ecore_ptt_acquire(p_hwfn);
if (!p_ptt)
return ECORE_BUSY;
- OSAL_MEMSET(&params, 0, sizeof(struct ecore_mcp_nvm_params));
- params.type = ECORE_MCP_CMD;
- params.nvm_common.cmd = DRV_MSG_CODE_SET_SECURE_MODE;
- params.nvm_common.offset = addr;
- rc = ecore_mcp_nvm_command(p_hwfn, p_ptt, &params);
- p_dev->mcp_nvm_resp = params.nvm_common.resp;
+ rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_SET_SECURE_MODE, addr,
+ &resp, &param);
+ p_dev->mcp_nvm_resp = resp;
ecore_ptt_release(p_hwfn, p_ptt);
return rc;
@@ -2513,42 +2940,42 @@ enum _ecore_status_t ecore_mcp_phy_sfp_read(struct ecore_hwfn *p_hwfn,
u32 port, u32 addr, u32 offset,
u32 len, u8 *p_buf)
{
- struct ecore_mcp_nvm_params params;
+ u32 bytes_left, bytes_to_copy, buf_size, nvm_offset;
+ u32 resp, param;
enum _ecore_status_t rc;
- u32 bytes_left, bytes_to_copy, buf_size;
- OSAL_MEMSET(&params, 0, sizeof(struct ecore_mcp_nvm_params));
- params.nvm_common.offset =
- (port << DRV_MB_PARAM_TRANSCEIVER_PORT_SHIFT) |
- (addr << DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_SHIFT);
+ nvm_offset = (port << DRV_MB_PARAM_TRANSCEIVER_PORT_OFFSET) |
+ (addr << DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_OFFSET);
addr = offset;
offset = 0;
bytes_left = len;
- params.type = ECORE_MCP_NVM_RD;
- params.nvm_rd.buf_size = &buf_size;
- params.nvm_common.cmd = DRV_MSG_CODE_TRANSCEIVER_READ;
while (bytes_left > 0) {
bytes_to_copy = OSAL_MIN_T(u32, bytes_left,
MAX_I2C_TRANSACTION_SIZE);
- params.nvm_rd.buf = (u32 *)(p_buf + offset);
- params.nvm_common.offset &=
- (DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK |
- DRV_MB_PARAM_TRANSCEIVER_PORT_MASK);
- params.nvm_common.offset |=
- ((addr + offset) <<
- DRV_MB_PARAM_TRANSCEIVER_OFFSET_SHIFT);
- params.nvm_common.offset |=
- (bytes_to_copy << DRV_MB_PARAM_TRANSCEIVER_SIZE_SHIFT);
- rc = ecore_mcp_nvm_command(p_hwfn, p_ptt, &params);
- if ((params.nvm_common.resp & FW_MSG_CODE_MASK) ==
- FW_MSG_CODE_TRANSCEIVER_NOT_PRESENT) {
+ nvm_offset &= (DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK |
+ DRV_MB_PARAM_TRANSCEIVER_PORT_MASK);
+ nvm_offset |= ((addr + offset) <<
+ DRV_MB_PARAM_TRANSCEIVER_OFFSET_OFFSET);
+ nvm_offset |= (bytes_to_copy <<
+ DRV_MB_PARAM_TRANSCEIVER_SIZE_OFFSET);
+ rc = ecore_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+ DRV_MSG_CODE_TRANSCEIVER_READ,
+ nvm_offset, &resp, &param, &buf_size,
+ (u32 *)(p_buf + offset));
+ if (rc != ECORE_SUCCESS) {
+ DP_NOTICE(p_hwfn, false,
+ "Failed to send a transceiver read command to the MFW. rc = %d.\n",
+ rc);
+ return rc;
+ }
+
+ if (resp == FW_MSG_CODE_TRANSCEIVER_NOT_PRESENT)
return ECORE_NODEV;
- } else if ((params.nvm_common.resp & FW_MSG_CODE_MASK) !=
- FW_MSG_CODE_TRANSCEIVER_DIAG_OK)
+ else if (resp != FW_MSG_CODE_TRANSCEIVER_DIAG_OK)
return ECORE_UNKNOWN_ERROR;
- offset += *params.nvm_rd.buf_size;
- bytes_left -= *params.nvm_rd.buf_size;
+ offset += buf_size;
+ bytes_left -= buf_size;
}
return ECORE_SUCCESS;
@@ -2559,36 +2986,35 @@ enum _ecore_status_t ecore_mcp_phy_sfp_write(struct ecore_hwfn *p_hwfn,
u32 port, u32 addr, u32 offset,
u32 len, u8 *p_buf)
{
- struct ecore_mcp_nvm_params params;
+ u32 buf_idx, buf_size, nvm_offset, resp, param;
enum _ecore_status_t rc;
- u32 buf_idx, buf_size;
-
- OSAL_MEMSET(&params, 0, sizeof(struct ecore_mcp_nvm_params));
- params.nvm_common.offset =
- (port << DRV_MB_PARAM_TRANSCEIVER_PORT_SHIFT) |
- (addr << DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_SHIFT);
- params.type = ECORE_MCP_NVM_WR;
- params.nvm_common.cmd = DRV_MSG_CODE_TRANSCEIVER_WRITE;
+
+ nvm_offset = (port << DRV_MB_PARAM_TRANSCEIVER_PORT_OFFSET) |
+ (addr << DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_OFFSET);
buf_idx = 0;
while (buf_idx < len) {
buf_size = OSAL_MIN_T(u32, (len - buf_idx),
MAX_I2C_TRANSACTION_SIZE);
- params.nvm_common.offset &=
- (DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK |
- DRV_MB_PARAM_TRANSCEIVER_PORT_MASK);
- params.nvm_common.offset |=
- ((offset + buf_idx) <<
- DRV_MB_PARAM_TRANSCEIVER_OFFSET_SHIFT);
- params.nvm_common.offset |=
- (buf_size << DRV_MB_PARAM_TRANSCEIVER_SIZE_SHIFT);
- params.nvm_wr.buf_size = buf_size;
- params.nvm_wr.buf = (u32 *)&p_buf[buf_idx];
- rc = ecore_mcp_nvm_command(p_hwfn, p_ptt, &params);
- if ((params.nvm_common.resp & FW_MSG_CODE_MASK) ==
- FW_MSG_CODE_TRANSCEIVER_NOT_PRESENT) {
+ nvm_offset &= (DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK |
+ DRV_MB_PARAM_TRANSCEIVER_PORT_MASK);
+ nvm_offset |= ((offset + buf_idx) <<
+ DRV_MB_PARAM_TRANSCEIVER_OFFSET_OFFSET);
+ nvm_offset |= (buf_size <<
+ DRV_MB_PARAM_TRANSCEIVER_SIZE_OFFSET);
+ rc = ecore_mcp_nvm_wr_cmd(p_hwfn, p_ptt,
+ DRV_MSG_CODE_TRANSCEIVER_WRITE,
+ nvm_offset, &resp, &param, buf_size,
+ (u32 *)&p_buf[buf_idx]);
+ if (rc != ECORE_SUCCESS) {
+ DP_NOTICE(p_hwfn, false,
+ "Failed to send a transceiver write command to the MFW. rc = %d.\n",
+ rc);
+ return rc;
+ }
+
+ if (resp == FW_MSG_CODE_TRANSCEIVER_NOT_PRESENT)
return ECORE_NODEV;
- } else if ((params.nvm_common.resp & FW_MSG_CODE_MASK) !=
- FW_MSG_CODE_TRANSCEIVER_DIAG_OK)
+ else if (resp != FW_MSG_CODE_TRANSCEIVER_DIAG_OK)
return ECORE_UNKNOWN_ERROR;
buf_idx += buf_size;
@@ -2604,7 +3030,7 @@ enum _ecore_status_t ecore_mcp_gpio_read(struct ecore_hwfn *p_hwfn,
enum _ecore_status_t rc = ECORE_SUCCESS;
u32 drv_mb_param = 0, rsp;
- drv_mb_param = (gpio << DRV_MB_PARAM_GPIO_NUMBER_SHIFT);
+ drv_mb_param = (gpio << DRV_MB_PARAM_GPIO_NUMBER_OFFSET);
rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GPIO_READ,
drv_mb_param, &rsp, gpio_val);
@@ -2625,8 +3051,8 @@ enum _ecore_status_t ecore_mcp_gpio_write(struct ecore_hwfn *p_hwfn,
enum _ecore_status_t rc = ECORE_SUCCESS;
u32 drv_mb_param = 0, param, rsp;
- drv_mb_param = (gpio << DRV_MB_PARAM_GPIO_NUMBER_SHIFT) |
- (gpio_val << DRV_MB_PARAM_GPIO_VALUE_SHIFT);
+ drv_mb_param = (gpio << DRV_MB_PARAM_GPIO_NUMBER_OFFSET) |
+ (gpio_val << DRV_MB_PARAM_GPIO_VALUE_OFFSET);
rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GPIO_WRITE,
drv_mb_param, &rsp, &param);
@@ -2648,7 +3074,7 @@ enum _ecore_status_t ecore_mcp_gpio_info(struct ecore_hwfn *p_hwfn,
u32 drv_mb_param = 0, rsp, val = 0;
enum _ecore_status_t rc = ECORE_SUCCESS;
- drv_mb_param = gpio << DRV_MB_PARAM_GPIO_NUMBER_SHIFT;
+ drv_mb_param = gpio << DRV_MB_PARAM_GPIO_NUMBER_OFFSET;
rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GPIO_INFO,
drv_mb_param, &rsp, &val);
@@ -2656,9 +3082,9 @@ enum _ecore_status_t ecore_mcp_gpio_info(struct ecore_hwfn *p_hwfn,
return rc;
*gpio_direction = (val & DRV_MB_PARAM_GPIO_DIRECTION_MASK) >>
- DRV_MB_PARAM_GPIO_DIRECTION_SHIFT;
+ DRV_MB_PARAM_GPIO_DIRECTION_OFFSET;
*gpio_ctrl = (val & DRV_MB_PARAM_GPIO_CTRL_MASK) >>
- DRV_MB_PARAM_GPIO_CTRL_SHIFT;
+ DRV_MB_PARAM_GPIO_CTRL_OFFSET;
if ((rsp & FW_MSG_CODE_MASK) != FW_MSG_CODE_GPIO_OK)
return ECORE_UNKNOWN_ERROR;
@@ -2673,7 +3099,7 @@ enum _ecore_status_t ecore_mcp_bist_register_test(struct ecore_hwfn *p_hwfn,
enum _ecore_status_t rc = ECORE_SUCCESS;
drv_mb_param = (DRV_MB_PARAM_BIST_REGISTER_TEST <<
- DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT);
+ DRV_MB_PARAM_BIST_TEST_INDEX_OFFSET);
rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BIST_TEST,
drv_mb_param, &rsp, &param);
@@ -2695,7 +3121,7 @@ enum _ecore_status_t ecore_mcp_bist_clock_test(struct ecore_hwfn *p_hwfn,
enum _ecore_status_t rc = ECORE_SUCCESS;
drv_mb_param = (DRV_MB_PARAM_BIST_CLOCK_TEST <<
- DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT);
+ DRV_MB_PARAM_BIST_TEST_INDEX_OFFSET);
rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BIST_TEST,
drv_mb_param, &rsp, &param);
@@ -2717,7 +3143,7 @@ enum _ecore_status_t ecore_mcp_bist_nvm_test_get_num_images(
enum _ecore_status_t rc = ECORE_SUCCESS;
drv_mb_param = (DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES <<
- DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT);
+ DRV_MB_PARAM_BIST_TEST_INDEX_OFFSET);
rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BIST_TEST,
drv_mb_param, &rsp, num_images);
@@ -2735,26 +3161,20 @@ enum _ecore_status_t ecore_mcp_bist_nvm_test_get_image_att(
struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
struct bist_nvm_image_att *p_image_att, u32 image_index)
{
- struct ecore_mcp_nvm_params params;
+ u32 buf_size, nvm_offset, resp, param;
enum _ecore_status_t rc;
- u32 buf_size;
-
- OSAL_MEMSET(&params, 0, sizeof(struct ecore_mcp_nvm_params));
- params.nvm_common.offset = (DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX <<
- DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT);
- params.nvm_common.offset |= (image_index <<
- DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT);
- params.type = ECORE_MCP_NVM_RD;
- params.nvm_rd.buf_size = &buf_size;
- params.nvm_common.cmd = DRV_MSG_CODE_BIST_TEST;
- params.nvm_rd.buf = (u32 *)p_image_att;
-
- rc = ecore_mcp_nvm_command(p_hwfn, p_ptt, &params);
+ nvm_offset = (DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX <<
+ DRV_MB_PARAM_BIST_TEST_INDEX_OFFSET);
+ nvm_offset |= (image_index <<
+ DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_OFFSET);
+ rc = ecore_mcp_nvm_rd_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BIST_TEST,
+ nvm_offset, &resp, &param, &buf_size,
+ (u32 *)p_image_att);
if (rc != ECORE_SUCCESS)
return rc;
- if (((params.nvm_common.resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK) ||
+ if (((resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK) ||
(p_image_att->return_code != 1))
rc = ECORE_UNKNOWN_ERROR;
@@ -2788,13 +3208,13 @@ ecore_mcp_get_temperature_info(struct ecore_hwfn *p_hwfn,
val = mfw_temp_info.sensor[i];
p_temp_sensor = &p_temp_info->sensors[i];
p_temp_sensor->sensor_location = (val & SENSOR_LOCATION_MASK) >>
- SENSOR_LOCATION_SHIFT;
+ SENSOR_LOCATION_OFFSET;
p_temp_sensor->threshold_high = (val & THRESHOLD_HIGH_MASK) >>
- THRESHOLD_HIGH_SHIFT;
+ THRESHOLD_HIGH_OFFSET;
p_temp_sensor->critical = (val & CRITICAL_TEMPERATURE_MASK) >>
- CRITICAL_TEMPERATURE_SHIFT;
+ CRITICAL_TEMPERATURE_OFFSET;
p_temp_sensor->current_temp = (val & CURRENT_TEMP_MASK) >>
- CURRENT_TEMP_SHIFT;
+ CURRENT_TEMP_OFFSET;
}
return ECORE_SUCCESS;
@@ -2805,23 +3225,17 @@ enum _ecore_status_t ecore_mcp_get_mba_versions(
struct ecore_ptt *p_ptt,
struct ecore_mba_vers *p_mba_vers)
{
- struct ecore_mcp_nvm_params params;
+ u32 buf_size, resp, param;
enum _ecore_status_t rc;
- u32 buf_size;
- OSAL_MEM_ZERO(&params, sizeof(params));
- params.type = ECORE_MCP_NVM_RD;
- params.nvm_common.cmd = DRV_MSG_CODE_GET_MBA_VERSION;
- params.nvm_common.offset = 0;
- params.nvm_rd.buf = &p_mba_vers->mba_vers[0];
- params.nvm_rd.buf_size = &buf_size;
- rc = ecore_mcp_nvm_command(p_hwfn, p_ptt, &params);
+ rc = ecore_mcp_nvm_rd_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GET_MBA_VERSION,
+ 0, &resp, &param, &buf_size,
+ &p_mba_vers->mba_vers[0]);
if (rc != ECORE_SUCCESS)
return rc;
- if ((params.nvm_common.resp & FW_MSG_CODE_MASK) !=
- FW_MSG_CODE_NVM_OK)
+ if ((resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK)
rc = ECORE_UNKNOWN_ERROR;
if (buf_size != MCP_DRV_NVM_BUF_LEN)
@@ -2897,9 +3311,9 @@ ecore_mcp_get_mfw_res_id(enum ecore_resources res_id)
#define ECORE_RESC_ALLOC_VERSION_MINOR 0
#define ECORE_RESC_ALLOC_VERSION \
((ECORE_RESC_ALLOC_VERSION_MAJOR << \
- DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT) | \
+ DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_OFFSET) | \
(ECORE_RESC_ALLOC_VERSION_MINOR << \
- DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT))
+ DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_OFFSET))
struct ecore_resc_alloc_in_params {
u32 cmd;
@@ -2983,10 +3397,10 @@ ecore_mcp_resc_allocation_msg(struct ecore_hwfn *p_hwfn,
"Resource message request: cmd 0x%08x, res_id %d [%s], hsi_version %d.%d, val 0x%x\n",
p_in_params->cmd, p_in_params->res_id,
ecore_hw_get_resc_name(p_in_params->res_id),
- ECORE_MFW_GET_FIELD(mb_params.param,
- DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR),
- ECORE_MFW_GET_FIELD(mb_params.param,
- DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR),
+ GET_MFW_FIELD(mb_params.param,
+ DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR),
+ GET_MFW_FIELD(mb_params.param,
+ DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR),
p_in_params->resc_max_val);
rc = ecore_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
@@ -3003,10 +3417,10 @@ ecore_mcp_resc_allocation_msg(struct ecore_hwfn *p_hwfn,
DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
"Resource message response: mfw_hsi_version %d.%d, num 0x%x, start 0x%x, vf_num 0x%x, vf_start 0x%x, flags 0x%08x\n",
- ECORE_MFW_GET_FIELD(p_out_params->mcp_param,
- FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR),
- ECORE_MFW_GET_FIELD(p_out_params->mcp_param,
- FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR),
+ GET_MFW_FIELD(p_out_params->mcp_param,
+ FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR),
+ GET_MFW_FIELD(p_out_params->mcp_param,
+ FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR),
p_out_params->resc_num, p_out_params->resc_start,
p_out_params->vf_resc_num, p_out_params->vf_resc_start,
p_out_params->flags);
@@ -3094,7 +3508,7 @@ static enum _ecore_status_t ecore_mcp_resource_cmd(struct ecore_hwfn *p_hwfn,
}
if (*p_mcp_param == RESOURCE_OPCODE_UNKNOWN_CMD) {
- u8 opcode = ECORE_MFW_GET_FIELD(param, RESOURCE_CMD_REQ_OPCODE);
+ u8 opcode = GET_MFW_FIELD(param, RESOURCE_CMD_REQ_OPCODE);
DP_NOTICE(p_hwfn, false,
"The resource command is unknown to the MFW [param 0x%08x, opcode %d]\n",
@@ -3127,9 +3541,9 @@ __ecore_mcp_resc_lock(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
break;
}
- ECORE_MFW_SET_FIELD(param, RESOURCE_CMD_REQ_RESC, p_params->resource);
- ECORE_MFW_SET_FIELD(param, RESOURCE_CMD_REQ_OPCODE, opcode);
- ECORE_MFW_SET_FIELD(param, RESOURCE_CMD_REQ_AGE, p_params->timeout);
+ SET_MFW_FIELD(param, RESOURCE_CMD_REQ_RESC, p_params->resource);
+ SET_MFW_FIELD(param, RESOURCE_CMD_REQ_OPCODE, opcode);
+ SET_MFW_FIELD(param, RESOURCE_CMD_REQ_AGE, p_params->timeout);
DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
"Resource lock request: param 0x%08x [age %d, opcode %d, resource %d]\n",
@@ -3142,9 +3556,8 @@ __ecore_mcp_resc_lock(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
return rc;
/* Analyze the response */
- p_params->owner = ECORE_MFW_GET_FIELD(mcp_param,
- RESOURCE_CMD_RSP_OWNER);
- opcode = ECORE_MFW_GET_FIELD(mcp_param, RESOURCE_CMD_RSP_OPCODE);
+ p_params->owner = GET_MFW_FIELD(mcp_param, RESOURCE_CMD_RSP_OWNER);
+ opcode = GET_MFW_FIELD(mcp_param, RESOURCE_CMD_RSP_OPCODE);
DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
"Resource lock response: mcp_param 0x%08x [opcode %d, owner %d]\n",
@@ -3199,6 +3612,36 @@ ecore_mcp_resc_lock(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
return ECORE_SUCCESS;
}
+void ecore_mcp_resc_lock_default_init(struct ecore_resc_lock_params *p_lock,
+ struct ecore_resc_unlock_params *p_unlock,
+ enum ecore_resc_lock resource,
+ bool b_is_permanent)
+{
+ if (p_lock != OSAL_NULL) {
+ OSAL_MEM_ZERO(p_lock, sizeof(*p_lock));
+
+ /* Permanent resources don't require aging, and there's no
+ * point in trying to acquire them more than once since it's
+ * unexpected another entity would release them.
+ */
+ if (b_is_permanent) {
+ p_lock->timeout = ECORE_MCP_RESC_LOCK_TO_NONE;
+ } else {
+ p_lock->retry_num = ECORE_MCP_RESC_LOCK_RETRY_CNT_DFLT;
+ p_lock->retry_interval =
+ ECORE_MCP_RESC_LOCK_RETRY_VAL_DFLT;
+ p_lock->sleep_b4_retry = true;
+ }
+
+ p_lock->resource = resource;
+ }
+
+ if (p_unlock != OSAL_NULL) {
+ OSAL_MEM_ZERO(p_unlock, sizeof(*p_unlock));
+ p_unlock->resource = resource;
+ }
+}
+
enum _ecore_status_t
ecore_mcp_resc_unlock(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
struct ecore_resc_unlock_params *p_params)
@@ -3209,8 +3652,8 @@ ecore_mcp_resc_unlock(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
opcode = p_params->b_force ? RESOURCE_OPCODE_FORCE_RELEASE
: RESOURCE_OPCODE_RELEASE;
- ECORE_MFW_SET_FIELD(param, RESOURCE_CMD_REQ_RESC, p_params->resource);
- ECORE_MFW_SET_FIELD(param, RESOURCE_CMD_REQ_OPCODE, opcode);
+ SET_MFW_FIELD(param, RESOURCE_CMD_REQ_RESC, p_params->resource);
+ SET_MFW_FIELD(param, RESOURCE_CMD_REQ_OPCODE, opcode);
DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
"Resource unlock request: param 0x%08x [opcode %d, resource %d]\n",
@@ -3223,7 +3666,7 @@ ecore_mcp_resc_unlock(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
return rc;
/* Analyze the response */
- opcode = ECORE_MFW_GET_FIELD(mcp_param, RESOURCE_CMD_RSP_OPCODE);
+ opcode = GET_MFW_FIELD(mcp_param, RESOURCE_CMD_RSP_OPCODE);
DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
"Resource unlock response: mcp_param 0x%08x [opcode %d]\n",
@@ -3250,3 +3693,137 @@ ecore_mcp_resc_unlock(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
return ECORE_SUCCESS;
}
+
+bool ecore_mcp_is_smart_an_supported(struct ecore_hwfn *p_hwfn)
+{
+ return !!(p_hwfn->mcp_info->capabilities &
+ FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ);
+}
+
+enum _ecore_status_t ecore_mcp_get_capabilities(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ u32 mcp_resp;
+ enum _ecore_status_t rc;
+
+ rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GET_MFW_FEATURE_SUPPORT,
+ 0, &mcp_resp, &p_hwfn->mcp_info->capabilities);
+ if (rc == ECORE_SUCCESS)
+ DP_VERBOSE(p_hwfn, (ECORE_MSG_SP | ECORE_MSG_PROBE),
+ "MFW supported features: %08x\n",
+ p_hwfn->mcp_info->capabilities);
+
+ return rc;
+}
+
+enum _ecore_status_t ecore_mcp_set_capabilities(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ u32 mcp_resp, mcp_param, features;
+
+ features = DRV_MB_PARAM_FEATURE_SUPPORT_PORT_SMARTLINQ |
+ DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE |
+ DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK;
+
+ return ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT,
+ features, &mcp_resp, &mcp_param);
+}
+
+enum _ecore_status_t
+ecore_mcp_drv_attribute(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_mcp_drv_attr *p_drv_attr)
+{
+ struct attribute_cmd_write_stc attr_cmd_write;
+ enum _attribute_commands_e mfw_attr_cmd;
+ struct ecore_mcp_mb_params mb_params;
+ enum _ecore_status_t rc;
+
+ switch (p_drv_attr->attr_cmd) {
+ case ECORE_MCP_DRV_ATTR_CMD_READ:
+ mfw_attr_cmd = ATTRIBUTE_CMD_READ;
+ break;
+ case ECORE_MCP_DRV_ATTR_CMD_WRITE:
+ mfw_attr_cmd = ATTRIBUTE_CMD_WRITE;
+ break;
+ case ECORE_MCP_DRV_ATTR_CMD_READ_CLEAR:
+ mfw_attr_cmd = ATTRIBUTE_CMD_READ_CLEAR;
+ break;
+ case ECORE_MCP_DRV_ATTR_CMD_CLEAR:
+ mfw_attr_cmd = ATTRIBUTE_CMD_CLEAR;
+ break;
+ default:
+ DP_NOTICE(p_hwfn, false, "Unknown attribute command %d\n",
+ p_drv_attr->attr_cmd);
+ return ECORE_INVAL;
+ }
+
+ OSAL_MEM_ZERO(&mb_params, sizeof(mb_params));
+ mb_params.cmd = DRV_MSG_CODE_ATTRIBUTE;
+ SET_MFW_FIELD(mb_params.param, DRV_MB_PARAM_ATTRIBUTE_KEY,
+ p_drv_attr->attr_num);
+ SET_MFW_FIELD(mb_params.param, DRV_MB_PARAM_ATTRIBUTE_CMD,
+ mfw_attr_cmd);
+ if (p_drv_attr->attr_cmd == ECORE_MCP_DRV_ATTR_CMD_WRITE) {
+ OSAL_MEM_ZERO(&attr_cmd_write, sizeof(attr_cmd_write));
+ attr_cmd_write.val = p_drv_attr->val;
+ attr_cmd_write.mask = p_drv_attr->mask;
+ attr_cmd_write.offset = p_drv_attr->offset;
+
+ mb_params.p_data_src = &attr_cmd_write;
+ mb_params.data_src_size = sizeof(attr_cmd_write);
+ }
+
+ rc = ecore_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ if (rc != ECORE_SUCCESS)
+ return rc;
+
+ if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+ DP_INFO(p_hwfn,
+ "The attribute command is not supported by the MFW\n");
+ return ECORE_NOTIMPL;
+ } else if (mb_params.mcp_resp != FW_MSG_CODE_OK) {
+ DP_INFO(p_hwfn,
+ "Failed to send an attribute command [mcp_resp 0x%x, attr_cmd %d, attr_num %d]\n",
+ mb_params.mcp_resp, p_drv_attr->attr_cmd,
+ p_drv_attr->attr_num);
+ return ECORE_INVAL;
+ }
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
+ "Attribute Command: cmd %d [mfw_cmd %d], num %d, in={val 0x%08x, mask 0x%08x, offset 0x%08x}, out={val 0x%08x}\n",
+ p_drv_attr->attr_cmd, mfw_attr_cmd, p_drv_attr->attr_num,
+ p_drv_attr->val, p_drv_attr->mask, p_drv_attr->offset,
+ mb_params.mcp_param);
+
+ if (p_drv_attr->attr_cmd == ECORE_MCP_DRV_ATTR_CMD_READ ||
+ p_drv_attr->attr_cmd == ECORE_MCP_DRV_ATTR_CMD_READ_CLEAR)
+ p_drv_attr->val = mb_params.mcp_param;
+
+ return ECORE_SUCCESS;
+}
+
+void ecore_mcp_wol_wr(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ u32 offset, u32 val)
+{
+ struct ecore_mcp_mb_params mb_params = {0};
+ enum _ecore_status_t rc = ECORE_SUCCESS;
+ u32 dword = val;
+
+ mb_params.cmd = DRV_MSG_CODE_WRITE_WOL_REG;
+ mb_params.param = offset;
+ mb_params.p_data_src = &dword;
+ mb_params.data_src_size = sizeof(dword);
+
+ rc = ecore_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ if (rc != ECORE_SUCCESS) {
+ DP_NOTICE(p_hwfn, false,
+ "Failed to wol write request, rc = %d\n", rc);
+ }
+
+ if (mb_params.mcp_resp != FW_MSG_CODE_WOL_READ_WRITE_OK) {
+ DP_NOTICE(p_hwfn, false,
+ "Failed to write value 0x%x to offset 0x%x [mcp_resp 0x%x]\n",
+ val, offset, mb_params.mcp_resp);
+ rc = ECORE_UNKNOWN_ERROR;
+ }
+}
diff --git a/drivers/net/qede/base/ecore_mcp.h b/drivers/net/qede/base/ecore_mcp.h
index 77fb5a3c..6afaf7de 100644
--- a/drivers/net/qede/base/ecore_mcp.h
+++ b/drivers/net/qede/base/ecore_mcp.h
@@ -13,6 +13,7 @@
#include "mcp_public.h"
#include "ecore.h"
#include "ecore_mcp_api.h"
+#include "ecore_dev_api.h"
/* Using hwfn number (and not pf_num) is required since in CMT mode,
* same pf_num may be used by two different hwfn
@@ -24,17 +25,27 @@
((rel_pfid) | \
((p_hwfn)->abs_pf_id & 1) << 3) : \
rel_pfid)
-#define MCP_PF_ID(p_hwfn) MCP_PF_ID_BY_REL(p_hwfn, (p_hwfn)->rel_pf_id)
+#define MCP_PF_ID(p_hwfn) MCP_PF_ID_BY_REL(p_hwfn, (p_hwfn)->rel_pf_id)
#define MFW_PORT(_p_hwfn) ((_p_hwfn)->abs_pf_id % \
- ((_p_hwfn)->p_dev->num_ports_in_engines * \
- ecore_device_num_engines((_p_hwfn)->p_dev)))
+ ecore_device_num_ports((_p_hwfn)->p_dev))
struct ecore_mcp_info {
- /* Spinlock used for protecting the access to the MFW mailbox */
- osal_spinlock_t lock;
- /* Flag to indicate whether sending a MFW mailbox is forbidden */
- bool block_mb_sending;
+ /* List of mailbox commands which were sent and are awaiting a response */
+ osal_list_t cmd_list;
+
+ /* Spinlock used for protecting the access to the mailbox commands list
+ * and the sending of the commands.
+ */
+ osal_spinlock_t cmd_lock;
+
+ /* Flag to indicate whether sending a MFW mailbox command is blocked */
+ bool b_block_cmd;
+
+ /* Spinlock used for syncing SW link-changes and link-changes
+ * originating from attention context.
+ */
+ osal_spinlock_t link_lock;
/* Address of the MCP public area */
u32 public_base;
@@ -59,7 +70,10 @@ struct ecore_mcp_info {
u8 *mfw_mb_cur;
u8 *mfw_mb_shadow;
u16 mfw_mb_length;
- u16 mcp_hist;
+ u32 mcp_hist;
+
+ /* Capabilities negotiated with the MFW */
+ u32 capabilities;
};
struct ecore_mcp_mb_params {
@@ -97,7 +111,7 @@ enum _ecore_status_t ecore_mcp_cmd_init(struct ecore_hwfn *p_hwfn,
*
* @param p_hwfn
* @param p_ptt
- * Can only be called after `num_ports_in_engines' is set
+ * Can only be called after `num_ports_in_engine' is set
*/
void ecore_mcp_cmd_port_init(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt);
@@ -150,9 +164,13 @@ enum ecore_drv_role {
};
struct ecore_load_req_params {
+ /* Input params */
enum ecore_drv_role drv_role;
u8 timeout_val; /* 1..254, '0' - default value, '255' - no timeout */
bool avoid_eng_reset;
+ enum ecore_override_force_load override_force_load;
+
+ /* Output params */
u32 load_code;
};
@@ -247,56 +265,6 @@ enum _ecore_status_t ecore_mcp_reset(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt);
/**
- * @brief - Sends an NVM write command request to the MFW with
- * payload.
- *
- * @param p_hwfn
- * @param p_ptt
- * @param cmd - Command: Either DRV_MSG_CODE_NVM_WRITE_NVRAM or
- * DRV_MSG_CODE_NVM_PUT_FILE_DATA
- * @param param - [0:23] - Offset [24:31] - Size
- * @param o_mcp_resp - MCP response
- * @param o_mcp_param - MCP response param
- * @param i_txn_size - Buffer size
- * @param i_buf - Pointer to the buffer
- *
- * @param return ECORE_SUCCESS upon success.
- */
-enum _ecore_status_t ecore_mcp_nvm_wr_cmd(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u32 cmd,
- u32 param,
- u32 *o_mcp_resp,
- u32 *o_mcp_param,
- u32 i_txn_size,
- u32 *i_buf);
-
-/**
- * @brief - Sends an NVM read command request to the MFW to get
- * a buffer.
- *
- * @param p_hwfn
- * @param p_ptt
- * @param cmd - Command: DRV_MSG_CODE_NVM_GET_FILE_DATA or
- * DRV_MSG_CODE_NVM_READ_NVRAM commands
- * @param param - [0:23] - Offset [24:31] - Size
- * @param o_mcp_resp - MCP response
- * @param o_mcp_param - MCP response param
- * @param o_txn_size - Buffer size output
- * @param o_buf - Pointer to the buffer returned by the MFW.
- *
- * @param return ECORE_SUCCESS upon success.
- */
-enum _ecore_status_t ecore_mcp_nvm_rd_cmd(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u32 cmd,
- u32 param,
- u32 *o_mcp_resp,
- u32 *o_mcp_param,
- u32 *o_txn_size,
- u32 *o_buf);
-
-/**
* @brief indicates whether the MFW objects [under mcp_info] are accessible
*
* @param p_hwfn
@@ -368,12 +336,33 @@ enum _ecore_status_t ecore_mcp_mdump_set_values(struct ecore_hwfn *p_hwfn,
*
* @param p_hwfn
* @param p_ptt
+ * @param epoch
*
* @param return ECORE_SUCCESS upon success.
*/
enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt);
+struct ecore_mdump_retain_data {
+ u32 valid;
+ u32 epoch;
+ u32 pf;
+ u32 status;
+};
+
+/**
+ * @brief - Gets the mdump retained data from the MFW.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_mdump_retain
+ *
+ * @return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t
+ecore_mcp_mdump_get_retain(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_mdump_retain_data *p_mdump_retain);
+
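A minimal usage sketch for the retained-data API declared above (not part of this patch; the helper name qed_example_log_mdump_retain and the log wording are hypothetical, everything else comes from the declarations in this hunk):

static void qed_example_log_mdump_retain(struct ecore_hwfn *p_hwfn,
					 struct ecore_ptt *p_ptt)
{
	struct ecore_mdump_retain_data mdump_retain;
	enum _ecore_status_t rc;

	/* Ask the MFW for the retained crash-dump metadata */
	rc = ecore_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
	if (rc != ECORE_SUCCESS || !mdump_retain.valid)
		return;

	/* epoch/pf/status describe the last recorded mdump event */
	DP_INFO(p_hwfn, "mdump retained: epoch %u, pf %u, status 0x%08x\n",
		mdump_retain.epoch, mdump_retain.pf, mdump_retain.status);
}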
/**
* @brief - Sets the MFW's max value for the given resource
*
@@ -426,7 +415,12 @@ enum ecore_resc_lock {
/* Locks that the MFW is aware of should be added here downwards */
/* Ecore only locks should be added here upwards */
- ECORE_RESC_LOCK_RESC_ALLOC = ECORE_MCP_RESC_LOCK_MAX_VAL
+ ECORE_RESC_LOCK_RESC_ALLOC = ECORE_MCP_RESC_LOCK_MAX_VAL,
+
+ /* A dummy value to be used for auxiliary functions in need of
+ * returning an 'error' value.
+ */
+ ECORE_RESC_LOCK_RESC_INVALID,
};
struct ecore_resc_lock_params {
@@ -440,9 +434,11 @@ struct ecore_resc_lock_params {
/* Number of times to retry locking */
u8 retry_num;
+#define ECORE_MCP_RESC_LOCK_RETRY_CNT_DFLT 10
/* The interval in usec between retries */
u16 retry_interval;
+#define ECORE_MCP_RESC_LOCK_RETRY_VAL_DFLT 10000
/* Use sleep or delay between retries */
bool sleep_b4_retry;
@@ -493,4 +489,83 @@ enum _ecore_status_t
ecore_mcp_resc_unlock(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
struct ecore_resc_unlock_params *p_params);
+/**
+ * @brief - default initialization for lock/unlock resource structs
+ *
+ * @param p_lock - lock params struct to be initialized; Can be OSAL_NULL
+ * @param p_unlock - unlock params struct to be initialized; Can be OSAL_NULL
+ * @param resource - the requested resource
+ * @param b_is_permanent - disable retries & aging when set
+ */
+void ecore_mcp_resc_lock_default_init(struct ecore_resc_lock_params *p_lock,
+ struct ecore_resc_unlock_params *p_unlock,
+ enum ecore_resc_lock resource,
+ bool b_is_permanent);
+
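For illustration, a hedged sketch of the intended call flow (not part of this patch; qed_example_with_resc_lock is a hypothetical caller, and checking the lock params' grant/owner output fields is omitted):

static enum _ecore_status_t
qed_example_with_resc_lock(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
{
	struct ecore_resc_lock_params lock_params;
	struct ecore_resc_unlock_params unlock_params;
	enum _ecore_status_t rc;

	/* Non-permanent defaults: retry count/interval and sleep-between-
	 * retries are filled in, and both structs get the same resource.
	 */
	ecore_mcp_resc_lock_default_init(&lock_params, &unlock_params,
					 ECORE_RESC_LOCK_RESC_ALLOC, false);

	rc = ecore_mcp_resc_lock(p_hwfn, p_ptt, &lock_params);
	if (rc != ECORE_SUCCESS)
		return rc;

	/* ... MFW-arbitrated critical section ... */

	return ecore_mcp_resc_unlock(p_hwfn, p_ptt, &unlock_params);
}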
+/**
+ * @brief Learn of supported MFW features; To be done during early init
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+enum _ecore_status_t ecore_mcp_get_capabilities(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt);
+
+/**
+ * @brief Inform the MFW of the set of features supported by the driver.
+ * Should be done inside the context of the LOAD_REQ.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+enum _ecore_status_t ecore_mcp_set_capabilities(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt);
+
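The ordering implied by the two @brief notes above could look roughly as follows (an illustrative sketch, not part of this patch; qed_example_negotiate_mfw_features is hypothetical and error handling is trimmed):

static void qed_example_negotiate_mfw_features(struct ecore_hwfn *p_hwfn,
					       struct ecore_ptt *p_ptt)
{
	/* Early init: learn which features the MFW supports */
	(void)ecore_mcp_get_capabilities(p_hwfn, p_ptt);

	/* Within the LOAD_REQ flow: advertise the driver's own features */
	(void)ecore_mcp_set_capabilities(p_hwfn, p_ptt);

	/* Capability-gated behaviour can then key off the negotiated bits */
	if (ecore_mcp_is_smart_an_supported(p_hwfn))
		DP_INFO(p_hwfn, "SmartAN supported by the MFW\n");
}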
+enum ecore_mcp_drv_attr_cmd {
+ ECORE_MCP_DRV_ATTR_CMD_READ,
+ ECORE_MCP_DRV_ATTR_CMD_WRITE,
+ ECORE_MCP_DRV_ATTR_CMD_READ_CLEAR,
+ ECORE_MCP_DRV_ATTR_CMD_CLEAR,
+};
+
+struct ecore_mcp_drv_attr {
+ enum ecore_mcp_drv_attr_cmd attr_cmd;
+ u32 attr_num;
+
+ /* R/RC - will be set with the read value
+ * W - should hold the required value to be written
+ * C - DC
+ */
+ u32 val;
+
+ /* W - mask/offset to be applied on the given value
+ * R/RC/C - DC
+ */
+ u32 mask;
+ u32 offset;
+};
+
+/**
+ * @brief Handle the drivers' attributes that are kept by the MFW.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_drv_attr
+ */
+enum _ecore_status_t
+ecore_mcp_drv_attribute(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ struct ecore_mcp_drv_attr *p_drv_attr);
+
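A short, hedged example of a READ request built from the struct above (not part of this patch; qed_example_read_drv_attr is a hypothetical wrapper around the API declared in this hunk):

static enum _ecore_status_t
qed_example_read_drv_attr(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
			  u32 attr_num, u32 *p_val)
{
	struct ecore_mcp_drv_attr drv_attr;
	enum _ecore_status_t rc;

	OSAL_MEM_ZERO(&drv_attr, sizeof(drv_attr));
	drv_attr.attr_cmd = ECORE_MCP_DRV_ATTR_CMD_READ;
	drv_attr.attr_num = attr_num;

	rc = ecore_mcp_drv_attribute(p_hwfn, p_ptt, &drv_attr);
	if (rc != ECORE_SUCCESS)
		return rc;

	/* For READ/READ_CLEAR, 'val' is filled with the attribute value */
	*p_val = drv_attr.val;
	return ECORE_SUCCESS;
}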
+/**
+ * @brief Read ufp config from the shared memory.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+void
+ecore_mcp_read_ufp_config(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt);
+
+void ecore_mcp_wol_wr(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+ u32 offset, u32 val);
+
#endif /* __ECORE_MCP_H__ */
diff --git a/drivers/net/qede/base/ecore_mcp_api.h b/drivers/net/qede/base/ecore_mcp_api.h
index abc190c9..be3e91f0 100644
--- a/drivers/net/qede/base/ecore_mcp_api.h
+++ b/drivers/net/qede/base/ecore_mcp_api.h
@@ -23,24 +23,51 @@ struct ecore_mcp_link_pause_params {
bool forced_tx;
};
+enum ecore_mcp_eee_mode {
+ ECORE_MCP_EEE_DISABLED,
+ ECORE_MCP_EEE_ENABLED,
+ ECORE_MCP_EEE_UNSUPPORTED
+};
+
+struct ecore_link_eee_params {
+ u32 tx_lpi_timer;
+#define ECORE_EEE_1G_ADV (1 << 0)
+#define ECORE_EEE_10G_ADV (1 << 1)
+ /* Capabilities are represented using ECORE_EEE_*_ADV values */
+ u8 adv_caps;
+ u8 lp_adv_caps;
+ bool enable;
+ bool tx_lpi_enable;
+};
+
struct ecore_mcp_link_params {
struct ecore_mcp_link_speed_params speed;
struct ecore_mcp_link_pause_params pause;
u32 loopback_mode; /* in PMM_LOOPBACK values */
+ struct ecore_link_eee_params eee;
};
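As a small illustration of the representation described in the EEE struct comment above (a sketch only; qed_example_fill_eee and the timer value are hypothetical):

static void qed_example_fill_eee(struct ecore_mcp_link_params *p_params)
{
	p_params->eee.enable = true;
	p_params->eee.tx_lpi_enable = true;
	p_params->eee.tx_lpi_timer = 1000;	/* illustrative value only */
	/* Advertised capabilities are a bitmask of ECORE_EEE_*_ADV flags */
	p_params->eee.adv_caps = ECORE_EEE_1G_ADV | ECORE_EEE_10G_ADV;
}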
struct ecore_mcp_link_capabilities {
u32 speed_capabilities;
bool default_speed_autoneg; /* In Mb/s */
u32 default_speed; /* In Mb/s */
+ enum ecore_mcp_eee_mode default_eee;
+ u32 eee_lpi_timer;
+ u8 eee_speed_caps;
};
struct ecore_mcp_link_state {
bool link_up;
- u32 line_speed; /* In Mb/s */
u32 min_pf_rate; /* In Mb/s */
- u32 speed; /* In Mb/s */
+
+ /* Actual link speed in Mb/s */
+ u32 line_speed;
+
+ /* PF max speed in Mb/s, deduced from line_speed
+ * according to PF max bandwidth configuration.
+ */
+ u32 speed;
bool full_duplex;
bool an;
@@ -67,6 +94,10 @@ struct ecore_mcp_link_state {
u8 partner_adv_pause;
bool sfp_tx_fault;
+
+ bool eee_active;
+ u8 eee_adv_caps;
+ u8 eee_lp_adv_caps;
};
struct ecore_mcp_function_info {
@@ -88,37 +119,6 @@ struct ecore_mcp_function_info {
u16 mtu;
};
-struct ecore_mcp_nvm_common {
- u32 offset;
- u32 param;
- u32 resp;
- u32 cmd;
-};
-
-struct ecore_mcp_nvm_rd {
- u32 *buf_size;
- u32 *buf;
-};
-
-struct ecore_mcp_nvm_wr {
- u32 buf_size;
- u32 *buf;
-};
-
-struct ecore_mcp_nvm_params {
-#define ECORE_MCP_CMD (1 << 0)
-#define ECORE_MCP_NVM_RD (1 << 1)
-#define ECORE_MCP_NVM_WR (1 << 2)
- u8 type;
-
- struct ecore_mcp_nvm_common nvm_common;
-
- union {
- struct ecore_mcp_nvm_rd nvm_rd;
- struct ecore_mcp_nvm_wr nvm_wr;
- };
-};
-
#ifndef __EXTRACT__LINUX__
enum ecore_nvm_images {
ECORE_NVM_IMAGE_ISCSI_CFG,
@@ -583,14 +583,16 @@ enum _ecore_status_t ecore_mcp_get_mfw_ver(struct ecore_hwfn *p_hwfn,
* @brief Get media type value of the port.
*
* @param p_dev - ecore dev pointer
+ * @param p_ptt
* @param mfw_ver - media type value
*
* @return enum _ecore_status_t -
* ECORE_SUCCESS - Operation was successful.
* ECORE_BUSY - Operation failed
*/
-enum _ecore_status_t ecore_mcp_get_media_type(struct ecore_dev *p_dev,
- u32 *media_type);
+enum _ecore_status_t ecore_mcp_get_media_type(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u32 *media_type);
/**
* @brief - Sends a command to the MCP mailbox.
@@ -598,9 +600,9 @@ enum _ecore_status_t ecore_mcp_get_media_type(struct ecore_dev *p_dev,
* @param p_hwfn - hw function
* @param p_ptt - PTT required for register access
* @param cmd - command to be sent to the MCP
- * @param param - optional param
- * @param o_mcp_resp - the MCP response code (exclude sequence)
- * @param o_mcp_param - optional parameter provided by the MCP response
+ * @param param - Optional param
+ * @param o_mcp_resp - The MCP response code (exclude sequence)
+ * @param o_mcp_param - Optional parameter provided by the MCP response
*
* @return enum _ecore_status_t -
* ECORE_SUCCESS - operation was successful
@@ -632,44 +634,6 @@ const struct ecore_mcp_function_info
*ecore_mcp_get_function_info(struct ecore_hwfn *p_hwfn);
#endif
-/**
- * @brief - Function for reading/manipulating the nvram. Following are supported
- * functionalities.
- * 1. Read: Read the specified nvram offset.
- * input values:
- * type - ECORE_MCP_NVM_RD
- * cmd - command code (e.g. DRV_MSG_CODE_NVM_READ_NVRAM)
- * offset - nvm offset
- *
- * output values:
- * buf - buffer
- * buf_size - buffer size
- *
- * 2. Write: Write the data at the specified nvram offset
- * input values:
- * type - ECORE_MCP_NVM_WR
- * cmd - command code (e.g. DRV_MSG_CODE_NVM_WRITE_NVRAM)
- * offset - nvm offset
- * buf - buffer
- * buf_size - buffer size
- *
- * 3. Command: Send the NVM command to MCP.
- * input values:
- * type - ECORE_MCP_CMD
- * cmd - command code (e.g. DRV_MSG_CODE_NVM_DEL_FILE)
- * offset - nvm offset
- *
- *
- * @param p_hwfn
- * @param p_ptt
- * @param params
- *
- * @return ECORE_SUCCESS - operation was successful.
- */
-enum _ecore_status_t ecore_mcp_nvm_command(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- struct ecore_mcp_nvm_params *params);
-
#ifndef LINUX_REMOVE
/**
* @brief - count number of function with a matching personality on engine.
@@ -891,7 +855,7 @@ enum _ecore_status_t ecore_mcp_nvm_resp(struct ecore_dev *p_dev, u8 *p_buf);
* @param p_dev
* @param addr - nvm offset
* @param cmd - nvm command
- * @param p_buf - nvm write buffer
+ * @param p_buf - nvm read buffer
* @param len - buffer len
*
* @return enum _ecore_status_t - ECORE_SUCCESS - operation was successful.
@@ -904,7 +868,7 @@ enum _ecore_status_t ecore_mcp_phy_read(struct ecore_dev *p_dev, u32 cmd,
*
* @param p_dev
* @param addr - nvm offset
- * @param p_buf - nvm write buffer
+ * @param p_buf - nvm read buffer
* @param len - buffer len
*
* @return enum _ecore_status_t - ECORE_SUCCESS - operation was successful.
@@ -913,6 +877,56 @@ enum _ecore_status_t ecore_mcp_nvm_read(struct ecore_dev *p_dev, u32 addr,
u8 *p_buf, u32 len);
/**
+ * @brief - Sends an NVM write command request to the MFW with
+ * payload.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param cmd - Command: Either DRV_MSG_CODE_NVM_WRITE_NVRAM or
+ * DRV_MSG_CODE_NVM_PUT_FILE_DATA
+ * @param param - [0:23] - Offset [24:31] - Size
+ * @param o_mcp_resp - MCP response
+ * @param o_mcp_param - MCP response param
+ * @param i_txn_size - Buffer size
+ * @param i_buf - Pointer to the buffer
+ *
+ * @return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t ecore_mcp_nvm_wr_cmd(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u32 cmd,
+ u32 param,
+ u32 *o_mcp_resp,
+ u32 *o_mcp_param,
+ u32 i_txn_size,
+ u32 *i_buf);
+
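For illustration, a minimal sketch of a single write transaction with the helper declared above (not part of this patch; qed_example_nvm_write_chunk is hypothetical, the offset/size packing mirrors what ecore_mcp_nvm_write() does in the .c hunk earlier in this patch, and chunking plus response-code checking are omitted):

static enum _ecore_status_t
qed_example_nvm_write_chunk(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
			    u32 addr, u32 *p_data, u32 len)
{
	u32 resp = 0, param = 0, nvm_offset;

	/* Bits [0:23] carry the NVM offset, bits [24:31] the chunk size;
	 * len is assumed to be at most MCP_DRV_NVM_BUF_LEN.
	 */
	nvm_offset = (len << DRV_MB_PARAM_NVM_LEN_OFFSET) | addr;

	return ecore_mcp_nvm_wr_cmd(p_hwfn, p_ptt,
				    DRV_MSG_CODE_NVM_WRITE_NVRAM, nvm_offset,
				    &resp, &param, len, p_data);
}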
+/**
+ * @brief - Sends an NVM read command request to the MFW to get
+ * a buffer.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param cmd - Command: DRV_MSG_CODE_NVM_GET_FILE_DATA or
+ * DRV_MSG_CODE_NVM_READ_NVRAM commands
+ * @param param - [0:23] - Offset [24:31] - Size
+ * @param o_mcp_resp - MCP response
+ * @param o_mcp_param - MCP response param
+ * @param o_txn_size - Buffer size output
+ * @param o_buf - Pointer to the buffer returned by the MFW.
+ *
+ * @return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t ecore_mcp_nvm_rd_cmd(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u32 cmd,
+ u32 param,
+ u32 *o_mcp_resp,
+ u32 *o_mcp_param,
+ u32 *o_txn_size,
+ u32 *o_buf);
+
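And a matching read sketch (again not part of this patch; qed_example_nvm_read_word is hypothetical, and the use of DRV_MB_PARAM_NVM_LEN_OFFSET for the size field follows the same packing as the write path above):

static enum _ecore_status_t
qed_example_nvm_read_word(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
			  u32 addr, u32 *p_word)
{
	u32 resp = 0, param = 0, buf_size = 0;
	enum _ecore_status_t rc;

	rc = ecore_mcp_nvm_rd_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_READ_NVRAM,
				  addr | (sizeof(u32) <<
					  DRV_MB_PARAM_NVM_LEN_OFFSET),
				  &resp, &param, &buf_size, p_word);
	if (rc != ECORE_SUCCESS)
		return rc;

	/* On success the MFW reports how many bytes were actually returned */
	return (buf_size == sizeof(u32)) ? ECORE_SUCCESS : ECORE_UNKNOWN_ERROR;
}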
+/**
* @brief Read from sfp
*
* @param p_hwfn - hw function
@@ -1123,6 +1137,17 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt);
/**
+ * @brief - Clear the mdump retained data.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return ECORE_SUCCESS upon success.
+ */
+enum _ecore_status_t ecore_mcp_mdump_clr_retain(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt);
+
+/**
* @brief - Processes the TLV request from MFW i.e., get the required TLV info
* from the ecore client and send it to the MFW.
*
@@ -1134,4 +1159,13 @@ enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn,
enum _ecore_status_t ecore_mfw_process_tlv_req(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt);
+
+/**
+ * @brief - Return whether the management firmware supports smart AN
+ *
+ * @param p_hwfn
+ *
+ * @return bool - true iff feature is supported.
+ */
+bool ecore_mcp_is_smart_an_supported(struct ecore_hwfn *p_hwfn);
#endif
diff --git a/drivers/net/qede/base/ecore_mng_tlv.c b/drivers/net/qede/base/ecore_mng_tlv.c
index 0bf1be88..3a1de094 100644
--- a/drivers/net/qede/base/ecore_mng_tlv.c
+++ b/drivers/net/qede/base/ecore_mng_tlv.c
@@ -1403,9 +1403,9 @@ ecore_mfw_get_iscsi_tlv_value(struct ecore_drv_tlv_hdr *p_tlv,
return -1;
}
-static enum _ecore_status_t
-ecore_mfw_update_tlvs(u8 tlv_group, struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt, u8 *p_mfw_buf, u32 size)
+static enum _ecore_status_t ecore_mfw_update_tlvs(struct ecore_hwfn *p_hwfn,
+ u8 tlv_group, u8 *p_mfw_buf,
+ u32 size)
{
union ecore_mfw_tlv_data *p_tlv_data;
struct ecore_drv_tlv_hdr tlv;
@@ -1512,8 +1512,7 @@ ecore_mfw_process_tlv_req(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
/* Update the TLV values in the local buffer */
for (id = ECORE_MFW_TLV_GENERIC; id < ECORE_MFW_TLV_MAX; id <<= 1) {
if (tlv_group & id) {
- if (ecore_mfw_update_tlvs(id, p_hwfn, p_ptt, p_mfw_buf,
- size))
+ if (ecore_mfw_update_tlvs(p_hwfn, id, p_mfw_buf, size))
goto drv_done;
}
}
diff --git a/drivers/net/qede/base/ecore_proto_if.h b/drivers/net/qede/base/ecore_proto_if.h
index 226e3d2a..66622323 100644
--- a/drivers/net/qede/base/ecore_proto_if.h
+++ b/drivers/net/qede/base/ecore_proto_if.h
@@ -22,6 +22,10 @@ struct ecore_eth_pf_params {
*/
u16 num_cons;
+ /* per-VF number of CIDs */
+ u8 num_vf_cons;
+#define ETH_PF_PARAMS_VF_CONS_DEFAULT (32)
+
/* To enable arfs, previous to HW-init a positive number needs to be
* set [as filters require allocated searcher ILT memory].
* This will set the maximal number of configured steering-filters.
@@ -67,6 +71,7 @@ struct ecore_iscsi_pf_params {
u8 is_target;
u8 bdq_pbl_num_entries[2];
+ u8 disable_stats_collection;
};
enum ecore_rdma_protocol {
diff --git a/drivers/net/qede/base/ecore_rt_defs.h b/drivers/net/qede/base/ecore_rt_defs.h
index c9c23096..1d085815 100644
--- a/drivers/net/qede/base/ecore_rt_defs.h
+++ b/drivers/net/qede/base/ecore_rt_defs.h
@@ -28,424 +28,506 @@
#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15
#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16
#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET 17
-#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 18
-#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 19
-#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 20
-#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 21
-#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 22
-#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 23
-#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 24
-#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761
-#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736
-#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761
-#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736
-#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1497
-#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736
-#define CAU_REG_PI_MEMORY_RT_OFFSET 2233
+#define DORQ_REG_GLB_MAX_ICID_0_RT_OFFSET 18
+#define DORQ_REG_GLB_MAX_ICID_1_RT_OFFSET 19
+#define DORQ_REG_GLB_RANGE2CONN_TYPE_0_RT_OFFSET 20
+#define DORQ_REG_GLB_RANGE2CONN_TYPE_1_RT_OFFSET 21
+#define DORQ_REG_PRV_PF_MAX_ICID_2_RT_OFFSET 22
+#define DORQ_REG_PRV_PF_MAX_ICID_3_RT_OFFSET 23
+#define DORQ_REG_PRV_PF_MAX_ICID_4_RT_OFFSET 24
+#define DORQ_REG_PRV_PF_MAX_ICID_5_RT_OFFSET 25
+#define DORQ_REG_PRV_VF_MAX_ICID_2_RT_OFFSET 26
+#define DORQ_REG_PRV_VF_MAX_ICID_3_RT_OFFSET 27
+#define DORQ_REG_PRV_VF_MAX_ICID_4_RT_OFFSET 28
+#define DORQ_REG_PRV_VF_MAX_ICID_5_RT_OFFSET 29
+#define DORQ_REG_PRV_PF_RANGE2CONN_TYPE_2_RT_OFFSET 30
+#define DORQ_REG_PRV_PF_RANGE2CONN_TYPE_3_RT_OFFSET 31
+#define DORQ_REG_PRV_PF_RANGE2CONN_TYPE_4_RT_OFFSET 32
+#define DORQ_REG_PRV_PF_RANGE2CONN_TYPE_5_RT_OFFSET 33
+#define DORQ_REG_PRV_VF_RANGE2CONN_TYPE_2_RT_OFFSET 34
+#define DORQ_REG_PRV_VF_RANGE2CONN_TYPE_3_RT_OFFSET 35
+#define DORQ_REG_PRV_VF_RANGE2CONN_TYPE_4_RT_OFFSET 36
+#define DORQ_REG_PRV_VF_RANGE2CONN_TYPE_5_RT_OFFSET 37
+#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 38
+#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 39
+#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 40
+#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 41
+#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 42
+#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 43
+#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 44
+#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 45
+#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 1024
+#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1069
+#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 1024
+#define CAU_REG_PI_MEMORY_RT_OFFSET 2093
#define CAU_REG_PI_MEMORY_RT_SIZE 4416
-#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6649
-#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6650
-#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6651
-#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6652
-#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6653
-#define PRS_REG_SEARCH_TCP_RT_OFFSET 6654
-#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6655
-#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6656
-#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6657
-#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6658
-#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6659
-#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6660
-#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6661
-#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6662
-#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 6663
-#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6664
-#define SRC_REG_FIRSTFREE_RT_OFFSET 6665
+#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6509
+#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6510
+#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6511
+#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6512
+#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6513
+#define PRS_REG_SEARCH_TCP_RT_OFFSET 6514
+#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6515
+#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6516
+#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6517
+#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6518
+#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6519
+#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6520
+#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6521
+#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6522
+#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 6523
+#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6524
+#define SRC_REG_FIRSTFREE_RT_OFFSET 6525
#define SRC_REG_FIRSTFREE_RT_SIZE 2
-#define SRC_REG_LASTFREE_RT_OFFSET 6667
+#define SRC_REG_LASTFREE_RT_OFFSET 6527
#define SRC_REG_LASTFREE_RT_SIZE 2
-#define SRC_REG_COUNTFREE_RT_OFFSET 6669
-#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6670
-#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6671
-#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6672
-#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6673
-#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6674
-#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6675
-#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET 6676
-#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6677
-#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6678
-#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6679
-#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6680
-#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6681
-#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6682
-#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6683
-#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6684
-#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6685
-#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6686
-#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6687
-#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6688
-#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6689
-#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6690
-#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6691
-#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6692
-#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6693
-#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6694
-#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6695
-#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6696
-#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6697
-#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6698
-#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6699
-#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6700
-#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6701
-#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6702
-#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 22000
-#define PGLUE_REG_B_VF_BASE_RT_OFFSET 28702
-#define PGLUE_REG_B_MSDM_OFFSET_MASK_B_RT_OFFSET 28703
-#define PGLUE_REG_B_MSDM_VF_SHIFT_B_RT_OFFSET 28704
-#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28705
-#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28706
-#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28707
-#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28708
-#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28709
-#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28710
-#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28711
-#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28712
-#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28713
-#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28714
+#define SRC_REG_COUNTFREE_RT_OFFSET 6529
+#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6530
+#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6531
+#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6532
+#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6533
+#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6534
+#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6535
+#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET 6536
+#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6537
+#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6538
+#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6539
+#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6540
+#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6541
+#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6542
+#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6543
+#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6544
+#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6545
+#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6546
+#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6547
+#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6548
+#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6549
+#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6550
+#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6551
+#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6552
+#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6553
+#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6554
+#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6555
+#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6556
+#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6557
+#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6558
+#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6559
+#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6560
+#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6561
+#define PSWRQ2_REG_TGSRC_FIRST_ILT_RT_OFFSET 6562
+#define PSWRQ2_REG_RGSRC_FIRST_ILT_RT_OFFSET 6563
+#define PSWRQ2_REG_TGSRC_LAST_ILT_RT_OFFSET 6564
+#define PSWRQ2_REG_RGSRC_LAST_ILT_RT_OFFSET 6565
+#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6566
+#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 26414
+#define PGLUE_REG_B_VF_BASE_RT_OFFSET 32980
+#define PGLUE_REG_B_MSDM_OFFSET_MASK_B_RT_OFFSET 32981
+#define PGLUE_REG_B_MSDM_VF_SHIFT_B_RT_OFFSET 32982
+#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 32983
+#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 32984
+#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 32985
+#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 32986
+#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 32987
+#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 32988
+#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 32989
+#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 32990
+#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 32991
+#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 32992
#define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416
-#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29130
+#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 33408
#define TM_REG_CONFIG_TASK_MEM_RT_SIZE 608
-#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29738
-#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29739
-#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29740
-#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29741
-#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29742
-#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29743
-#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29744
-#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29745
-#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29746
-#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29747
-#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29748
-#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29749
-#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29750
-#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29751
-#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29752
-#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29753
-#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29754
-#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29755
-#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29756
-#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29757
-#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29758
-#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29759
-#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29760
-#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29761
-#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29762
-#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29763
-#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29764
-#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29765
-#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29766
-#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29767
-#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29768
-#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29769
-#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29770
-#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29771
-#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29772
-#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29773
-#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29774
-#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29775
-#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29776
-#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29777
-#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29778
-#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29779
-#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29780
-#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29781
-#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29782
-#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 29783
-#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29784
-#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29785
-#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29786
-#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29787
-#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29788
-#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29789
-#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29790
-#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29791
-#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29792
-#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29793
-#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29794
-#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29795
-#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29796
-#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29797
-#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29798
-#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29799
-#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29800
-#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29801
-#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29802
-#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29803
-#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29804
-#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29805
+#define QM_REG_MAXPQSIZE_0_RT_OFFSET 34016
+#define QM_REG_MAXPQSIZE_1_RT_OFFSET 34017
+#define QM_REG_MAXPQSIZE_2_RT_OFFSET 34018
+#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 34019
+#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 34020
+#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 34021
+#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 34022
+#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 34023
+#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 34024
+#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 34025
+#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 34026
+#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 34027
+#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 34028
+#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 34029
+#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 34030
+#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 34031
+#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 34032
+#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 34033
+#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 34034
+#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 34035
+#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 34036
+#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 34037
+#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 34038
+#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 34039
+#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 34040
+#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 34041
+#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 34042
+#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 34043
+#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 34044
+#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 34045
+#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 34046
+#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 34047
+#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 34048
+#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 34049
+#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 34050
+#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 34051
+#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 34052
+#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 34053
+#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 34054
+#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 34055
+#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 34056
+#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 34057
+#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 34058
+#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 34059
+#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 34060
+#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 34061
+#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 34062
+#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 34063
+#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 34064
+#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 34065
+#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 34066
+#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 34067
+#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 34068
+#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 34069
+#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 34070
+#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 34071
+#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 34072
+#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 34073
+#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 34074
+#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 34075
+#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 34076
+#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 34077
+#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 34078
+#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 34079
+#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 34080
+#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 34081
+#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 34082
+#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 34083
#define QM_REG_BASEADDROTHERPQ_RT_SIZE 128
-#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29933
-#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29934
-#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29935
-#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29936
-#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29937
-#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29938
-#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29939
-#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29940
-#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29941
-#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29942
-#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29943
-#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29944
-#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29945
-#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29946
-#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29947
-#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29948
-#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29949
-#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29950
-#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29951
-#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29952
-#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29953
-#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29954
-#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29955
-#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29956
-#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29957
-#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29958
-#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29959
-#define QM_REG_PQTX2PF_0_RT_OFFSET 29960
-#define QM_REG_PQTX2PF_1_RT_OFFSET 29961
-#define QM_REG_PQTX2PF_2_RT_OFFSET 29962
-#define QM_REG_PQTX2PF_3_RT_OFFSET 29963
-#define QM_REG_PQTX2PF_4_RT_OFFSET 29964
-#define QM_REG_PQTX2PF_5_RT_OFFSET 29965
-#define QM_REG_PQTX2PF_6_RT_OFFSET 29966
-#define QM_REG_PQTX2PF_7_RT_OFFSET 29967
-#define QM_REG_PQTX2PF_8_RT_OFFSET 29968
-#define QM_REG_PQTX2PF_9_RT_OFFSET 29969
-#define QM_REG_PQTX2PF_10_RT_OFFSET 29970
-#define QM_REG_PQTX2PF_11_RT_OFFSET 29971
-#define QM_REG_PQTX2PF_12_RT_OFFSET 29972
-#define QM_REG_PQTX2PF_13_RT_OFFSET 29973
-#define QM_REG_PQTX2PF_14_RT_OFFSET 29974
-#define QM_REG_PQTX2PF_15_RT_OFFSET 29975
-#define QM_REG_PQTX2PF_16_RT_OFFSET 29976
-#define QM_REG_PQTX2PF_17_RT_OFFSET 29977
-#define QM_REG_PQTX2PF_18_RT_OFFSET 29978
-#define QM_REG_PQTX2PF_19_RT_OFFSET 29979
-#define QM_REG_PQTX2PF_20_RT_OFFSET 29980
-#define QM_REG_PQTX2PF_21_RT_OFFSET 29981
-#define QM_REG_PQTX2PF_22_RT_OFFSET 29982
-#define QM_REG_PQTX2PF_23_RT_OFFSET 29983
-#define QM_REG_PQTX2PF_24_RT_OFFSET 29984
-#define QM_REG_PQTX2PF_25_RT_OFFSET 29985
-#define QM_REG_PQTX2PF_26_RT_OFFSET 29986
-#define QM_REG_PQTX2PF_27_RT_OFFSET 29987
-#define QM_REG_PQTX2PF_28_RT_OFFSET 29988
-#define QM_REG_PQTX2PF_29_RT_OFFSET 29989
-#define QM_REG_PQTX2PF_30_RT_OFFSET 29990
-#define QM_REG_PQTX2PF_31_RT_OFFSET 29991
-#define QM_REG_PQTX2PF_32_RT_OFFSET 29992
-#define QM_REG_PQTX2PF_33_RT_OFFSET 29993
-#define QM_REG_PQTX2PF_34_RT_OFFSET 29994
-#define QM_REG_PQTX2PF_35_RT_OFFSET 29995
-#define QM_REG_PQTX2PF_36_RT_OFFSET 29996
-#define QM_REG_PQTX2PF_37_RT_OFFSET 29997
-#define QM_REG_PQTX2PF_38_RT_OFFSET 29998
-#define QM_REG_PQTX2PF_39_RT_OFFSET 29999
-#define QM_REG_PQTX2PF_40_RT_OFFSET 30000
-#define QM_REG_PQTX2PF_41_RT_OFFSET 30001
-#define QM_REG_PQTX2PF_42_RT_OFFSET 30002
-#define QM_REG_PQTX2PF_43_RT_OFFSET 30003
-#define QM_REG_PQTX2PF_44_RT_OFFSET 30004
-#define QM_REG_PQTX2PF_45_RT_OFFSET 30005
-#define QM_REG_PQTX2PF_46_RT_OFFSET 30006
-#define QM_REG_PQTX2PF_47_RT_OFFSET 30007
-#define QM_REG_PQTX2PF_48_RT_OFFSET 30008
-#define QM_REG_PQTX2PF_49_RT_OFFSET 30009
-#define QM_REG_PQTX2PF_50_RT_OFFSET 30010
-#define QM_REG_PQTX2PF_51_RT_OFFSET 30011
-#define QM_REG_PQTX2PF_52_RT_OFFSET 30012
-#define QM_REG_PQTX2PF_53_RT_OFFSET 30013
-#define QM_REG_PQTX2PF_54_RT_OFFSET 30014
-#define QM_REG_PQTX2PF_55_RT_OFFSET 30015
-#define QM_REG_PQTX2PF_56_RT_OFFSET 30016
-#define QM_REG_PQTX2PF_57_RT_OFFSET 30017
-#define QM_REG_PQTX2PF_58_RT_OFFSET 30018
-#define QM_REG_PQTX2PF_59_RT_OFFSET 30019
-#define QM_REG_PQTX2PF_60_RT_OFFSET 30020
-#define QM_REG_PQTX2PF_61_RT_OFFSET 30021
-#define QM_REG_PQTX2PF_62_RT_OFFSET 30022
-#define QM_REG_PQTX2PF_63_RT_OFFSET 30023
-#define QM_REG_PQOTHER2PF_0_RT_OFFSET 30024
-#define QM_REG_PQOTHER2PF_1_RT_OFFSET 30025
-#define QM_REG_PQOTHER2PF_2_RT_OFFSET 30026
-#define QM_REG_PQOTHER2PF_3_RT_OFFSET 30027
-#define QM_REG_PQOTHER2PF_4_RT_OFFSET 30028
-#define QM_REG_PQOTHER2PF_5_RT_OFFSET 30029
-#define QM_REG_PQOTHER2PF_6_RT_OFFSET 30030
-#define QM_REG_PQOTHER2PF_7_RT_OFFSET 30031
-#define QM_REG_PQOTHER2PF_8_RT_OFFSET 30032
-#define QM_REG_PQOTHER2PF_9_RT_OFFSET 30033
-#define QM_REG_PQOTHER2PF_10_RT_OFFSET 30034
-#define QM_REG_PQOTHER2PF_11_RT_OFFSET 30035
-#define QM_REG_PQOTHER2PF_12_RT_OFFSET 30036
-#define QM_REG_PQOTHER2PF_13_RT_OFFSET 30037
-#define QM_REG_PQOTHER2PF_14_RT_OFFSET 30038
-#define QM_REG_PQOTHER2PF_15_RT_OFFSET 30039
-#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 30040
-#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 30041
-#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 30042
-#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 30043
-#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 30044
-#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 30045
-#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 30046
-#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 30047
-#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 30048
-#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 30049
-#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 30050
-#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 30051
-#define QM_REG_RLGLBLINCVAL_RT_OFFSET 30052
+#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 34211
+#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 34212
+#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 34213
+#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 34214
+#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 34215
+#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 34216
+#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 34217
+#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 34218
+#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 34219
+#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 34220
+#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 34221
+#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 34222
+#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 34223
+#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 34224
+#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 34225
+#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 34226
+#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 34227
+#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 34228
+#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 34229
+#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 34230
+#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 34231
+#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 34232
+#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 34233
+#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 34234
+#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 34235
+#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 34236
+#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 34237
+#define QM_REG_PQTX2PF_0_RT_OFFSET 34238
+#define QM_REG_PQTX2PF_1_RT_OFFSET 34239
+#define QM_REG_PQTX2PF_2_RT_OFFSET 34240
+#define QM_REG_PQTX2PF_3_RT_OFFSET 34241
+#define QM_REG_PQTX2PF_4_RT_OFFSET 34242
+#define QM_REG_PQTX2PF_5_RT_OFFSET 34243
+#define QM_REG_PQTX2PF_6_RT_OFFSET 34244
+#define QM_REG_PQTX2PF_7_RT_OFFSET 34245
+#define QM_REG_PQTX2PF_8_RT_OFFSET 34246
+#define QM_REG_PQTX2PF_9_RT_OFFSET 34247
+#define QM_REG_PQTX2PF_10_RT_OFFSET 34248
+#define QM_REG_PQTX2PF_11_RT_OFFSET 34249
+#define QM_REG_PQTX2PF_12_RT_OFFSET 34250
+#define QM_REG_PQTX2PF_13_RT_OFFSET 34251
+#define QM_REG_PQTX2PF_14_RT_OFFSET 34252
+#define QM_REG_PQTX2PF_15_RT_OFFSET 34253
+#define QM_REG_PQTX2PF_16_RT_OFFSET 34254
+#define QM_REG_PQTX2PF_17_RT_OFFSET 34255
+#define QM_REG_PQTX2PF_18_RT_OFFSET 34256
+#define QM_REG_PQTX2PF_19_RT_OFFSET 34257
+#define QM_REG_PQTX2PF_20_RT_OFFSET 34258
+#define QM_REG_PQTX2PF_21_RT_OFFSET 34259
+#define QM_REG_PQTX2PF_22_RT_OFFSET 34260
+#define QM_REG_PQTX2PF_23_RT_OFFSET 34261
+#define QM_REG_PQTX2PF_24_RT_OFFSET 34262
+#define QM_REG_PQTX2PF_25_RT_OFFSET 34263
+#define QM_REG_PQTX2PF_26_RT_OFFSET 34264
+#define QM_REG_PQTX2PF_27_RT_OFFSET 34265
+#define QM_REG_PQTX2PF_28_RT_OFFSET 34266
+#define QM_REG_PQTX2PF_29_RT_OFFSET 34267
+#define QM_REG_PQTX2PF_30_RT_OFFSET 34268
+#define QM_REG_PQTX2PF_31_RT_OFFSET 34269
+#define QM_REG_PQTX2PF_32_RT_OFFSET 34270
+#define QM_REG_PQTX2PF_33_RT_OFFSET 34271
+#define QM_REG_PQTX2PF_34_RT_OFFSET 34272
+#define QM_REG_PQTX2PF_35_RT_OFFSET 34273
+#define QM_REG_PQTX2PF_36_RT_OFFSET 34274
+#define QM_REG_PQTX2PF_37_RT_OFFSET 34275
+#define QM_REG_PQTX2PF_38_RT_OFFSET 34276
+#define QM_REG_PQTX2PF_39_RT_OFFSET 34277
+#define QM_REG_PQTX2PF_40_RT_OFFSET 34278
+#define QM_REG_PQTX2PF_41_RT_OFFSET 34279
+#define QM_REG_PQTX2PF_42_RT_OFFSET 34280
+#define QM_REG_PQTX2PF_43_RT_OFFSET 34281
+#define QM_REG_PQTX2PF_44_RT_OFFSET 34282
+#define QM_REG_PQTX2PF_45_RT_OFFSET 34283
+#define QM_REG_PQTX2PF_46_RT_OFFSET 34284
+#define QM_REG_PQTX2PF_47_RT_OFFSET 34285
+#define QM_REG_PQTX2PF_48_RT_OFFSET 34286
+#define QM_REG_PQTX2PF_49_RT_OFFSET 34287
+#define QM_REG_PQTX2PF_50_RT_OFFSET 34288
+#define QM_REG_PQTX2PF_51_RT_OFFSET 34289
+#define QM_REG_PQTX2PF_52_RT_OFFSET 34290
+#define QM_REG_PQTX2PF_53_RT_OFFSET 34291
+#define QM_REG_PQTX2PF_54_RT_OFFSET 34292
+#define QM_REG_PQTX2PF_55_RT_OFFSET 34293
+#define QM_REG_PQTX2PF_56_RT_OFFSET 34294
+#define QM_REG_PQTX2PF_57_RT_OFFSET 34295
+#define QM_REG_PQTX2PF_58_RT_OFFSET 34296
+#define QM_REG_PQTX2PF_59_RT_OFFSET 34297
+#define QM_REG_PQTX2PF_60_RT_OFFSET 34298
+#define QM_REG_PQTX2PF_61_RT_OFFSET 34299
+#define QM_REG_PQTX2PF_62_RT_OFFSET 34300
+#define QM_REG_PQTX2PF_63_RT_OFFSET 34301
+#define QM_REG_PQOTHER2PF_0_RT_OFFSET 34302
+#define QM_REG_PQOTHER2PF_1_RT_OFFSET 34303
+#define QM_REG_PQOTHER2PF_2_RT_OFFSET 34304
+#define QM_REG_PQOTHER2PF_3_RT_OFFSET 34305
+#define QM_REG_PQOTHER2PF_4_RT_OFFSET 34306
+#define QM_REG_PQOTHER2PF_5_RT_OFFSET 34307
+#define QM_REG_PQOTHER2PF_6_RT_OFFSET 34308
+#define QM_REG_PQOTHER2PF_7_RT_OFFSET 34309
+#define QM_REG_PQOTHER2PF_8_RT_OFFSET 34310
+#define QM_REG_PQOTHER2PF_9_RT_OFFSET 34311
+#define QM_REG_PQOTHER2PF_10_RT_OFFSET 34312
+#define QM_REG_PQOTHER2PF_11_RT_OFFSET 34313
+#define QM_REG_PQOTHER2PF_12_RT_OFFSET 34314
+#define QM_REG_PQOTHER2PF_13_RT_OFFSET 34315
+#define QM_REG_PQOTHER2PF_14_RT_OFFSET 34316
+#define QM_REG_PQOTHER2PF_15_RT_OFFSET 34317
+#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 34318
+#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 34319
+#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 34320
+#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 34321
+#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 34322
+#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 34323
+#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 34324
+#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 34325
+#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 34326
+#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 34327
+#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 34328
+#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 34329
+#define QM_REG_RLGLBLINCVAL_RT_OFFSET 34330
#define QM_REG_RLGLBLINCVAL_RT_SIZE 256
-#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30308
+#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 34586
#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256
-#define QM_REG_RLGLBLCRD_RT_OFFSET 30564
+#define QM_REG_RLGLBLCRD_RT_OFFSET 34842
#define QM_REG_RLGLBLCRD_RT_SIZE 256
-#define QM_REG_RLGLBLENABLE_RT_OFFSET 30820
-#define QM_REG_RLPFPERIOD_RT_OFFSET 30821
-#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30822
-#define QM_REG_RLPFINCVAL_RT_OFFSET 30823
+#define QM_REG_RLGLBLENABLE_RT_OFFSET 35098
+#define QM_REG_RLPFPERIOD_RT_OFFSET 35099
+#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 35100
+#define QM_REG_RLPFINCVAL_RT_OFFSET 35101
#define QM_REG_RLPFINCVAL_RT_SIZE 16
-#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30839
+#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 35117
#define QM_REG_RLPFUPPERBOUND_RT_SIZE 16
-#define QM_REG_RLPFCRD_RT_OFFSET 30855
+#define QM_REG_RLPFCRD_RT_OFFSET 35133
#define QM_REG_RLPFCRD_RT_SIZE 16
-#define QM_REG_RLPFENABLE_RT_OFFSET 30871
-#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30872
-#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30873
+#define QM_REG_RLPFENABLE_RT_OFFSET 35149
+#define QM_REG_RLPFVOQENABLE_RT_OFFSET 35150
+#define QM_REG_WFQPFWEIGHT_RT_OFFSET 35151
#define QM_REG_WFQPFWEIGHT_RT_SIZE 16
-#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30889
+#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 35167
#define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16
-#define QM_REG_WFQPFCRD_RT_OFFSET 30905
+#define QM_REG_WFQPFCRD_RT_OFFSET 35183
#define QM_REG_WFQPFCRD_RT_SIZE 256
-#define QM_REG_WFQPFENABLE_RT_OFFSET 31161
-#define QM_REG_WFQVPENABLE_RT_OFFSET 31162
-#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31163
+#define QM_REG_WFQPFENABLE_RT_OFFSET 35439
+#define QM_REG_WFQVPENABLE_RT_OFFSET 35440
+#define QM_REG_BASEADDRTXPQ_RT_OFFSET 35441
#define QM_REG_BASEADDRTXPQ_RT_SIZE 512
-#define QM_REG_TXPQMAP_RT_OFFSET 31675
+#define QM_REG_TXPQMAP_RT_OFFSET 35953
#define QM_REG_TXPQMAP_RT_SIZE 512
-#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32187
+#define QM_REG_WFQVPWEIGHT_RT_OFFSET 36465
#define QM_REG_WFQVPWEIGHT_RT_SIZE 512
-#define QM_REG_WFQVPCRD_RT_OFFSET 32699
+#define QM_REG_WFQVPCRD_RT_OFFSET 36977
#define QM_REG_WFQVPCRD_RT_SIZE 512
-#define QM_REG_WFQVPMAP_RT_OFFSET 33211
+#define QM_REG_WFQVPMAP_RT_OFFSET 37489
#define QM_REG_WFQVPMAP_RT_SIZE 512
-#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 33723
+#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 38001
#define QM_REG_WFQPFCRD_MSB_RT_SIZE 320
-#define QM_REG_VOQCRDLINE_RT_OFFSET 34043
+#define QM_REG_VOQCRDLINE_RT_OFFSET 38321
#define QM_REG_VOQCRDLINE_RT_SIZE 36
-#define QM_REG_VOQINITCRDLINE_RT_OFFSET 34079
+#define QM_REG_VOQINITCRDLINE_RT_OFFSET 38357
#define QM_REG_VOQINITCRDLINE_RT_SIZE 36
-#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 34115
-#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 34116
-#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 34117
-#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 34118
-#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 34119
-#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 34120
-#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 34121
-#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 34122
+#define QM_REG_RLPFVOQENABLE_MSB_RT_OFFSET 38393
+#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 38394
+#define NIG_REG_BRB_GATE_DNTFWD_PORT_RT_OFFSET 38395
+#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 38396
+#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 38397
+#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 38398
+#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 38399
+#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 38400
+#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 38401
#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4
-#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 34126
-#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE 4
-#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 34130
+#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 38405
#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4
-#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 34134
-#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 34135
+#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 38409
#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32
-#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 34167
+#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 38441
#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16
-#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 34183
+#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 38457
#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16
-#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 34199
+#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 38473
#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16
-#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 34215
+#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 38489
#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16
-#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 34231
-#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET 34232
-#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 34233
-#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 34234
-#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 34235
-#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 34236
-#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 34237
-#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 34238
-#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 34239
-#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 34240
-#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 34241
-#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 34242
-#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 34243
-#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 34244
-#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 34245
-#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 34246
-#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 34247
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 34248
-#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 34249
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 34250
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 34251
-#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 34252
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 34253
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 34254
-#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 34255
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 34256
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 34257
-#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 34258
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 34259
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 34260
-#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 34261
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 34262
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 34263
-#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 34264
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 34265
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 34266
-#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 34267
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 34268
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 34269
-#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 34270
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 34271
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 34272
-#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 34273
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 34274
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 34275
-#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 34276
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 34277
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 34278
-#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 34279
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 34280
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 34281
-#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 34282
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 34283
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 34284
-#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 34285
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 34286
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 34287
-#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 34288
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 34289
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 34290
-#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 34291
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 34292
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 34293
-#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 34294
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 34295
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 34296
-#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 34297
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 34298
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 34299
-#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 34300
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 34301
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 34302
-#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 34303
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 34304
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 34305
-#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 34306
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 34307
-#define XCM_REG_CON_PHY_Q3_RT_OFFSET 34308
+#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 38505
+#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET 38506
+#define NIG_REG_PPF_TO_ENGINE_SEL_RT_OFFSET 38507
+#define NIG_REG_PPF_TO_ENGINE_SEL_RT_SIZE 8
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_VALUE_RT_OFFSET 38515
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_VALUE_RT_SIZE 1024
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_EN_RT_OFFSET 39539
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_EN_RT_SIZE 512
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_MODE_RT_OFFSET 40051
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_MODE_RT_SIZE 512
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 40563
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 512
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_HDR_SEL_RT_OFFSET 41075
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_HDR_SEL_RT_SIZE 512
+#define NIG_REG_LLH_PF_CLS_FILTERS_MAP_RT_OFFSET 41587
+#define NIG_REG_LLH_PF_CLS_FILTERS_MAP_RT_SIZE 32
+#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 41619
+#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 41620
+#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 41621
+#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 41622
+#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 41623
+#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 41624
+#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 41625
+#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 41626
+#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 41627
+#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 41628
+#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 41629
+#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 41630
+#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 41631
+#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 41632
+#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 41633
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 41634
+#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 41635
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 41636
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 41637
+#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 41638
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 41639
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 41640
+#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 41641
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 41642
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 41643
+#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 41644
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 41645
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 41646
+#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 41647
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 41648
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 41649
+#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 41650
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 41651
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 41652
+#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 41653
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 41654
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 41655
+#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 41656
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 41657
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 41658
+#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 41659
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 41660
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 41661
+#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 41662
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 41663
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 41664
+#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 41665
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 41666
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 41667
+#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 41668
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 41669
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 41670
+#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 41671
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 41672
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 41673
+#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 41674
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 41675
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 41676
+#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 41677
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 41678
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 41679
+#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 41680
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 41681
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 41682
+#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 41683
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 41684
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 41685
+#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 41686
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 41687
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 41688
+#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 41689
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 41690
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 41691
+#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 41692
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 41693
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ20_RT_OFFSET 41694
+#define PBF_REG_BTB_GUARANTEED_VOQ20_RT_OFFSET 41695
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ20_RT_OFFSET 41696
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ21_RT_OFFSET 41697
+#define PBF_REG_BTB_GUARANTEED_VOQ21_RT_OFFSET 41698
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ21_RT_OFFSET 41699
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ22_RT_OFFSET 41700
+#define PBF_REG_BTB_GUARANTEED_VOQ22_RT_OFFSET 41701
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ22_RT_OFFSET 41702
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ23_RT_OFFSET 41703
+#define PBF_REG_BTB_GUARANTEED_VOQ23_RT_OFFSET 41704
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ23_RT_OFFSET 41705
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ24_RT_OFFSET 41706
+#define PBF_REG_BTB_GUARANTEED_VOQ24_RT_OFFSET 41707
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ24_RT_OFFSET 41708
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ25_RT_OFFSET 41709
+#define PBF_REG_BTB_GUARANTEED_VOQ25_RT_OFFSET 41710
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ25_RT_OFFSET 41711
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ26_RT_OFFSET 41712
+#define PBF_REG_BTB_GUARANTEED_VOQ26_RT_OFFSET 41713
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ26_RT_OFFSET 41714
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ27_RT_OFFSET 41715
+#define PBF_REG_BTB_GUARANTEED_VOQ27_RT_OFFSET 41716
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ27_RT_OFFSET 41717
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ28_RT_OFFSET 41718
+#define PBF_REG_BTB_GUARANTEED_VOQ28_RT_OFFSET 41719
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ28_RT_OFFSET 41720
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ29_RT_OFFSET 41721
+#define PBF_REG_BTB_GUARANTEED_VOQ29_RT_OFFSET 41722
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ29_RT_OFFSET 41723
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ30_RT_OFFSET 41724
+#define PBF_REG_BTB_GUARANTEED_VOQ30_RT_OFFSET 41725
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ30_RT_OFFSET 41726
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ31_RT_OFFSET 41727
+#define PBF_REG_BTB_GUARANTEED_VOQ31_RT_OFFSET 41728
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ31_RT_OFFSET 41729
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ32_RT_OFFSET 41730
+#define PBF_REG_BTB_GUARANTEED_VOQ32_RT_OFFSET 41731
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ32_RT_OFFSET 41732
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ33_RT_OFFSET 41733
+#define PBF_REG_BTB_GUARANTEED_VOQ33_RT_OFFSET 41734
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ33_RT_OFFSET 41735
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ34_RT_OFFSET 41736
+#define PBF_REG_BTB_GUARANTEED_VOQ34_RT_OFFSET 41737
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ34_RT_OFFSET 41738
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ35_RT_OFFSET 41739
+#define PBF_REG_BTB_GUARANTEED_VOQ35_RT_OFFSET 41740
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ35_RT_OFFSET 41741
+#define XCM_REG_CON_PHY_Q3_RT_OFFSET 41742
-#define RUNTIME_ARRAY_SIZE 34309
+#define RUNTIME_ARRAY_SIZE 41743
#endif /* __RT_DEFS_H__ */
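For orientation (not part of the patch): each *_RT_OFFSET above names a slot in the per-function runtime-init array, whose total length is RUNTIME_ARRAY_SIZE, and each *_RT_SIZE gives how many consecutive slots an arrayed register occupies. A minimal sketch of that indexing, assuming plain C types and a hypothetical store_rt_reg() helper in place of the driver's real runtime-init machinery:

/* Illustrative only: *_RT_OFFSET constants index a runtime-init array
 * of RUNTIME_ARRAY_SIZE entries; store_rt_reg() is a hypothetical
 * stand-in for the driver's real helpers.
 */
static unsigned int rt_data[RUNTIME_ARRAY_SIZE];

static void store_rt_reg(unsigned int rt_offset, unsigned int val)
{
	rt_data[rt_offset] = val;
}

static void example_fill_runtime_array(void)
{
	unsigned int i;

	/* Scalar register: exactly one slot. */
	store_rt_reg(QM_REG_RLGLBLENABLE_RT_OFFSET, 1);

	/* Arrayed register: occupies OFFSET .. OFFSET + SIZE - 1. */
	for (i = 0; i < QM_REG_TXPQMAP_RT_SIZE; i++)
		store_rt_reg(QM_REG_TXPQMAP_RT_OFFSET + i, 0);
}

The renumbering in this hunk simply shifts every existing slot upward to make room for the new QM/NIG/PBF entries, which is why RUNTIME_ARRAY_SIZE grows from 34309 to 41743.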
diff --git a/drivers/net/qede/base/ecore_sp_api.h b/drivers/net/qede/base/ecore_sp_api.h
index c8e564f9..86e84964 100644
--- a/drivers/net/qede/base/ecore_sp_api.h
+++ b/drivers/net/qede/base/ecore_sp_api.h
@@ -49,6 +49,7 @@ enum _ecore_status_t ecore_eth_cqe_completion(struct ecore_hwfn *p_hwfn,
* for a physical function (PF).
*
* @param p_hwfn
+ * @param p_ptt
* @param p_tunn - pf update tunneling parameters
* @param comp_mode - completion mode
* @param p_comp_data - callback function
@@ -58,6 +59,7 @@ enum _ecore_status_t ecore_eth_cqe_completion(struct ecore_hwfn *p_hwfn,
enum _ecore_status_t
ecore_sp_pf_update_tunn_cfg(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
struct ecore_tunnel_info *p_tunn,
enum spq_mode comp_mode,
struct ecore_spq_comp_cb *p_comp_data);
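A minimal call-site sketch for the updated prototype (not taken from the patch): it assumes the caller holds no PTT yet, uses blocking completion via ECORE_SPQ_MODE_EBLOCK, and only flips the VXLAN port fields used elsewhere in this series; example_update_vxlan_port() is a hypothetical name.

/* Illustrative caller of the new five-argument prototype. */
static enum _ecore_status_t
example_update_vxlan_port(struct ecore_hwfn *p_hwfn, u16 port)
{
	struct ecore_tunnel_info tunn;
	struct ecore_ptt *p_ptt;
	enum _ecore_status_t rc;

	p_ptt = ecore_ptt_acquire(p_hwfn);
	if (!p_ptt)
		return ECORE_AGAIN;

	OSAL_MEMSET(&tunn, 0, sizeof(tunn));
	tunn.vxlan_port.b_update_port = true;
	tunn.vxlan_port.port = port;

	rc = ecore_sp_pf_update_tunn_cfg(p_hwfn, p_ptt, &tunn,
					 ECORE_SPQ_MODE_EBLOCK, OSAL_NULL);

	ecore_ptt_release(p_hwfn, p_ptt);
	return rc;
}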
diff --git a/drivers/net/qede/base/ecore_sp_commands.c b/drivers/net/qede/base/ecore_sp_commands.c
index d6e4b9e0..7598e7a6 100644
--- a/drivers/net/qede/base/ecore_sp_commands.c
+++ b/drivers/net/qede/base/ecore_sp_commands.c
@@ -232,6 +232,7 @@ static void ecore_set_hw_tunn_mode(struct ecore_hwfn *p_hwfn,
}
static void ecore_set_hw_tunn_mode_port(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
struct ecore_tunnel_info *p_tunn)
{
if (ECORE_IS_BB_A0(p_hwfn->p_dev)) {
@@ -241,14 +242,14 @@ static void ecore_set_hw_tunn_mode_port(struct ecore_hwfn *p_hwfn,
}
if (p_tunn->vxlan_port.b_update_port)
- ecore_set_vxlan_dest_port(p_hwfn, p_hwfn->p_main_ptt,
+ ecore_set_vxlan_dest_port(p_hwfn, p_ptt,
p_tunn->vxlan_port.port);
if (p_tunn->geneve_port.b_update_port)
- ecore_set_geneve_dest_port(p_hwfn, p_hwfn->p_main_ptt,
+ ecore_set_geneve_dest_port(p_hwfn, p_ptt,
p_tunn->geneve_port.port);
- ecore_set_hw_tunn_mode(p_hwfn, p_hwfn->p_main_ptt, p_tunn);
+ ecore_set_hw_tunn_mode(p_hwfn, p_ptt, p_tunn);
}
static void
@@ -293,9 +294,11 @@ ecore_tunn_set_pf_start_params(struct ecore_hwfn *p_hwfn,
&p_tun->ip_gre);
}
+#define ETH_P_8021Q 0x8100
+
enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
struct ecore_tunnel_info *p_tunn,
- enum ecore_mf_mode mode,
bool allow_npar_tx_switch)
{
struct pf_start_ramrod_data *p_ramrod = OSAL_NULL;
@@ -305,6 +308,7 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
struct ecore_sp_init_data init_data;
enum _ecore_status_t rc = ECORE_NOTIMPL;
u8 page_cnt;
+ int i;
/* update initial eq producer */
ecore_eq_prod_update(p_hwfn,
@@ -332,20 +336,26 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
p_ramrod->dont_log_ramrods = 0;
p_ramrod->log_type_mask = OSAL_CPU_TO_LE16(0x8f);
- switch (mode) {
- case ECORE_MF_DEFAULT:
- case ECORE_MF_NPAR:
- p_ramrod->mf_mode = MF_NPAR;
- break;
- case ECORE_MF_OVLAN:
+ if (OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits))
p_ramrod->mf_mode = MF_OVLAN;
- break;
- default:
- DP_NOTICE(p_hwfn, true,
- "Unsupported MF mode, init as DEFAULT\n");
+ else
p_ramrod->mf_mode = MF_NPAR;
+
+ p_ramrod->outer_tag_config.outer_tag.tci =
+ OSAL_CPU_TO_LE16(p_hwfn->hw_info.ovlan);
+
+ if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits)) {
+ p_ramrod->outer_tag_config.outer_tag.tpid =
+ OSAL_CPU_TO_LE16(ETH_P_8021Q);
+ if (p_hwfn->ufp_info.pri_type == ECORE_UFP_PRI_OS)
+ p_ramrod->outer_tag_config.enable_stag_pri_change = 1;
+ else
+ p_ramrod->outer_tag_config.enable_stag_pri_change = 0;
+ p_ramrod->outer_tag_config.pri_map_valid = 1;
+ for (i = 0; i < 8; i++)
+ p_ramrod->outer_tag_config.inner_to_outer_pri_map[i] =
+ (u8)i;
}
- p_ramrod->outer_tag = p_hwfn->hw_info.ovlan;
/* Place EQ address in RAMROD */
DMA_REGPAIR_LE(p_ramrod->event_ring_pbl_addr,
@@ -358,7 +368,8 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
ecore_tunn_set_pf_start_params(p_hwfn, p_tunn,
&p_ramrod->tunnel_config);
- if (IS_MF_SI(p_hwfn))
+ if (OSAL_TEST_BIT(ECORE_MF_INTER_PF_SWITCH,
+ &p_hwfn->p_dev->mf_bits))
p_ramrod->allow_npar_tx_switching = allow_npar_tx_switch;
switch (p_hwfn->hw_info.personality) {
@@ -384,18 +395,20 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
p_ramrod->hsi_fp_ver.minor_ver_arr[ETH_VER_KEY] = ETH_HSI_VER_MINOR;
DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ,
- "Setting event_ring_sb [id %04x index %02x], outer_tag [%d]\n",
- sb, sb_index, p_ramrod->outer_tag);
+ "Setting event_ring_sb [id %04x index %02x], outer_tag.tpid [%d], outer_tag.tci [%d]\n",
+ sb, sb_index, p_ramrod->outer_tag_config.outer_tag.tpid,
+ p_ramrod->outer_tag_config.outer_tag.tci);
rc = ecore_spq_post(p_hwfn, p_ent, OSAL_NULL);
if (p_tunn)
- ecore_set_hw_tunn_mode_port(p_hwfn, &p_hwfn->p_dev->tunnel);
+ ecore_set_hw_tunn_mode_port(p_hwfn, p_ptt,
+ &p_hwfn->p_dev->tunnel);
return rc;
}
-enum _ecore_status_t ecore_sp_pf_update(struct ecore_hwfn *p_hwfn)
+enum _ecore_status_t ecore_sp_pf_update_dcbx(struct ecore_hwfn *p_hwfn)
{
struct ecore_spq_entry *p_ent = OSAL_NULL;
struct ecore_sp_init_data init_data;
@@ -419,6 +432,50 @@ enum _ecore_status_t ecore_sp_pf_update(struct ecore_hwfn *p_hwfn)
return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL);
}
+enum _ecore_status_t ecore_sp_pf_update_ufp(struct ecore_hwfn *p_hwfn)
+{
+ struct ecore_spq_entry *p_ent = OSAL_NULL;
+ struct ecore_sp_init_data init_data;
+ enum _ecore_status_t rc = ECORE_NOTIMPL;
+
+ /* Get SPQ entry */
+ OSAL_MEMSET(&init_data, 0, sizeof(init_data));
+ init_data.cid = ecore_spq_get_cid(p_hwfn);
+ init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ init_data.comp_mode = ECORE_SPQ_MODE_CB;
+
+ rc = ecore_sp_init_request(p_hwfn, &p_ent,
+ COMMON_RAMROD_PF_UPDATE, PROTOCOLID_COMMON,
+ &init_data);
+ if (rc != ECORE_SUCCESS)
+ return rc;
+
+ p_ent->ramrod.pf_update.update_enable_stag_pri_change = true;
+ if (p_hwfn->ufp_info.pri_type == ECORE_UFP_PRI_OS)
+ p_ent->ramrod.pf_update.enable_stag_pri_change = 1;
+ else
+ p_ent->ramrod.pf_update.enable_stag_pri_change = 0;
+
+ return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL);
+}
+
+
+/* QM rate limiter resolution is 1.6Mbps */
+#define QM_RL_RESOLUTION(mb_val) ((mb_val) * 10 / 16)
+
+/* FW uses 1/64k to express gd */
+#define FW_GD_RESOLUTION(gd) (64 * 1024 / (gd))
+
+u16 ecore_sp_rl_mb_to_qm(u32 mb_val)
+{
+ return (u16)OSAL_MIN_T(u32, (u16)(~0U), QM_RL_RESOLUTION(mb_val));
+}
+
+u16 ecore_sp_rl_gd_denom(u32 gd)
+{
+ return gd ? (u16)OSAL_MIN_T(u32, (u16)(~0U), FW_GD_RESOLUTION(gd)) : 0;
+}
+
enum _ecore_status_t ecore_sp_rl_update(struct ecore_hwfn *p_hwfn,
struct ecore_rl_update_params *params)
{
@@ -450,21 +507,37 @@ enum _ecore_status_t ecore_sp_rl_update(struct ecore_hwfn *p_hwfn,
rl_update->rl_id_last = params->rl_id_last;
rl_update->rl_dc_qcn_flg = params->rl_dc_qcn_flg;
rl_update->rl_bc_rate = OSAL_CPU_TO_LE32(params->rl_bc_rate);
- rl_update->rl_max_rate = OSAL_CPU_TO_LE16(params->rl_max_rate);
- rl_update->rl_r_ai = OSAL_CPU_TO_LE16(params->rl_r_ai);
- rl_update->rl_r_hai = OSAL_CPU_TO_LE16(params->rl_r_hai);
- rl_update->dcqcn_g = OSAL_CPU_TO_LE16(params->dcqcn_g);
+ rl_update->rl_max_rate =
+ OSAL_CPU_TO_LE16(ecore_sp_rl_mb_to_qm(params->rl_max_rate));
+ rl_update->rl_r_ai =
+ OSAL_CPU_TO_LE16(ecore_sp_rl_mb_to_qm(params->rl_r_ai));
+ rl_update->rl_r_hai =
+ OSAL_CPU_TO_LE16(ecore_sp_rl_mb_to_qm(params->rl_r_hai));
+ rl_update->dcqcn_g =
+ OSAL_CPU_TO_LE16(ecore_sp_rl_gd_denom(params->dcqcn_gd));
rl_update->dcqcn_k_us = OSAL_CPU_TO_LE32(params->dcqcn_k_us);
- rl_update->dcqcn_timeuot_us = OSAL_CPU_TO_LE32(
- params->dcqcn_timeuot_us);
+ rl_update->dcqcn_timeuot_us =
+ OSAL_CPU_TO_LE32(params->dcqcn_timeuot_us);
rl_update->qcn_timeuot_us = OSAL_CPU_TO_LE32(params->qcn_timeuot_us);
+ DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "rl_params: qcn_update_param_flg %x, dcqcn_update_param_flg %x, rl_init_flg %x, rl_start_flg %x, rl_stop_flg %x, rl_id_first %x, rl_id_last %x, rl_dc_qcn_flg %x, rl_bc_rate %x, rl_max_rate %x, rl_r_ai %x, rl_r_hai %x, dcqcn_g %x, dcqcn_k_us %x, dcqcn_timeuot_us %x, qcn_timeuot_us %x\n",
+ rl_update->qcn_update_param_flg,
+ rl_update->dcqcn_update_param_flg,
+ rl_update->rl_init_flg, rl_update->rl_start_flg,
+ rl_update->rl_stop_flg, rl_update->rl_id_first,
+ rl_update->rl_id_last, rl_update->rl_dc_qcn_flg,
+ rl_update->rl_bc_rate, rl_update->rl_max_rate,
+ rl_update->rl_r_ai, rl_update->rl_r_hai,
+ rl_update->dcqcn_g, rl_update->dcqcn_k_us,
+ rl_update->dcqcn_timeuot_us, rl_update->qcn_timeuot_us);
+
return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL);
}
/* Set pf update ramrod command params */
enum _ecore_status_t
ecore_sp_pf_update_tunn_cfg(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
struct ecore_tunnel_info *p_tunn,
enum spq_mode comp_mode,
struct ecore_spq_comp_cb *p_comp_data)
@@ -505,7 +578,7 @@ ecore_sp_pf_update_tunn_cfg(struct ecore_hwfn *p_hwfn,
if (rc != ECORE_SUCCESS)
return rc;
- ecore_set_hw_tunn_mode_port(p_hwfn, &p_hwfn->p_dev->tunnel);
+ ecore_set_hw_tunn_mode_port(p_hwfn, p_ptt, &p_hwfn->p_dev->tunnel);
return rc;
}
@@ -551,3 +624,28 @@ enum _ecore_status_t ecore_sp_heartbeat_ramrod(struct ecore_hwfn *p_hwfn)
return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL);
}
+
+enum _ecore_status_t ecore_sp_pf_update_stag(struct ecore_hwfn *p_hwfn)
+{
+ struct ecore_spq_entry *p_ent = OSAL_NULL;
+ struct ecore_sp_init_data init_data;
+ enum _ecore_status_t rc = ECORE_NOTIMPL;
+
+ /* Get SPQ entry */
+ OSAL_MEMSET(&init_data, 0, sizeof(init_data));
+ init_data.cid = ecore_spq_get_cid(p_hwfn);
+ init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ init_data.comp_mode = ECORE_SPQ_MODE_CB;
+
+ rc = ecore_sp_init_request(p_hwfn, &p_ent,
+ COMMON_RAMROD_PF_UPDATE, PROTOCOLID_COMMON,
+ &init_data);
+ if (rc != ECORE_SUCCESS)
+ return rc;
+
+ p_ent->ramrod.pf_update.update_mf_vlan_flag = true;
+ p_ent->ramrod.pf_update.mf_vlan =
+ OSAL_CPU_TO_LE16(p_hwfn->hw_info.ovlan);
+
+ return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL);
+}
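To make the new unit handling concrete: the API now accepts plain u32 Mbps/gain values and converts them internally, where QM_RL_RESOLUTION() maps Mbps into the QM's 1.6 Mbps granularity and FW_GD_RESOLUTION() maps the DCQCN alpha-update gain into the firmware's 1/64K denominator, both clamped to 16 bits by the helpers above. A self-contained worked example (macro bodies copied from the hunk above, sample values arbitrary):

/* Worked numbers for the conversion helpers added in this file. */
#include <stdio.h>

#define QM_RL_RESOLUTION(mb_val)  ((mb_val) * 10 / 16)  /* 1.6 Mbps units */
#define FW_GD_RESOLUTION(gd)      (64 * 1024 / (gd))    /* 1/64K denominator */

int main(void)
{
	/* 1600 Mbps -> 1000 QM units; 25000 Mbps -> 15625 QM units */
	printf("%u %u\n", QM_RL_RESOLUTION(1600u), QM_RL_RESOLUTION(25000u));

	/* gain 1024 -> denominator 64; gain 64 -> denominator 1024 */
	printf("%u %u\n", FW_GD_RESOLUTION(1024u), FW_GD_RESOLUTION(64u));

	return 0;
}

ecore_sp_rl_gd_denom() additionally guards against a zero gain to avoid dividing by zero, and anything above 0xffff after conversion is clamped because the ramrod fields stay 16 bits wide.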
diff --git a/drivers/net/qede/base/ecore_sp_commands.h b/drivers/net/qede/base/ecore_sp_commands.h
index 33e31e42..98009c65 100644
--- a/drivers/net/qede/base/ecore_sp_commands.h
+++ b/drivers/net/qede/base/ecore_sp_commands.h
@@ -59,8 +59,8 @@ enum _ecore_status_t ecore_sp_init_request(struct ecore_hwfn *p_hwfn,
* to the internal RAM of the UStorm by the Function Start Ramrod.
*
* @param p_hwfn
+ * @param p_ptt
* @param p_tunn - pf start tunneling configuration
- * @param mode
* @param allow_npar_tx_switch - npar tx switching to be used
* for vports configured for tx-switching.
*
@@ -68,8 +68,8 @@ enum _ecore_status_t ecore_sp_init_request(struct ecore_hwfn *p_hwfn,
*/
enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
struct ecore_tunnel_info *p_tunn,
- enum ecore_mf_mode mode,
bool allow_npar_tx_switch);
/**
@@ -85,7 +85,7 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
* @return enum _ecore_status_t
*/
-enum _ecore_status_t ecore_sp_pf_update(struct ecore_hwfn *p_hwfn);
+enum _ecore_status_t ecore_sp_pf_update_dcbx(struct ecore_hwfn *p_hwfn);
/**
* @brief ecore_sp_pf_stop - PF Function Stop Ramrod
@@ -123,10 +123,10 @@ struct ecore_rl_update_params {
u8 rl_id_last;
u8 rl_dc_qcn_flg; /* If set, RL will be used for DCQCN */
u32 rl_bc_rate; /* Byte Counter Limit */
- u16 rl_max_rate; /* Maximum rate in 1.6 Mbps resolution */
- u16 rl_r_ai; /* Active increase rate */
- u16 rl_r_hai; /* Hyper active increase rate */
- u16 dcqcn_g; /* DCQCN Alpha update gain in 1/64K resolution */
+ u32 rl_max_rate; /* Maximum rate in Mbps resolution */
+ u32 rl_r_ai; /* Active increase rate */
+ u32 rl_r_hai; /* Hyper active increase rate */
+ u32 dcqcn_gd; /* DCQCN Alpha update gain */
u32 dcqcn_k_us; /* DCQCN Alpha update interval */
u32 dcqcn_timeuot_us;
u32 qcn_timeuot_us;
@@ -143,4 +143,23 @@ struct ecore_rl_update_params {
enum _ecore_status_t ecore_sp_rl_update(struct ecore_hwfn *p_hwfn,
struct ecore_rl_update_params *params);
+/**
+ * @brief ecore_sp_pf_update_stag - PF STAG value update Ramrod
+ *
+ * @param p_hwfn
+ *
+ * @return enum _ecore_status_t
+ */
+
+enum _ecore_status_t ecore_sp_pf_update_stag(struct ecore_hwfn *p_hwfn);
+
+/**
+ * @brief ecore_sp_pf_update_ufp - PF ufp update Ramrod
+ *
+ * @param p_hwfn
+ *
+ * @return enum _ecore_status_t
+ */
+enum _ecore_status_t ecore_sp_pf_update_ufp(struct ecore_hwfn *p_hwfn);
+
#endif /*__ECORE_SP_COMMANDS_H__*/
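A hedged sketch of how the renamed and newly added PF-update ramrods might be driven from event-handling code; the real call sites live elsewhere in the qede/ecore tree, and example_handle_mf_events() is purely illustrative.

/* Illustrative only: dispatching the PF-update ramrods declared above. */
static void example_handle_mf_events(struct ecore_hwfn *p_hwfn,
				     bool dcbx_changed,
				     bool stag_changed,
				     bool ufp_changed)
{
	if (dcbx_changed)
		(void)ecore_sp_pf_update_dcbx(p_hwfn); /* was ecore_sp_pf_update() */

	if (stag_changed)
		(void)ecore_sp_pf_update_stag(p_hwfn); /* pushes hw_info.ovlan as mf_vlan */

	if (ufp_changed)
		(void)ecore_sp_pf_update_ufp(p_hwfn);  /* re-applies S-tag priority mode */
}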
diff --git a/drivers/net/qede/base/ecore_spq.c b/drivers/net/qede/base/ecore_spq.c
index 3c1d05b3..70ffa8cd 100644
--- a/drivers/net/qede/base/ecore_spq.c
+++ b/drivers/net/qede/base/ecore_spq.c
@@ -36,9 +36,8 @@
/***************************************************************************
* Blocking Imp. (BLOCK/EBLOCK mode)
***************************************************************************/
-static void ecore_spq_blocking_cb(struct ecore_hwfn *p_hwfn,
- void *cookie,
- union event_ring_data *data,
+static void ecore_spq_blocking_cb(struct ecore_hwfn *p_hwfn, void *cookie,
+ union event_ring_data OSAL_UNUSED * data,
u8 fw_return_code)
{
struct ecore_spq_comp_done *comp_done;
@@ -87,6 +86,7 @@ static enum _ecore_status_t ecore_spq_block(struct ecore_hwfn *p_hwfn,
u8 *p_fw_ret, bool skip_quick_poll)
{
struct ecore_spq_comp_done *comp_done;
+ struct ecore_ptt *p_ptt;
enum _ecore_status_t rc;
/* A relatively short polling period w/o sleeping, to allow the FW to
@@ -103,8 +103,13 @@ static enum _ecore_status_t ecore_spq_block(struct ecore_hwfn *p_hwfn,
if (rc == ECORE_SUCCESS)
return ECORE_SUCCESS;
+ p_ptt = ecore_ptt_acquire(p_hwfn);
+ if (!p_ptt)
+ return ECORE_AGAIN;
+
DP_INFO(p_hwfn, "Ramrod is stuck, requesting MCP drain\n");
- rc = ecore_mcp_drain(p_hwfn, p_hwfn->p_main_ptt);
+ rc = ecore_mcp_drain(p_hwfn, p_ptt);
+ ecore_ptt_release(p_hwfn, p_ptt);
if (rc != ECORE_SUCCESS) {
DP_NOTICE(p_hwfn, true, "MCP drain failed\n");
goto err;
@@ -173,10 +178,10 @@ ecore_spq_fill_entry(struct ecore_hwfn *p_hwfn, struct ecore_spq_entry *p_ent)
static void ecore_spq_hw_initialize(struct ecore_hwfn *p_hwfn,
struct ecore_spq *p_spq)
{
+ struct e4_core_conn_context *p_cxt;
struct ecore_cxt_info cxt_info;
- struct core_conn_context *p_cxt;
- enum _ecore_status_t rc;
u16 physical_q;
+ enum _ecore_status_t rc;
cxt_info.iid = p_spq->cid;
@@ -225,9 +230,9 @@ static enum _ecore_status_t ecore_spq_hw_post(struct ecore_hwfn *p_hwfn,
struct ecore_spq_entry *p_ent)
{
struct ecore_chain *p_chain = &p_hwfn->p_spq->chain;
+ struct core_db_data *p_db_data = &p_spq->db_data;
u16 echo = ecore_chain_get_prod_idx(p_chain);
struct slow_path_element *elem;
- struct core_db_data db;
p_ent->elem.hdr.echo = OSAL_CPU_TO_LE16(echo);
elem = ecore_chain_produce(p_chain);
@@ -236,31 +241,24 @@ static enum _ecore_status_t ecore_spq_hw_post(struct ecore_hwfn *p_hwfn,
return ECORE_INVAL;
}
- *elem = p_ent->elem; /* struct assignment */
+ *elem = p_ent->elem; /* Struct assignment */
- /* send a doorbell on the slow hwfn session */
- OSAL_MEMSET(&db, 0, sizeof(db));
- SET_FIELD(db.params, CORE_DB_DATA_DEST, DB_DEST_XCM);
- SET_FIELD(db.params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_SET);
- SET_FIELD(db.params, CORE_DB_DATA_AGG_VAL_SEL,
- DQ_XCM_CORE_SPQ_PROD_CMD);
- db.agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
- db.spq_prod = OSAL_CPU_TO_LE16(ecore_chain_get_prod_idx(p_chain));
+ p_db_data->spq_prod =
+ OSAL_CPU_TO_LE16(ecore_chain_get_prod_idx(p_chain));
- /* make sure the SPQE is updated before the doorbell */
+ /* Make sure the SPQE is updated before the doorbell */
OSAL_WMB(p_hwfn->p_dev);
- DOORBELL(p_hwfn, DB_ADDR(p_spq->cid, DQ_DEMS_LEGACY),
- *(u32 *)&db);
+ DOORBELL(p_hwfn, p_spq->db_addr_offset, *(u32 *)p_db_data);
- /* make sure doorbell is rang */
+ /* Make sure doorbell is rung */
OSAL_WMB(p_hwfn->p_dev);
DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ,
"Doorbelled [0x%08x, CID 0x%08x] with Flags: %02x"
" agg_params: %02x, prod: %04x\n",
- DB_ADDR(p_spq->cid, DQ_DEMS_LEGACY), p_spq->cid, db.params,
- db.agg_flags, ecore_chain_get_prod_idx(p_chain));
+ p_spq->db_addr_offset, p_spq->cid, p_db_data->params,
+ p_db_data->agg_flags, ecore_chain_get_prod_idx(p_chain));
return ECORE_SUCCESS;
}
@@ -273,12 +271,16 @@ static enum _ecore_status_t
ecore_async_event_completion(struct ecore_hwfn *p_hwfn,
struct event_ring_entry *p_eqe)
{
- switch (p_eqe->protocol_id) {
- case PROTOCOLID_COMMON:
- return ecore_sriov_eqe_event(p_hwfn,
- p_eqe->opcode,
- p_eqe->echo, &p_eqe->data);
- default:
+ ecore_spq_async_comp_cb cb;
+
+ if (!p_hwfn->p_spq || (p_eqe->protocol_id >= MAX_PROTOCOL_TYPE))
+ return ECORE_INVAL;
+
+ cb = p_hwfn->p_spq->async_comp_cb[p_eqe->protocol_id];
+ if (cb) {
+ return cb(p_hwfn, p_eqe->opcode, p_eqe->echo,
+ &p_eqe->data, p_eqe->fw_return_code);
+ } else {
DP_NOTICE(p_hwfn,
true, "Unknown Async completion for protocol: %d\n",
p_eqe->protocol_id);
@@ -286,6 +288,28 @@ ecore_async_event_completion(struct ecore_hwfn *p_hwfn,
}
}
+enum _ecore_status_t
+ecore_spq_register_async_cb(struct ecore_hwfn *p_hwfn,
+ enum protocol_type protocol_id,
+ ecore_spq_async_comp_cb cb)
+{
+ if (!p_hwfn->p_spq || (protocol_id >= MAX_PROTOCOL_TYPE))
+ return ECORE_INVAL;
+
+ p_hwfn->p_spq->async_comp_cb[protocol_id] = cb;
+ return ECORE_SUCCESS;
+}
+
+void
+ecore_spq_unregister_async_cb(struct ecore_hwfn *p_hwfn,
+ enum protocol_type protocol_id)
+{
+ if (!p_hwfn->p_spq || (protocol_id >= MAX_PROTOCOL_TYPE))
+ return;
+
+ p_hwfn->p_spq->async_comp_cb[protocol_id] = OSAL_NULL;
+}
+
/***************************************************************************
* EQ API
***************************************************************************/
@@ -450,8 +474,11 @@ void ecore_spq_setup(struct ecore_hwfn *p_hwfn)
{
struct ecore_spq *p_spq = p_hwfn->p_spq;
struct ecore_spq_entry *p_virt = OSAL_NULL;
+ struct core_db_data *p_db_data;
+ void OSAL_IOMEM *db_addr;
dma_addr_t p_phys = 0;
u32 i, capacity;
+ enum _ecore_status_t rc;
OSAL_LIST_INIT(&p_spq->pending);
OSAL_LIST_INIT(&p_spq->completion_pending);
@@ -489,6 +516,24 @@ void ecore_spq_setup(struct ecore_hwfn *p_hwfn)
/* reset the chain itself */
ecore_chain_reset(&p_spq->chain);
+
+ /* Initialize the address/data of the SPQ doorbell */
+ p_spq->db_addr_offset = DB_ADDR(p_spq->cid, DQ_DEMS_LEGACY);
+ p_db_data = &p_spq->db_data;
+ OSAL_MEM_ZERO(p_db_data, sizeof(*p_db_data));
+ SET_FIELD(p_db_data->params, CORE_DB_DATA_DEST, DB_DEST_XCM);
+ SET_FIELD(p_db_data->params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_MAX);
+ SET_FIELD(p_db_data->params, CORE_DB_DATA_AGG_VAL_SEL,
+ DQ_XCM_CORE_SPQ_PROD_CMD);
+ p_db_data->agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
+
+ /* Register the SPQ doorbell with the doorbell recovery mechanism */
+ db_addr = (void *)((u8 *)p_hwfn->doorbells + p_spq->db_addr_offset);
+ rc = ecore_db_recovery_add(p_hwfn->p_dev, db_addr, &p_spq->db_data,
+ DB_REC_WIDTH_32B, DB_REC_KERNEL);
+ if (rc != ECORE_SUCCESS)
+ DP_INFO(p_hwfn,
+ "Failed to register the SPQ doorbell with the doorbell recovery mechanism\n");
}
enum _ecore_status_t ecore_spq_alloc(struct ecore_hwfn *p_hwfn)
@@ -530,7 +575,9 @@ enum _ecore_status_t ecore_spq_alloc(struct ecore_hwfn *p_hwfn)
p_spq->p_virt = p_virt;
p_spq->p_phys = p_phys;
+#ifdef CONFIG_ECORE_LOCK_ALLOC
OSAL_SPIN_LOCK_ALLOC(p_hwfn, &p_spq->lock);
+#endif
p_hwfn->p_spq = p_spq;
return ECORE_SUCCESS;
@@ -544,11 +591,16 @@ spq_allocate_fail:
void ecore_spq_free(struct ecore_hwfn *p_hwfn)
{
struct ecore_spq *p_spq = p_hwfn->p_spq;
+ void OSAL_IOMEM *db_addr;
u32 capacity;
if (!p_spq)
return;
+ /* Delete the SPQ doorbell from the doorbell recovery mechanism */
+ db_addr = (void *)((u8 *)p_hwfn->doorbells + p_spq->db_addr_offset);
+ ecore_db_recovery_del(p_hwfn->p_dev, db_addr, &p_spq->db_data);
+
if (p_spq->p_virt) {
capacity = ecore_chain_get_capacity(&p_spq->chain);
OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev,
@@ -559,7 +611,10 @@ void ecore_spq_free(struct ecore_hwfn *p_hwfn)
}
ecore_chain_free(p_hwfn->p_dev, &p_spq->chain);
+#ifdef CONFIG_ECORE_LOCK_ALLOC
OSAL_SPIN_LOCK_DEALLOC(&p_spq->lock);
+#endif
+
OSAL_FREE(p_hwfn->p_dev, p_spq);
}
diff --git a/drivers/net/qede/base/ecore_spq.h b/drivers/net/qede/base/ecore_spq.h
index e530f834..526cff08 100644
--- a/drivers/net/qede/base/ecore_spq.h
+++ b/drivers/net/qede/base/ecore_spq.h
@@ -86,6 +86,22 @@ struct ecore_consq {
struct ecore_chain chain;
};
+typedef enum _ecore_status_t
+(*ecore_spq_async_comp_cb)(struct ecore_hwfn *p_hwfn,
+ u8 opcode,
+ u16 echo,
+ union event_ring_data *data,
+ u8 fw_return_code);
+
+enum _ecore_status_t
+ecore_spq_register_async_cb(struct ecore_hwfn *p_hwfn,
+ enum protocol_type protocol_id,
+ ecore_spq_async_comp_cb cb);
+
+void
+ecore_spq_unregister_async_cb(struct ecore_hwfn *p_hwfn,
+ enum protocol_type protocol_id);
+
struct ecore_spq {
osal_spinlock_t lock;
@@ -124,6 +140,10 @@ struct ecore_spq {
u32 comp_count;
u32 cid;
+
+ u32 db_addr_offset;
+ struct core_db_data db_data;
+ ecore_spq_async_comp_cb async_comp_cb[MAX_PROTOCOL_TYPE];
};
struct ecore_port;
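The new typedef plus register/unregister pair replaces the hard-wired PROTOCOLID_COMMON switch in ecore_async_event_completion(). A minimal usage sketch, mirroring the ecore_sriov.c registration later in this patch; example_roce_async() and the choice of PROTOCOLID_ROCE are illustrative assumptions, not code from the patch:

/* Illustrative per-protocol async completion hook. */
static enum _ecore_status_t
example_roce_async(struct ecore_hwfn *p_hwfn, u8 opcode, u16 echo,
		   union event_ring_data *data, u8 fw_return_code)
{
	DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ,
		   "async opcode %02x echo %04x fw_ret %02x\n",
		   opcode, echo, fw_return_code);
	(void)data;
	return ECORE_SUCCESS;
}

static void example_register(struct ecore_hwfn *p_hwfn)
{
	/* Install the handler; EQ entries with this protocol ID are now
	 * dispatched to it by ecore_async_event_completion().
	 */
	(void)ecore_spq_register_async_cb(p_hwfn, PROTOCOLID_ROCE,
					  example_roce_async);
}

static void example_unregister(struct ecore_hwfn *p_hwfn)
{
	ecore_spq_unregister_async_cb(p_hwfn, PROTOCOLID_ROCE);
}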
diff --git a/drivers/net/qede/base/ecore_sriov.c b/drivers/net/qede/base/ecore_sriov.c
index db2873e7..b1e26d6f 100644
--- a/drivers/net/qede/base/ecore_sriov.c
+++ b/drivers/net/qede/base/ecore_sriov.c
@@ -27,6 +27,12 @@
#include "ecore_init_fw_funcs.h"
#include "ecore_sp_commands.h"
+static enum _ecore_status_t ecore_sriov_eqe_event(struct ecore_hwfn *p_hwfn,
+ u8 opcode,
+ __le16 echo,
+ union event_ring_data *data,
+ u8 fw_return_code);
+
const char *ecore_channel_tlvs_string[] = {
"CHANNEL_TLV_NONE", /* ends tlv sequence */
"CHANNEL_TLV_ACQUIRE",
@@ -53,9 +59,26 @@ const char *ecore_channel_tlvs_string[] = {
"CHANNEL_TLV_VPORT_UPDATE_SGE_TPA",
"CHANNEL_TLV_UPDATE_TUNN_PARAM",
"CHANNEL_TLV_COALESCE_UPDATE",
+ "CHANNEL_TLV_QID",
+ "CHANNEL_TLV_COALESCE_READ",
"CHANNEL_TLV_MAX"
};
+static u8 ecore_vf_calculate_legacy(struct ecore_vf_info *p_vf)
+{
+ u8 legacy = 0;
+
+ if (p_vf->acquire.vfdev_info.eth_fp_hsi_minor ==
+ ETH_HSI_VER_NO_PKT_LEN_TUNN)
+ legacy |= ECORE_QCID_LEGACY_VF_RX_PROD;
+
+ if (!(p_vf->acquire.vfdev_info.capabilities &
+ VFPF_ACQUIRE_CAP_QUEUE_QIDS))
+ legacy |= ECORE_QCID_LEGACY_VF_CID;
+
+ return legacy;
+}
+
/* IOV ramrods */
static enum _ecore_status_t ecore_sp_vf_start(struct ecore_hwfn *p_hwfn,
struct ecore_vf_info *p_vf)
@@ -193,9 +216,7 @@ struct ecore_vf_info *ecore_iov_get_vf_info(struct ecore_hwfn *p_hwfn,
}
static struct ecore_queue_cid *
-ecore_iov_get_vf_rx_queue_cid(struct ecore_hwfn *p_hwfn,
- struct ecore_vf_info *p_vf,
- struct ecore_vf_queue *p_queue)
+ecore_iov_get_vf_rx_queue_cid(struct ecore_vf_queue *p_queue)
{
int i;
@@ -214,8 +235,7 @@ enum ecore_iov_validate_q_mode {
ECORE_IOV_VALIDATE_Q_DISABLE,
};
-static bool ecore_iov_validate_queue_mode(struct ecore_hwfn *p_hwfn,
- struct ecore_vf_info *p_vf,
+static bool ecore_iov_validate_queue_mode(struct ecore_vf_info *p_vf,
u16 qid,
enum ecore_iov_validate_q_mode mode,
bool b_is_tx)
@@ -257,8 +277,7 @@ static bool ecore_iov_validate_rxq(struct ecore_hwfn *p_hwfn,
return false;
}
- return ecore_iov_validate_queue_mode(p_hwfn, p_vf, rx_qid,
- mode, false);
+ return ecore_iov_validate_queue_mode(p_vf, rx_qid, mode, false);
}
static bool ecore_iov_validate_txq(struct ecore_hwfn *p_hwfn,
@@ -274,8 +293,7 @@ static bool ecore_iov_validate_txq(struct ecore_hwfn *p_hwfn,
return false;
}
- return ecore_iov_validate_queue_mode(p_hwfn, p_vf, tx_qid,
- mode, true);
+ return ecore_iov_validate_queue_mode(p_vf, tx_qid, mode, true);
}
static bool ecore_iov_validate_sb(struct ecore_hwfn *p_hwfn,
@@ -297,13 +315,12 @@ static bool ecore_iov_validate_sb(struct ecore_hwfn *p_hwfn,
}
/* Is there at least 1 queue open? */
-static bool ecore_iov_validate_active_rxq(struct ecore_hwfn *p_hwfn,
- struct ecore_vf_info *p_vf)
+static bool ecore_iov_validate_active_rxq(struct ecore_vf_info *p_vf)
{
u8 i;
for (i = 0; i < p_vf->num_rxqs; i++)
- if (ecore_iov_validate_queue_mode(p_hwfn, p_vf, i,
+ if (ecore_iov_validate_queue_mode(p_vf, i,
ECORE_IOV_VALIDATE_Q_ENABLE,
false))
return true;
@@ -311,13 +328,12 @@ static bool ecore_iov_validate_active_rxq(struct ecore_hwfn *p_hwfn,
return false;
}
-static bool ecore_iov_validate_active_txq(struct ecore_hwfn *p_hwfn,
- struct ecore_vf_info *p_vf)
+static bool ecore_iov_validate_active_txq(struct ecore_vf_info *p_vf)
{
u8 i;
for (i = 0; i < p_vf->num_txqs; i++)
- if (ecore_iov_validate_queue_mode(p_hwfn, p_vf, i,
+ if (ecore_iov_validate_queue_mode(p_vf, i,
ECORE_IOV_VALIDATE_Q_ENABLE,
true))
return true;
@@ -325,19 +341,6 @@ static bool ecore_iov_validate_active_txq(struct ecore_hwfn *p_hwfn,
return false;
}
-/* TODO - this is linux crc32; Need a way to ifdef it out for linux */
-u32 ecore_crc32(u32 crc, u8 *ptr, u32 length)
-{
- int i;
-
- while (length--) {
- crc ^= *ptr++;
- for (i = 0; i < 8; i++)
- crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
- }
- return crc;
-}
-
enum _ecore_status_t ecore_iov_post_vf_bulletin(struct ecore_hwfn *p_hwfn,
int vfid,
struct ecore_ptt *p_ptt)
@@ -359,8 +362,8 @@ enum _ecore_status_t ecore_iov_post_vf_bulletin(struct ecore_hwfn *p_hwfn,
/* Increment bulletin board version and compute crc */
p_bulletin->version++;
- p_bulletin->crc = ecore_crc32(0, (u8 *)p_bulletin + crc_size,
- p_vf->bulletin.size - crc_size);
+ p_bulletin->crc = OSAL_CRC32(0, (u8 *)p_bulletin + crc_size,
+ p_vf->bulletin.size - crc_size);
DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
"Posting Bulletin 0x%08x to VF[%d] (CRC 0x%08x)\n",
@@ -442,33 +445,6 @@ static enum _ecore_status_t ecore_iov_pci_cfg_info(struct ecore_dev *p_dev)
return ECORE_SUCCESS;
}
-static void ecore_iov_clear_vf_igu_blocks(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt)
-{
- struct ecore_igu_block *p_sb;
- u16 sb_id;
- u32 val;
-
- if (!p_hwfn->hw_info.p_igu_info) {
- DP_ERR(p_hwfn,
- "ecore_iov_clear_vf_igu_blocks IGU Info not inited\n");
- return;
- }
-
- for (sb_id = 0;
- sb_id < ECORE_MAPPING_MEMORY_SIZE(p_hwfn->p_dev); sb_id++) {
- p_sb = &p_hwfn->hw_info.p_igu_info->igu_map.igu_blocks[sb_id];
- if ((p_sb->status & ECORE_IGU_STATUS_FREE) &&
- !(p_sb->status & ECORE_IGU_STATUS_PF)) {
- val = ecore_rd(p_hwfn, p_ptt,
- IGU_REG_MAPPING_MEMORY + sb_id * 4);
- SET_FIELD(val, IGU_MAPPING_LINE_VALID, 0);
- ecore_wr(p_hwfn, p_ptt,
- IGU_REG_MAPPING_MEMORY + 4 * sb_id, val);
- }
- }
-}
-
static void ecore_iov_setup_vfdb(struct ecore_hwfn *p_hwfn)
{
struct ecore_hw_sriov_info *p_iov = p_hwfn->p_dev->p_iov_info;
@@ -621,20 +597,24 @@ enum _ecore_status_t ecore_iov_alloc(struct ecore_hwfn *p_hwfn)
p_hwfn->pf_iov_info = p_sriov;
+ ecore_spq_register_async_cb(p_hwfn, PROTOCOLID_COMMON,
+ ecore_sriov_eqe_event);
+
return ecore_iov_allocate_vfdb(p_hwfn);
}
-void ecore_iov_setup(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
+void ecore_iov_setup(struct ecore_hwfn *p_hwfn)
{
if (!IS_PF_SRIOV(p_hwfn) || !IS_PF_SRIOV_ALLOC(p_hwfn))
return;
ecore_iov_setup_vfdb(p_hwfn);
- ecore_iov_clear_vf_igu_blocks(p_hwfn, p_ptt);
}
void ecore_iov_free(struct ecore_hwfn *p_hwfn)
{
+ ecore_spq_unregister_async_cb(p_hwfn, PROTOCOLID_COMMON);
+
if (IS_PF_SRIOV_ALLOC(p_hwfn)) {
ecore_iov_free_vfdb(p_hwfn);
OSAL_FREE(p_hwfn->p_dev, p_hwfn->pf_iov_info);
@@ -843,11 +823,52 @@ static void ecore_iov_vf_igu_set_int(struct ecore_hwfn *p_hwfn,
}
static enum _ecore_status_t
+ecore_iov_enable_vf_access_msix(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ u8 abs_vf_id,
+ u8 num_sbs)
+{
+ u8 current_max = 0;
+ int i;
+
+ /* If client overrides this, don't do anything */
+ if (p_hwfn->p_dev->b_dont_override_vf_msix)
+ return ECORE_SUCCESS;
+
+ /* For AH onward, configuration is per-PF. Find maximum of all
+ * the currently enabled child VFs, and set the number to be that.
+ */
+ if (!ECORE_IS_BB(p_hwfn->p_dev)) {
+ ecore_for_each_vf(p_hwfn, i) {
+ struct ecore_vf_info *p_vf;
+
+ p_vf = ecore_iov_get_vf_info(p_hwfn, (u16)i, true);
+ if (!p_vf)
+ continue;
+
+ current_max = OSAL_MAX_T(u8, current_max,
+ p_vf->num_sbs);
+ }
+ }
+
+ if (num_sbs > current_max)
+ return ecore_mcp_config_vf_msix(p_hwfn, p_ptt,
+ abs_vf_id, num_sbs);
+
+ return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t
ecore_iov_enable_vf_access(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt, struct ecore_vf_info *vf)
{
u32 igu_vf_conf = IGU_VF_CONF_FUNC_EN;
- enum _ecore_status_t rc;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
+
+ /* It's possible VF was previously considered malicious -
+ * clear the indication even if we're only going to disable VF.
+ */
+ vf->b_malicious = false;
if (vf->to_disable)
return ECORE_SUCCESS;
@@ -861,11 +882,8 @@ ecore_iov_enable_vf_access(struct ecore_hwfn *p_hwfn,
ecore_iov_vf_igu_reset(p_hwfn, p_ptt, vf);
- /* It's possible VF was previously considered malicious */
- vf->b_malicious = false;
-
- rc = ecore_mcp_config_vf_msix(p_hwfn, p_ptt,
- vf->abs_vf_id, vf->num_sbs);
+ rc = ecore_iov_enable_vf_access_msix(p_hwfn, p_ptt,
+ vf->abs_vf_id, vf->num_sbs);
if (rc != ECORE_SUCCESS)
return rc;
@@ -934,46 +952,38 @@ static u8 ecore_iov_alloc_vf_igu_sbs(struct ecore_hwfn *p_hwfn,
struct ecore_vf_info *vf,
u16 num_rx_queues)
{
- struct ecore_igu_block *igu_blocks;
- int qid = 0, igu_id = 0;
+ struct ecore_igu_block *p_block;
+ struct cau_sb_entry sb_entry;
+ int qid = 0;
u32 val = 0;
- igu_blocks = p_hwfn->hw_info.p_igu_info->igu_map.igu_blocks;
-
- if (num_rx_queues > p_hwfn->hw_info.p_igu_info->free_blks)
- num_rx_queues = p_hwfn->hw_info.p_igu_info->free_blks;
-
- p_hwfn->hw_info.p_igu_info->free_blks -= num_rx_queues;
+ if (num_rx_queues > p_hwfn->hw_info.p_igu_info->usage.free_cnt_iov)
+ num_rx_queues =
+ (u16)p_hwfn->hw_info.p_igu_info->usage.free_cnt_iov;
+ p_hwfn->hw_info.p_igu_info->usage.free_cnt_iov -= num_rx_queues;
SET_FIELD(val, IGU_MAPPING_LINE_FUNCTION_NUMBER, vf->abs_vf_id);
SET_FIELD(val, IGU_MAPPING_LINE_VALID, 1);
SET_FIELD(val, IGU_MAPPING_LINE_PF_VALID, 0);
- while ((qid < num_rx_queues) &&
- (igu_id < ECORE_MAPPING_MEMORY_SIZE(p_hwfn->p_dev))) {
- if (igu_blocks[igu_id].status & ECORE_IGU_STATUS_FREE) {
- struct cau_sb_entry sb_entry;
-
- vf->igu_sbs[qid] = (u16)igu_id;
- igu_blocks[igu_id].status &= ~ECORE_IGU_STATUS_FREE;
-
- SET_FIELD(val, IGU_MAPPING_LINE_VECTOR_NUMBER, qid);
-
- ecore_wr(p_hwfn, p_ptt,
- IGU_REG_MAPPING_MEMORY + sizeof(u32) * igu_id,
- val);
-
- /* Configure igu sb in CAU which were marked valid */
- ecore_init_cau_sb_entry(p_hwfn, &sb_entry,
- p_hwfn->rel_pf_id,
- vf->abs_vf_id, 1);
- ecore_dmae_host2grc(p_hwfn, p_ptt,
- (u64)(osal_uintptr_t)&sb_entry,
- CAU_REG_SB_VAR_MEMORY +
- igu_id * sizeof(u64), 2, 0);
- qid++;
- }
- igu_id++;
+ for (qid = 0; qid < num_rx_queues; qid++) {
+ p_block = ecore_get_igu_free_sb(p_hwfn, false);
+ vf->igu_sbs[qid] = p_block->igu_sb_id;
+ p_block->status &= ~ECORE_IGU_STATUS_FREE;
+ SET_FIELD(val, IGU_MAPPING_LINE_VECTOR_NUMBER, qid);
+
+ ecore_wr(p_hwfn, p_ptt,
+ IGU_REG_MAPPING_MEMORY +
+ sizeof(u32) * p_block->igu_sb_id, val);
+
+ /* Configure the igu sb in CAU which was marked valid */
+ ecore_init_cau_sb_entry(p_hwfn, &sb_entry,
+ p_hwfn->rel_pf_id,
+ vf->abs_vf_id, 1);
+ ecore_dmae_host2grc(p_hwfn, p_ptt,
+ (u64)(osal_uintptr_t)&sb_entry,
+ CAU_REG_SB_VAR_MEMORY +
+ p_block->igu_sb_id * sizeof(u64), 2, 0);
}
vf->num_sbs = (u8)num_rx_queues;
@@ -1009,10 +1019,8 @@ static void ecore_iov_free_vf_igu_sbs(struct ecore_hwfn *p_hwfn,
SET_FIELD(val, IGU_MAPPING_LINE_VALID, 0);
ecore_wr(p_hwfn, p_ptt, addr, val);
- p_info->igu_map.igu_blocks[igu_id].status |=
- ECORE_IGU_STATUS_FREE;
-
- p_hwfn->hw_info.p_igu_info->free_blks++;
+ p_info->entry[igu_id].status |= ECORE_IGU_STATUS_FREE;
+ p_hwfn->hw_info.p_igu_info->usage.free_cnt_iov++;
}
vf->num_sbs = 0;
@@ -1110,34 +1118,28 @@ ecore_iov_init_hw_for_vf(struct ecore_hwfn *p_hwfn,
vf->vport_id = p_params->vport_id;
vf->rss_eng_id = p_params->rss_eng_id;
- /* Perform sanity checking on the requested queue_id */
+ /* Since it's possible to relocate SBs, it's a bit difficult to check
+ * things here. Simply check whether the index falls in the range
+ * belonging to the PF.
+ */
for (i = 0; i < p_params->num_queues; i++) {
- u16 min_vf_qzone = (u16)FEAT_NUM(p_hwfn, ECORE_PF_L2_QUE);
- u16 max_vf_qzone = min_vf_qzone +
- FEAT_NUM(p_hwfn, ECORE_VF_L2_QUE) - 1;
-
qid = p_params->req_rx_queue[i];
- if (qid < min_vf_qzone || qid > max_vf_qzone) {
+ if (qid > (u16)RESC_NUM(p_hwfn, ECORE_L2_QUEUE)) {
DP_NOTICE(p_hwfn, true,
- "Can't enable Rx qid [%04x] for VF[%d]: qids [0x%04x,...,0x%04x] available\n",
+ "Can't enable Rx qid [%04x] for VF[%d]: qids [0,,...,0x%04x] available\n",
qid, p_params->rel_vf_id,
- min_vf_qzone, max_vf_qzone);
+ (u16)RESC_NUM(p_hwfn, ECORE_L2_QUEUE));
return ECORE_INVAL;
}
qid = p_params->req_tx_queue[i];
- if (qid > max_vf_qzone) {
+ if (qid > (u16)RESC_NUM(p_hwfn, ECORE_L2_QUEUE)) {
DP_NOTICE(p_hwfn, true,
- "Can't enable Tx qid [%04x] for VF[%d]: max qid 0x%04x\n",
- qid, p_params->rel_vf_id, max_vf_qzone);
+ "Can't enable Tx qid [%04x] for VF[%d]: qids [0,,...,0x%04x] available\n",
+ qid, p_params->rel_vf_id,
+ (u16)RESC_NUM(p_hwfn, ECORE_L2_QUEUE));
return ECORE_INVAL;
}
-
- /* If client *really* wants, Tx qid can be shared with PF */
- if (qid < min_vf_qzone)
- DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
- "VF[%d] is using PF qid [0x%04x] for Txq[0x%02x]\n",
- p_params->rel_vf_id, qid, i);
}
/* Limit number of queues according to number of CIDs */
@@ -1307,8 +1309,7 @@ static void ecore_iov_unlock_vf_pf_channel(struct ecore_hwfn *p_hwfn,
}
/* place a given tlv on the tlv buffer, continuing current tlv list */
-void *ecore_add_tlv(struct ecore_hwfn *p_hwfn,
- u8 **offset, u16 type, u16 length)
+void *ecore_add_tlv(u8 **offset, u16 type, u16 length)
{
struct channel_tlv *tl = (struct channel_tlv *)*offset;
@@ -1364,7 +1365,12 @@ void ecore_dp_tlv_list(struct ecore_hwfn *p_hwfn, void *tlvs_list)
static void ecore_iov_send_response(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
struct ecore_vf_info *p_vf,
- u16 length, u8 status)
+#ifdef CONFIG_ECORE_SW_CHANNEL
+ u16 length,
+#else
+ u16 OSAL_UNUSED length,
+#endif
+ u8 status)
{
struct ecore_iov_vf_mbx *mbx = &p_vf->vf_mbx;
struct ecore_dmae_params params;
@@ -1378,7 +1384,7 @@ static void ecore_iov_send_response(struct ecore_hwfn *p_hwfn,
mbx->sw_mbx.response_size =
length + sizeof(struct channel_list_end_tlv);
- if (!p_hwfn->p_dev->b_hw_channel)
+ if (!p_vf->b_hw_channel)
return;
#endif
@@ -1394,17 +1400,22 @@ static void ecore_iov_send_response(struct ecore_hwfn *p_hwfn,
(sizeof(union pfvf_tlvs) - sizeof(u64)) / 4,
&params);
+ /* Once PF copies the rc to the VF, the latter can continue
+ * and send an additional message. So we have to make sure the
+ * channel would be re-set to ready prior to that.
+ */
+ REG_WR(p_hwfn,
+ GTT_BAR0_MAP_REG_USDM_RAM +
+ USTORM_VF_PF_CHANNEL_READY_OFFSET(eng_vf_id), 1);
+
ecore_dmae_host2host(p_hwfn, p_ptt, mbx->reply_phys,
mbx->req_virt->first_tlv.reply_address,
sizeof(u64) / 4, &params);
- REG_WR(p_hwfn,
- GTT_BAR0_MAP_REG_USDM_RAM +
- USTORM_VF_PF_CHANNEL_READY_OFFSET(eng_vf_id), 1);
+ OSAL_IOV_PF_RESP_TYPE(p_hwfn, p_vf->relative_vf_id, status);
}
-static u16 ecore_iov_vport_to_tlv(struct ecore_hwfn *p_hwfn,
- enum ecore_iov_vport_update_flag flag)
+static u16 ecore_iov_vport_to_tlv(enum ecore_iov_vport_update_flag flag)
{
switch (flag) {
case ECORE_IOV_VP_UPDATE_ACTIVATE:
@@ -1442,15 +1453,15 @@ static u16 ecore_iov_prep_vp_update_resp_tlvs(struct ecore_hwfn *p_hwfn,
size = sizeof(struct pfvf_def_resp_tlv);
total_len = size;
- ecore_add_tlv(p_hwfn, &p_mbx->offset, CHANNEL_TLV_VPORT_UPDATE, size);
+ ecore_add_tlv(&p_mbx->offset, CHANNEL_TLV_VPORT_UPDATE, size);
/* Prepare response for all extended tlvs if they are found by PF */
for (i = 0; i < ECORE_IOV_VP_UPDATE_MAX; i++) {
if (!(tlvs_mask & (1 << i)))
continue;
- resp = ecore_add_tlv(p_hwfn, &p_mbx->offset,
- ecore_iov_vport_to_tlv(p_hwfn, i), size);
+ resp = ecore_add_tlv(&p_mbx->offset, ecore_iov_vport_to_tlv(i),
+ size);
if (tlvs_accepted & (1 << i))
resp->hdr.status = status;
@@ -1460,12 +1471,13 @@ static u16 ecore_iov_prep_vp_update_resp_tlvs(struct ecore_hwfn *p_hwfn,
DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
"VF[%d] - vport_update resp: TLV %d, status %02x\n",
p_vf->relative_vf_id,
- ecore_iov_vport_to_tlv(p_hwfn, i), resp->hdr.status);
+ ecore_iov_vport_to_tlv(i),
+ resp->hdr.status);
total_len += size;
}
- ecore_add_tlv(p_hwfn, &p_mbx->offset, CHANNEL_TLV_LIST_END,
+ ecore_add_tlv(&p_mbx->offset, CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
return total_len;
@@ -1480,13 +1492,11 @@ static void ecore_iov_prepare_resp(struct ecore_hwfn *p_hwfn,
mbx->offset = (u8 *)mbx->reply_virt;
- ecore_add_tlv(p_hwfn, &mbx->offset, type, length);
- ecore_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END,
+ ecore_add_tlv(&mbx->offset, type, length);
+ ecore_add_tlv(&mbx->offset, CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
ecore_iov_send_response(p_hwfn, p_ptt, vf_info, length, status);
-
- OSAL_IOV_PF_RESP_TYPE(p_hwfn, vf_info->relative_vf_id, status);
}
struct ecore_public_vf_info
@@ -1535,6 +1545,60 @@ static void ecore_iov_vf_cleanup(struct ecore_hwfn *p_hwfn,
OSAL_IOV_VF_CLEANUP(p_hwfn, p_vf->relative_vf_id);
}
+/* Returns either 0, or log(size) */
+static u32 ecore_iov_vf_db_bar_size(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt)
+{
+ u32 val = ecore_rd(p_hwfn, p_ptt, PGLUE_B_REG_VF_BAR1_SIZE);
+
+ if (val)
+ return val + 11;
+ return 0;
+}
+
+static void
+ecore_iov_vf_mbx_acquire_resc_cids(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ struct ecore_vf_info *p_vf,
+ struct vf_pf_resc_request *p_req,
+ struct pf_vf_resc *p_resp)
+{
+ u8 num_vf_cons = p_hwfn->pf_params.eth_pf_params.num_vf_cons;
+ u8 db_size = DB_ADDR_VF(1, DQ_DEMS_LEGACY) -
+ DB_ADDR_VF(0, DQ_DEMS_LEGACY);
+ u32 bar_size;
+
+ p_resp->num_cids = OSAL_MIN_T(u8, p_req->num_cids, num_vf_cons);
+
+ /* If VF didn't bother asking for QIDs then don't bother limiting
+ * number of CIDs. The VF doesn't care about the number, and this
+ * has the likely result of causing an additional acquisition.
+ */
+ if (!(p_vf->acquire.vfdev_info.capabilities &
+ VFPF_ACQUIRE_CAP_QUEUE_QIDS))
+ return;
+
+ /* If doorbell bar was mapped by VF, limit the VF CIDs to an amount
+ * that would make sure doorbells for all CIDs fall within the bar.
+ * If it doesn't, make sure regview window is sufficient.
+ */
+ if (p_vf->acquire.vfdev_info.capabilities &
+ VFPF_ACQUIRE_CAP_PHYSICAL_BAR) {
+ bar_size = ecore_iov_vf_db_bar_size(p_hwfn, p_ptt);
+ if (bar_size)
+ bar_size = 1 << bar_size;
+
+ if (ECORE_IS_CMT(p_hwfn->p_dev))
+ bar_size /= 2;
+ } else {
+ bar_size = PXP_VF_BAR0_DQ_LENGTH;
+ }
+
+ if (bar_size / db_size < 256)
+ p_resp->num_cids = OSAL_MIN_T(u8, p_resp->num_cids,
+ (u8)(bar_size / db_size));
+}
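
The clamp above ensures every CID a VF gets can be doorbelled from within its doorbell window: the window size (either the mapped bar, halved for CMT, or the fixed regview DQ region) is divided by the per-CID doorbell stride, and the response is capped only when fewer than 256 CIDs fit. A hedged, standalone sketch of that arithmetic (illustrative values and names only):

#include <stdint.h>

/* Sketch: cap the number of CIDs so all their doorbells fit in the bar. */
static uint8_t clamp_vf_cids(uint8_t requested, uint32_t bar_bytes,
                             uint32_t db_stride_bytes)
{
    uint32_t fitting = bar_bytes / db_stride_bytes;

    /* Past 256 the window is no longer the limiting factor */
    if (fitting < 256 && requested > fitting)
        return (uint8_t)fitting;
    return requested;
}
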
+
static u8 ecore_iov_vf_mbx_acquire_resc(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
struct ecore_vf_info *p_vf,
@@ -1571,6 +1635,8 @@ static u8 ecore_iov_vf_mbx_acquire_resc(struct ecore_hwfn *p_hwfn,
p_resp->num_vlan_filters = OSAL_MIN_T(u8, p_vf->num_vlan_filters,
p_req->num_vlan_filters);
+ ecore_iov_vf_mbx_acquire_resc_cids(p_hwfn, p_ptt, p_vf, p_req, p_resp);
+
/* This isn't really needed/enforced, but some legacy VFs might depend
* on the correct filling of this field.
*/
@@ -1582,18 +1648,18 @@ static u8 ecore_iov_vf_mbx_acquire_resc(struct ecore_hwfn *p_hwfn,
p_resp->num_sbs < p_req->num_sbs ||
p_resp->num_mac_filters < p_req->num_mac_filters ||
p_resp->num_vlan_filters < p_req->num_vlan_filters ||
- p_resp->num_mc_filters < p_req->num_mc_filters) {
+ p_resp->num_mc_filters < p_req->num_mc_filters ||
+ p_resp->num_cids < p_req->num_cids) {
DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
- "VF[%d] - Insufficient resources: rxq [%02x/%02x]"
- " txq [%02x/%02x] sbs [%02x/%02x] mac [%02x/%02x]"
- " vlan [%02x/%02x] mc [%02x/%02x]\n",
+ "VF[%d] - Insufficient resources: rxq [%02x/%02x] txq [%02x/%02x] sbs [%02x/%02x] mac [%02x/%02x] vlan [%02x/%02x] mc [%02x/%02x] cids [%02x/%02x]\n",
p_vf->abs_vf_id,
p_req->num_rxqs, p_resp->num_rxqs,
p_req->num_rxqs, p_resp->num_txqs,
p_req->num_sbs, p_resp->num_sbs,
p_req->num_mac_filters, p_resp->num_mac_filters,
p_req->num_vlan_filters, p_resp->num_vlan_filters,
- p_req->num_mc_filters, p_resp->num_mc_filters);
+ p_req->num_mc_filters, p_resp->num_mc_filters,
+ p_req->num_cids, p_resp->num_cids);
/* Some legacy OSes are incapable of correctly handling this
* failure.
@@ -1610,8 +1676,7 @@ static u8 ecore_iov_vf_mbx_acquire_resc(struct ecore_hwfn *p_hwfn,
return PFVF_STATUS_SUCCESS;
}
-static void ecore_iov_vf_mbx_acquire_stats(struct ecore_hwfn *p_hwfn,
- struct pfvf_stats_info *p_stats)
+static void ecore_iov_vf_mbx_acquire_stats(struct pfvf_stats_info *p_stats)
{
p_stats->mstats.address = PXP_VF_BAR0_START_MSDM_ZONE_B +
OFFSETOF(struct mstorm_vf_zone,
@@ -1693,7 +1758,7 @@ static void ecore_iov_vf_mbx_acquire(struct ecore_hwfn *p_hwfn,
}
/* On 100g PFs, prevent old VFs from loading */
- if ((p_hwfn->p_dev->num_hwfns > 1) &&
+ if (ECORE_IS_CMT(p_hwfn->p_dev) &&
!(req->vfdev_info.capabilities & VFPF_ACQUIRE_CAP_100G)) {
DP_INFO(p_hwfn,
"VF[%d] is running an old driver that doesn't support"
@@ -1721,14 +1786,24 @@ static void ecore_iov_vf_mbx_acquire(struct ecore_hwfn *p_hwfn,
/* fill in pfdev info */
pfdev_info->chip_num = p_hwfn->p_dev->chip_num;
pfdev_info->db_size = 0; /* @@@ TBD MichalK Vf Doorbells */
- pfdev_info->indices_per_sb = PIS_PER_SB;
+ pfdev_info->indices_per_sb = PIS_PER_SB_E4;
pfdev_info->capabilities = PFVF_ACQUIRE_CAP_DEFAULT_UNTAGGED |
PFVF_ACQUIRE_CAP_POST_FW_OVERRIDE;
- if (p_hwfn->p_dev->num_hwfns > 1)
+ if (ECORE_IS_CMT(p_hwfn->p_dev))
pfdev_info->capabilities |= PFVF_ACQUIRE_CAP_100G;
- ecore_iov_vf_mbx_acquire_stats(p_hwfn, &pfdev_info->stats_info);
+ /* Share our ability to use multiple queue-ids only with VFs
+ * that request it.
+ */
+ if (req->vfdev_info.capabilities & VFPF_ACQUIRE_CAP_QUEUE_QIDS)
+ pfdev_info->capabilities |= PFVF_ACQUIRE_CAP_QUEUE_QIDS;
+
+ /* Share the sizes of the bars with VF */
+ resp->pfdev_info.bar_size = (u8)ecore_iov_vf_db_bar_size(p_hwfn,
+ p_ptt);
+
+ ecore_iov_vf_mbx_acquire_stats(&pfdev_info->stats_info);
OSAL_MEMCPY(pfdev_info->port_mac, p_hwfn->hw_info.hw_mac_addr,
ETH_ALEN);
@@ -1969,8 +2044,7 @@ ecore_iov_configure_vport_forced(struct ecore_hwfn *p_hwfn,
struct ecore_queue_cid *p_cid = OSAL_NULL;
/* There can be at most 1 Rx queue on qzone. Find it */
- p_cid = ecore_iov_get_vf_rx_queue_cid(p_hwfn, p_vf,
- p_queue);
+ p_cid = ecore_iov_get_vf_rx_queue_cid(p_queue);
if (p_cid == OSAL_NULL)
continue;
@@ -2106,16 +2180,19 @@ static void ecore_iov_vf_mbx_stop_vport(struct ecore_hwfn *p_hwfn,
u8 status = PFVF_STATUS_SUCCESS;
enum _ecore_status_t rc;
+ OSAL_IOV_VF_VPORT_STOP(p_hwfn, vf);
vf->vport_instance--;
vf->spoof_chk = false;
- if ((ecore_iov_validate_active_rxq(p_hwfn, vf)) ||
- (ecore_iov_validate_active_txq(p_hwfn, vf))) {
+ if ((ecore_iov_validate_active_rxq(vf)) ||
+ (ecore_iov_validate_active_txq(vf))) {
vf->b_malicious = true;
DP_NOTICE(p_hwfn, false,
"VF [%02x] - considered malicious;"
" Unable to stop RX/TX queuess\n",
vf->abs_vf_id);
+ status = PFVF_STATUS_MALICIOUS;
+ goto out;
}
rc = ecore_sp_vport_stop(p_hwfn, vf->opaque_fid, vf->vport_id);
@@ -2129,6 +2206,7 @@ static void ecore_iov_vf_mbx_stop_vport(struct ecore_hwfn *p_hwfn,
vf->configured_features = 0;
OSAL_MEMSET(&vf->shadow_config, 0, sizeof(vf->shadow_config));
+out:
ecore_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_VPORT_TEARDOWN,
sizeof(struct pfvf_def_resp_tlv), status);
}
@@ -2154,9 +2232,8 @@ static void ecore_iov_vf_mbx_start_rxq_resp(struct ecore_hwfn *p_hwfn,
else
length = sizeof(struct pfvf_def_resp_tlv);
- p_tlv = ecore_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_START_RXQ,
- length);
- ecore_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END,
+ p_tlv = ecore_add_tlv(&mbx->offset, CHANNEL_TLV_START_RXQ, length);
+ ecore_add_tlv(&mbx->offset, CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
/* Update the TLV with the response */
@@ -2171,6 +2248,42 @@ static void ecore_iov_vf_mbx_start_rxq_resp(struct ecore_hwfn *p_hwfn,
ecore_iov_send_response(p_hwfn, p_ptt, vf, length, status);
}
+static u8 ecore_iov_vf_mbx_qid(struct ecore_hwfn *p_hwfn,
+ struct ecore_vf_info *p_vf, bool b_is_tx)
+{
+ struct ecore_iov_vf_mbx *p_mbx = &p_vf->vf_mbx;
+ struct vfpf_qid_tlv *p_qid_tlv;
+
+ /* Search for the qid TLV only if the VF indicated it would provide one */
+ if (!(p_vf->acquire.vfdev_info.capabilities &
+ VFPF_ACQUIRE_CAP_QUEUE_QIDS)) {
+ if (b_is_tx)
+ return ECORE_IOV_LEGACY_QID_TX;
+ else
+ return ECORE_IOV_LEGACY_QID_RX;
+ }
+
+ p_qid_tlv = (struct vfpf_qid_tlv *)
+ ecore_iov_search_list_tlvs(p_hwfn, p_mbx->req_virt,
+ CHANNEL_TLV_QID);
+ if (p_qid_tlv == OSAL_NULL) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "VF[%2x]: Failed to provide qid\n",
+ p_vf->relative_vf_id);
+
+ return ECORE_IOV_QID_INVALID;
+ }
+
+ if (p_qid_tlv->qid >= MAX_QUEUES_PER_QZONE) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "VF[%02x]: Provided qid out-of-bounds %02x\n",
+ p_vf->relative_vf_id, p_qid_tlv->qid);
+ return ECORE_IOV_QID_INVALID;
+ }
+
+ return p_qid_tlv->qid;
+}
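
Legacy VFs that never negotiated VFPF_ACQUIRE_CAP_QUEUE_QIDS are pinned to fixed indices (0 for Rx, 1 for Tx, matching the ECORE_IOV_LEGACY_QID_* defines added to ecore_sriov.h), while QID-aware VFs must attach a CHANNEL_TLV_QID whose value stays below MAX_QUEUES_PER_QZONE. A standalone sketch of that selection (the QZONE limit below is an assumed value, for illustration only):

#include <stdbool.h>
#include <stdint.h>

#define LEGACY_QID_RX  0
#define LEGACY_QID_TX  1
#define QID_INVALID    0xFE
#define MAX_PER_QZONE  4    /* assumption - illustrative only */

static uint8_t resolve_qid_usage_idx(bool vf_supports_qids, bool has_qid_tlv,
                                     uint8_t tlv_qid, bool is_tx)
{
    if (!vf_supports_qids)
        return is_tx ? LEGACY_QID_TX : LEGACY_QID_RX;
    if (!has_qid_tlv || tlv_qid >= MAX_PER_QZONE)
        return QID_INVALID;
    return tlv_qid;
}
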
+
static void ecore_iov_vf_mbx_start_rxq(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
struct ecore_vf_info *vf)
@@ -2179,11 +2292,11 @@ static void ecore_iov_vf_mbx_start_rxq(struct ecore_hwfn *p_hwfn,
struct ecore_queue_cid_vf_params vf_params;
struct ecore_iov_vf_mbx *mbx = &vf->vf_mbx;
u8 status = PFVF_STATUS_NO_RESOURCE;
+ u8 qid_usage_idx, vf_legacy = 0;
struct ecore_vf_queue *p_queue;
struct vfpf_start_rxq_tlv *req;
struct ecore_queue_cid *p_cid;
- bool b_legacy_vf = false;
- u8 qid_usage_idx;
+ struct ecore_sb_info sb_dummy;
enum _ecore_status_t rc;
req = &mbx->req_virt->start_rxq;
@@ -2193,45 +2306,43 @@ static void ecore_iov_vf_mbx_start_rxq(struct ecore_hwfn *p_hwfn,
!ecore_iov_validate_sb(p_hwfn, vf, req->hw_sb))
goto out;
- /* Legacy VFs made assumptions on the CID their queues connected to,
- * assuming queue X used CID X.
- * TODO - need to validate that there was no official release post
- * the current legacy scheme that still made that assumption.
- */
- if (vf->acquire.vfdev_info.eth_fp_hsi_minor ==
- ETH_HSI_VER_NO_PKT_LEN_TUNN)
- b_legacy_vf = true;
+ qid_usage_idx = ecore_iov_vf_mbx_qid(p_hwfn, vf, false);
+ if (qid_usage_idx == ECORE_IOV_QID_INVALID)
+ goto out;
- /* Acquire a new queue-cid */
p_queue = &vf->vf_queues[req->rx_qid];
+ if (p_queue->cids[qid_usage_idx].p_cid)
+ goto out;
+
+ vf_legacy = ecore_vf_calculate_legacy(vf);
+ /* Acquire a new queue-cid */
OSAL_MEMSET(&params, 0, sizeof(params));
params.queue_id = (u8)p_queue->fw_rx_qid;
params.vport_id = vf->vport_id;
params.stats_id = vf->abs_vf_id + 0x10;
- params.sb = req->hw_sb;
- params.sb_idx = req->sb_index;
- /* TODO - set qid_usage_idx according to extended TLV. For now, use
- * '0' for Rx.
- */
- qid_usage_idx = 0;
+ /* Since IGU index is passed via sb_info, construct a dummy one */
+ OSAL_MEM_ZERO(&sb_dummy, sizeof(sb_dummy));
+ sb_dummy.igu_sb_id = req->hw_sb;
+ params.p_sb = &sb_dummy;
+ params.sb_idx = req->sb_index;
OSAL_MEM_ZERO(&vf_params, sizeof(vf_params));
vf_params.vfid = vf->relative_vf_id;
vf_params.vf_qid = (u8)req->rx_qid;
- vf_params.b_legacy = b_legacy_vf;
+ vf_params.vf_legacy = vf_legacy;
vf_params.qid_usage_idx = qid_usage_idx;
p_cid = ecore_eth_queue_to_cid(p_hwfn, vf->opaque_fid,
- &params, &vf_params);
+ &params, true, &vf_params);
if (p_cid == OSAL_NULL)
goto out;
/* Legacy VFs have their Producers in a different location, which they
* calculate on their own and clean the producer prior to this.
*/
- if (!b_legacy_vf)
+ if (!(vf_legacy & ECORE_QCID_LEGACY_VF_RX_PROD))
REG_WR(p_hwfn,
GTT_BAR0_MAP_REG_MSDM_RAM +
MSTORM_ETH_VF_PRODS_OFFSET(vf->abs_vf_id, req->rx_qid),
@@ -2254,7 +2365,8 @@ static void ecore_iov_vf_mbx_start_rxq(struct ecore_hwfn *p_hwfn,
out:
ecore_iov_vf_mbx_start_rxq_resp(p_hwfn, p_ptt, vf, status,
- b_legacy_vf);
+ !!(vf_legacy &
+ ECORE_QCID_LEGACY_VF_RX_PROD));
}
static void
@@ -2382,7 +2494,7 @@ static void ecore_iov_vf_mbx_update_tunn_param(struct ecore_hwfn *p_hwfn,
if (b_update_required) {
u16 geneve_port;
- rc = ecore_sp_pf_update_tunn_cfg(p_hwfn, &tunn,
+ rc = ecore_sp_pf_update_tunn_cfg(p_hwfn, p_ptt, &tunn,
ECORE_SPQ_MODE_EBLOCK,
OSAL_NULL);
if (rc != ECORE_SUCCESS)
@@ -2397,11 +2509,11 @@ static void ecore_iov_vf_mbx_update_tunn_param(struct ecore_hwfn *p_hwfn,
}
send_resp:
- p_resp = ecore_add_tlv(p_hwfn, &mbx->offset,
+ p_resp = ecore_add_tlv(&mbx->offset,
CHANNEL_TLV_UPDATE_TUNN_PARAM, sizeof(*p_resp));
ecore_iov_pf_update_tun_response(p_resp, p_tun, tunn_feature_mask);
- ecore_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END,
+ ecore_add_tlv(&mbx->offset, CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
ecore_iov_send_response(p_hwfn, p_ptt, p_vf, sizeof(*p_resp), status);
@@ -2433,9 +2545,8 @@ static void ecore_iov_vf_mbx_start_txq_resp(struct ecore_hwfn *p_hwfn,
else
length = sizeof(struct pfvf_def_resp_tlv);
- p_tlv = ecore_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_START_TXQ,
- length);
- ecore_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END,
+ p_tlv = ecore_add_tlv(&mbx->offset, CHANNEL_TLV_START_TXQ, length);
+ ecore_add_tlv(&mbx->offset, CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
/* Update the TLV with the response */
@@ -2456,8 +2567,8 @@ static void ecore_iov_vf_mbx_start_txq(struct ecore_hwfn *p_hwfn,
struct ecore_vf_queue *p_queue;
struct vfpf_start_txq_tlv *req;
struct ecore_queue_cid *p_cid;
- bool b_legacy_vf = false;
- u8 qid_usage_idx;
+ struct ecore_sb_info sb_dummy;
+ u8 qid_usage_idx, vf_legacy;
u32 cid = 0;
enum _ecore_status_t rc;
u16 pq;
@@ -2470,39 +2581,35 @@ static void ecore_iov_vf_mbx_start_txq(struct ecore_hwfn *p_hwfn,
!ecore_iov_validate_sb(p_hwfn, vf, req->hw_sb))
goto out;
- /* In case this is a legacy VF - need to know to use the right cids.
- * TODO - need to validate that there was no official release post
- * the current legacy scheme that still made that assumption.
- */
- if (vf->acquire.vfdev_info.eth_fp_hsi_minor ==
- ETH_HSI_VER_NO_PKT_LEN_TUNN)
- b_legacy_vf = true;
+ qid_usage_idx = ecore_iov_vf_mbx_qid(p_hwfn, vf, true);
+ if (qid_usage_idx == ECORE_IOV_QID_INVALID)
+ goto out;
- /* Acquire a new queue-cid */
p_queue = &vf->vf_queues[req->tx_qid];
+ if (p_queue->cids[qid_usage_idx].p_cid)
+ goto out;
+
+ vf_legacy = ecore_vf_calculate_legacy(vf);
+ /* Acquire a new queue-cid */
params.queue_id = p_queue->fw_tx_qid;
params.vport_id = vf->vport_id;
params.stats_id = vf->abs_vf_id + 0x10;
- params.sb = req->hw_sb;
- params.sb_idx = req->sb_index;
-
- /* TODO - set qid_usage_idx according to extended TLV. For now, use
- * '1' for Tx.
- */
- qid_usage_idx = 1;
- if (p_queue->cids[qid_usage_idx].p_cid)
- goto out;
+ /* Since IGU index is passed via sb_info, construct a dummy one */
+ OSAL_MEM_ZERO(&sb_dummy, sizeof(sb_dummy));
+ sb_dummy.igu_sb_id = req->hw_sb;
+ params.p_sb = &sb_dummy;
+ params.sb_idx = req->sb_index;
OSAL_MEM_ZERO(&vf_params, sizeof(vf_params));
vf_params.vfid = vf->relative_vf_id;
vf_params.vf_qid = (u8)req->tx_qid;
- vf_params.b_legacy = b_legacy_vf;
+ vf_params.vf_legacy = vf_legacy;
vf_params.qid_usage_idx = qid_usage_idx;
p_cid = ecore_eth_queue_to_cid(p_hwfn, vf->opaque_fid,
- &params, &vf_params);
+ &params, false, &vf_params);
if (p_cid == OSAL_NULL)
goto out;
@@ -2528,80 +2635,74 @@ out:
static enum _ecore_status_t ecore_iov_vf_stop_rxqs(struct ecore_hwfn *p_hwfn,
struct ecore_vf_info *vf,
u16 rxq_id,
- u8 num_rxqs,
+ u8 qid_usage_idx,
bool cqe_completion)
{
+ struct ecore_vf_queue *p_queue;
enum _ecore_status_t rc = ECORE_SUCCESS;
- int qid, i;
- /* TODO - improve validation [wrap around] */
- if (rxq_id + num_rxqs > OSAL_ARRAY_SIZE(vf->vf_queues))
+ if (!ecore_iov_validate_rxq(p_hwfn, vf, rxq_id,
+ ECORE_IOV_VALIDATE_Q_NA)) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "VF[%d] Tried Closing Rx 0x%04x.%02x which is inactive\n",
+ vf->relative_vf_id, rxq_id, qid_usage_idx);
return ECORE_INVAL;
+ }
- for (qid = rxq_id; qid < rxq_id + num_rxqs; qid++) {
- struct ecore_vf_queue *p_queue = &vf->vf_queues[qid];
- struct ecore_queue_cid **pp_cid = OSAL_NULL;
+ p_queue = &vf->vf_queues[rxq_id];
- /* There can be at most a single Rx per qzone. Find it */
- for (i = 0; i < MAX_QUEUES_PER_QZONE; i++) {
- if (p_queue->cids[i].p_cid &&
- !p_queue->cids[i].b_is_tx) {
- pp_cid = &p_queue->cids[i].p_cid;
- break;
- }
- }
- if (pp_cid == OSAL_NULL) {
- DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
- "Ignoring VF[%02x] request of closing Rx queue %04x - closed\n",
- vf->relative_vf_id, qid);
- continue;
- }
-
- rc = ecore_eth_rx_queue_stop(p_hwfn, *pp_cid,
- false, cqe_completion);
- if (rc != ECORE_SUCCESS)
- return rc;
+ /* We've validated the index and the existence of the active RXQ -
+ * now we need to make sure that it's using the correct qid.
+ */
+ if (!p_queue->cids[qid_usage_idx].p_cid ||
+ p_queue->cids[qid_usage_idx].b_is_tx) {
+ struct ecore_queue_cid *p_cid;
- *pp_cid = OSAL_NULL;
- vf->num_active_rxqs--;
+ p_cid = ecore_iov_get_vf_rx_queue_cid(p_queue);
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "VF[%d] - Tried Closing Rx 0x%04x.%02x, but Rx is at %04x.%02x\n",
+ vf->relative_vf_id, rxq_id, qid_usage_idx,
+ rxq_id, p_cid->qid_usage_idx);
+ return ECORE_INVAL;
}
- return rc;
+ /* Now that we know we have a valid Rx-queue - close it */
+ rc = ecore_eth_rx_queue_stop(p_hwfn,
+ p_queue->cids[qid_usage_idx].p_cid,
+ false, cqe_completion);
+ if (rc != ECORE_SUCCESS)
+ return rc;
+
+ p_queue->cids[qid_usage_idx].p_cid = OSAL_NULL;
+ vf->num_active_rxqs--;
+
+ return ECORE_SUCCESS;
}
static enum _ecore_status_t ecore_iov_vf_stop_txqs(struct ecore_hwfn *p_hwfn,
struct ecore_vf_info *vf,
- u16 txq_id, u8 num_txqs)
+ u16 txq_id,
+ u8 qid_usage_idx)
{
- enum _ecore_status_t rc = ECORE_SUCCESS;
struct ecore_vf_queue *p_queue;
- int qid, j;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
if (!ecore_iov_validate_txq(p_hwfn, vf, txq_id,
- ECORE_IOV_VALIDATE_Q_NA) ||
- !ecore_iov_validate_txq(p_hwfn, vf, txq_id + num_txqs,
ECORE_IOV_VALIDATE_Q_NA))
return ECORE_INVAL;
- for (qid = txq_id; qid < txq_id + num_txqs; qid++) {
- p_queue = &vf->vf_queues[qid];
- for (j = 0; j < MAX_QUEUES_PER_QZONE; j++) {
- if (p_queue->cids[j].p_cid == OSAL_NULL)
- continue;
-
- if (!p_queue->cids[j].b_is_tx)
- continue;
-
- rc = ecore_eth_tx_queue_stop(p_hwfn,
- p_queue->cids[j].p_cid);
- if (rc != ECORE_SUCCESS)
- return rc;
+ p_queue = &vf->vf_queues[txq_id];
+ if (!p_queue->cids[qid_usage_idx].p_cid ||
+ !p_queue->cids[qid_usage_idx].b_is_tx)
+ return ECORE_INVAL;
- p_queue->cids[j].p_cid = OSAL_NULL;
- }
- }
+ rc = ecore_eth_tx_queue_stop(p_hwfn,
+ p_queue->cids[qid_usage_idx].p_cid);
+ if (rc != ECORE_SUCCESS)
+ return rc;
- return rc;
+ p_queue->cids[qid_usage_idx].p_cid = OSAL_NULL;
+ return ECORE_SUCCESS;
}
static void ecore_iov_vf_mbx_stop_rxqs(struct ecore_hwfn *p_hwfn,
@@ -2610,20 +2711,34 @@ static void ecore_iov_vf_mbx_stop_rxqs(struct ecore_hwfn *p_hwfn,
{
u16 length = sizeof(struct pfvf_def_resp_tlv);
struct ecore_iov_vf_mbx *mbx = &vf->vf_mbx;
- u8 status = PFVF_STATUS_SUCCESS;
+ u8 status = PFVF_STATUS_FAILURE;
struct vfpf_stop_rxqs_tlv *req;
+ u8 qid_usage_idx;
enum _ecore_status_t rc;
- /* We give the option of starting from qid != 0, in this case we
- * need to make sure that qid + num_qs doesn't exceed the actual
- * amount of queues that exist.
+ /* Starting with CHANNEL_TLV_QID, it's assumed the 'num_rxqs'
+ * would be one. Since no older ecore passed multiple queues
+ * using this API, sanitize on the value.
*/
req = &mbx->req_virt->stop_rxqs;
- rc = ecore_iov_vf_stop_rxqs(p_hwfn, vf, req->rx_qid,
- req->num_rxqs, req->cqe_completion);
- if (rc)
- status = PFVF_STATUS_FAILURE;
+ if (req->num_rxqs != 1) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "Odd; VF[%d] tried stopping multiple Rx queues\n",
+ vf->relative_vf_id);
+ status = PFVF_STATUS_NOT_SUPPORTED;
+ goto out;
+ }
+
+ /* Find which qid-index is associated with the queue */
+ qid_usage_idx = ecore_iov_vf_mbx_qid(p_hwfn, vf, false);
+ if (qid_usage_idx == ECORE_IOV_QID_INVALID)
+ goto out;
+ rc = ecore_iov_vf_stop_rxqs(p_hwfn, vf, req->rx_qid,
+ qid_usage_idx, req->cqe_completion);
+ if (rc == ECORE_SUCCESS)
+ status = PFVF_STATUS_SUCCESS;
+out:
ecore_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_STOP_RXQS,
length, status);
}
@@ -2634,19 +2749,35 @@ static void ecore_iov_vf_mbx_stop_txqs(struct ecore_hwfn *p_hwfn,
{
u16 length = sizeof(struct pfvf_def_resp_tlv);
struct ecore_iov_vf_mbx *mbx = &vf->vf_mbx;
- u8 status = PFVF_STATUS_SUCCESS;
+ u8 status = PFVF_STATUS_FAILURE;
struct vfpf_stop_txqs_tlv *req;
+ u8 qid_usage_idx;
enum _ecore_status_t rc;
- /* We give the option of starting from qid != 0, in this case we
- * need to make sure that qid + num_qs doesn't exceed the actual
- * amount of queues that exist.
+ /* Starting with CHANNEL_TLV_QID, it's assumed the 'num_txqs'
+ * would be one. Since no older ecore passed multiple queues
+ * using this API, sanitize on the value.
*/
req = &mbx->req_virt->stop_txqs;
- rc = ecore_iov_vf_stop_txqs(p_hwfn, vf, req->tx_qid, req->num_txqs);
- if (rc)
- status = PFVF_STATUS_FAILURE;
+ if (req->num_txqs != 1) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "Odd; VF[%d] tried stopping multiple Tx queues\n",
+ vf->relative_vf_id);
+ status = PFVF_STATUS_NOT_SUPPORTED;
+ goto out;
+ }
+
+ /* Find which qid-index is associated with the queue */
+ qid_usage_idx = ecore_iov_vf_mbx_qid(p_hwfn, vf, true);
+ if (qid_usage_idx == ECORE_IOV_QID_INVALID)
+ goto out;
+ rc = ecore_iov_vf_stop_txqs(p_hwfn, vf, req->tx_qid,
+ qid_usage_idx);
+ if (rc == ECORE_SUCCESS)
+ status = PFVF_STATUS_SUCCESS;
+
+out:
ecore_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_STOP_TXQS,
length, status);
}
@@ -2662,6 +2793,7 @@ static void ecore_iov_vf_mbx_update_rxqs(struct ecore_hwfn *p_hwfn,
u8 status = PFVF_STATUS_FAILURE;
u8 complete_event_flg;
u8 complete_cqe_flg;
+ u8 qid_usage_idx;
enum _ecore_status_t rc;
u16 i;
@@ -2669,10 +2801,30 @@ static void ecore_iov_vf_mbx_update_rxqs(struct ecore_hwfn *p_hwfn,
complete_cqe_flg = !!(req->flags & VFPF_RXQ_UPD_COMPLETE_CQE_FLAG);
complete_event_flg = !!(req->flags & VFPF_RXQ_UPD_COMPLETE_EVENT_FLAG);
- /* Validate inputs */
+ qid_usage_idx = ecore_iov_vf_mbx_qid(p_hwfn, vf, false);
+ if (qid_usage_idx == ECORE_IOV_QID_INVALID)
+ goto out;
+
+ /* Starting with the addition of CHANNEL_TLV_QID, this API started
+ * expecting a single queue at a time. Validate this.
+ */
+ if ((vf->acquire.vfdev_info.capabilities &
+ VFPF_ACQUIRE_CAP_QUEUE_QIDS) &&
+ req->num_rxqs != 1) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "VF[%d] supports QIDs but sends multiple queues\n",
+ vf->relative_vf_id);
+ goto out;
+ }
+
+ /* Validate inputs - for the legacy case this is still true since
+ * qid_usage_idx for each Rx queue would be LEGACY_QID_RX.
+ */
for (i = req->rx_qid; i < req->rx_qid + req->num_rxqs; i++) {
if (!ecore_iov_validate_rxq(p_hwfn, vf, i,
- ECORE_IOV_VALIDATE_Q_ENABLE)) {
+ ECORE_IOV_VALIDATE_Q_NA) ||
+ !vf->vf_queues[i].cids[qid_usage_idx].p_cid ||
+ vf->vf_queues[i].cids[qid_usage_idx].b_is_tx) {
DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
"VF[%d]: Incorrect Rxqs [%04x, %02x]\n",
vf->relative_vf_id, req->rx_qid,
@@ -2682,12 +2834,9 @@ static void ecore_iov_vf_mbx_update_rxqs(struct ecore_hwfn *p_hwfn,
}
for (i = 0; i < req->num_rxqs; i++) {
- struct ecore_vf_queue *p_queue;
u16 qid = req->rx_qid + i;
- p_queue = &vf->vf_queues[qid];
- handlers[i] = ecore_iov_get_vf_rx_queue_cid(p_hwfn, vf,
- p_queue);
+ handlers[i] = vf->vf_queues[qid].cids[qid_usage_idx].p_cid;
}
rc = ecore_sp_eth_rx_queues_update(p_hwfn, (void **)&handlers,
@@ -2696,7 +2845,7 @@ static void ecore_iov_vf_mbx_update_rxqs(struct ecore_hwfn *p_hwfn,
complete_event_flg,
ECORE_SPQ_MODE_EBLOCK,
OSAL_NULL);
- if (rc)
+ if (rc != ECORE_SUCCESS)
goto out;
status = PFVF_STATUS_SUCCESS;
@@ -2931,8 +3080,7 @@ ecore_iov_vp_update_rss_param(struct ecore_hwfn *p_hwfn,
goto out;
}
- p_cid = ecore_iov_get_vf_rx_queue_cid(p_hwfn, vf,
- &vf->vf_queues[q_idx]);
+ p_cid = ecore_iov_get_vf_rx_queue_cid(&vf->vf_queues[q_idx]);
p_rss->rss_ind_table[i] = p_cid;
}
@@ -2945,7 +3093,6 @@ out:
static void
ecore_iov_vp_update_sge_tpa_param(struct ecore_hwfn *p_hwfn,
- struct ecore_vf_info *vf,
struct ecore_sp_vport_update_params *p_data,
struct ecore_sge_tpa_params *p_sge_tpa,
struct ecore_iov_vf_mbx *p_mbx,
@@ -3035,7 +3182,7 @@ static void ecore_iov_vf_mbx_vport_update(struct ecore_hwfn *p_hwfn,
ecore_iov_vp_update_mcast_bin_param(p_hwfn, &params, mbx, &tlvs_mask);
ecore_iov_vp_update_accept_flag(p_hwfn, &params, mbx, &tlvs_mask);
ecore_iov_vp_update_accept_any_vlan(p_hwfn, &params, mbx, &tlvs_mask);
- ecore_iov_vp_update_sge_tpa_param(p_hwfn, vf, &params,
+ ecore_iov_vp_update_sge_tpa_param(p_hwfn, &params,
&sge_tpa_params, mbx, &tlvs_mask);
tlvs_accepted = tlvs_mask;
@@ -3066,8 +3213,8 @@ static void ecore_iov_vf_mbx_vport_update(struct ecore_hwfn *p_hwfn,
"Upper-layer prevents said VF"
" configuration\n");
else
- DP_NOTICE(p_hwfn, true,
- "No feature tlvs found for vport update\n");
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "No feature tlvs found for vport update\n");
status = PFVF_STATUS_NOT_SUPPORTED;
goto out;
}
@@ -3272,12 +3419,13 @@ static void ecore_iov_vf_mbx_ucast_filter(struct ecore_hwfn *p_hwfn,
goto out;
}
- /* Update shadow copy of the VF configuration */
+ /* Update shadow copy of the VF configuration. In case the shadow
+ * indicates the action should be blocked, return success to the VF
+ * to imitate the firmware behaviour in such a case.
+ */
if (ecore_iov_vf_update_unicast_shadow(p_hwfn, vf, &params) !=
- ECORE_SUCCESS) {
- status = PFVF_STATUS_FAILURE;
+ ECORE_SUCCESS)
goto out;
- }
/* Determine if the unicast filtering is acceptable by PF */
if ((p_bulletin->valid_bitmap & (1 << VLAN_ADDR_FORCED)) &&
@@ -3382,6 +3530,76 @@ static void ecore_iov_vf_mbx_release(struct ecore_hwfn *p_hwfn,
length, status);
}
+static void ecore_iov_vf_pf_get_coalesce(struct ecore_hwfn *p_hwfn,
+ struct ecore_ptt *p_ptt,
+ struct ecore_vf_info *p_vf)
+{
+ struct ecore_iov_vf_mbx *mbx = &p_vf->vf_mbx;
+ struct pfvf_read_coal_resp_tlv *p_resp;
+ struct vfpf_read_coal_req_tlv *req;
+ u8 status = PFVF_STATUS_FAILURE;
+ struct ecore_vf_queue *p_queue;
+ struct ecore_queue_cid *p_cid;
+ enum _ecore_status_t rc = ECORE_SUCCESS;
+ u16 coal = 0, qid, i;
+ bool b_is_rx;
+
+ mbx->offset = (u8 *)mbx->reply_virt;
+ req = &mbx->req_virt->read_coal_req;
+
+ qid = req->qid;
+ b_is_rx = req->is_rx ? true : false;
+
+ if (b_is_rx) {
+ if (!ecore_iov_validate_rxq(p_hwfn, p_vf, qid,
+ ECORE_IOV_VALIDATE_Q_ENABLE)) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "VF[%d]: Invalid Rx queue_id = %d\n",
+ p_vf->abs_vf_id, qid);
+ goto send_resp;
+ }
+
+ p_cid = ecore_iov_get_vf_rx_queue_cid(&p_vf->vf_queues[qid]);
+ rc = ecore_get_rxq_coalesce(p_hwfn, p_ptt, p_cid, &coal);
+ if (rc != ECORE_SUCCESS)
+ goto send_resp;
+ } else {
+ if (!ecore_iov_validate_txq(p_hwfn, p_vf, qid,
+ ECORE_IOV_VALIDATE_Q_ENABLE)) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "VF[%d]: Invalid Tx queue_id = %d\n",
+ p_vf->abs_vf_id, qid);
+ goto send_resp;
+ }
+ for (i = 0; i < MAX_QUEUES_PER_QZONE; i++) {
+ p_queue = &p_vf->vf_queues[qid];
+ if ((p_queue->cids[i].p_cid == OSAL_NULL) ||
+ (!p_queue->cids[i].b_is_tx))
+ continue;
+
+ p_cid = p_queue->cids[i].p_cid;
+
+ rc = ecore_get_txq_coalesce(p_hwfn, p_ptt,
+ p_cid, &coal);
+ if (rc != ECORE_SUCCESS)
+ goto send_resp;
+ break;
+ }
+ }
+
+ status = PFVF_STATUS_SUCCESS;
+
+send_resp:
+ p_resp = ecore_add_tlv(&mbx->offset, CHANNEL_TLV_COALESCE_READ,
+ sizeof(*p_resp));
+ p_resp->coal = coal;
+
+ ecore_add_tlv(&mbx->offset, CHANNEL_TLV_LIST_END,
+ sizeof(struct channel_list_end_tlv));
+
+ ecore_iov_send_response(p_hwfn, p_ptt, p_vf, sizeof(*p_resp), status);
+}
+
static void ecore_iov_vf_pf_set_coalesce(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
struct ecore_vf_info *vf)
@@ -3422,8 +3640,7 @@ static void ecore_iov_vf_pf_set_coalesce(struct ecore_hwfn *p_hwfn,
vf->abs_vf_id, rx_coal, tx_coal, qid);
if (rx_coal) {
- p_cid = ecore_iov_get_vf_rx_queue_cid(p_hwfn, vf,
- &vf->vf_queues[qid]);
+ p_cid = ecore_iov_get_vf_rx_queue_cid(&vf->vf_queues[qid]);
rc = ecore_set_rxq_coalesce(p_hwfn, p_ptt, rx_coal, p_cid);
if (rc != ECORE_SUCCESS) {
@@ -3432,6 +3649,7 @@ static void ecore_iov_vf_pf_set_coalesce(struct ecore_hwfn *p_hwfn,
vf->abs_vf_id, vf->vf_queues[qid].fw_rx_qid);
goto out;
}
+ vf->rx_coal = rx_coal;
}
/* TODO - in future, it might be possible to pass this in a per-cid
@@ -3456,6 +3674,7 @@ static void ecore_iov_vf_pf_set_coalesce(struct ecore_hwfn *p_hwfn,
goto out;
}
}
+ vf->tx_coal = tx_coal;
}
status = PFVF_STATUS_SUCCESS;
@@ -3464,6 +3683,92 @@ out:
sizeof(struct pfvf_def_resp_tlv), status);
}
+enum _ecore_status_t
+ecore_iov_pf_configure_vf_queue_coalesce(struct ecore_hwfn *p_hwfn,
+ u16 rx_coal, u16 tx_coal,
+ u16 vf_id, u16 qid)
+{
+ struct ecore_queue_cid *p_cid;
+ struct ecore_vf_info *vf;
+ struct ecore_ptt *p_ptt;
+ int i, rc = 0;
+
+ if (!ecore_iov_is_valid_vfid(p_hwfn, vf_id, true, true)) {
+ DP_NOTICE(p_hwfn, true,
+ "VF[%d] - Can not set coalescing: VF is not active\n",
+ vf_id);
+ return ECORE_INVAL;
+ }
+
+ vf = &p_hwfn->pf_iov_info->vfs_array[vf_id];
+ p_ptt = ecore_ptt_acquire(p_hwfn);
+ if (!p_ptt)
+ return ECORE_AGAIN;
+
+ if (!ecore_iov_validate_rxq(p_hwfn, vf, qid,
+ ECORE_IOV_VALIDATE_Q_ENABLE) &&
+ rx_coal) {
+ DP_ERR(p_hwfn, "VF[%d]: Invalid Rx queue_id = %d\n",
+ vf->abs_vf_id, qid);
+ goto out;
+ }
+
+ if (!ecore_iov_validate_txq(p_hwfn, vf, qid,
+ ECORE_IOV_VALIDATE_Q_ENABLE) &&
+ tx_coal) {
+ DP_ERR(p_hwfn, "VF[%d]: Invalid Tx queue_id = %d\n",
+ vf->abs_vf_id, qid);
+ goto out;
+ }
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "VF[%d]: Setting coalesce for VF rx_coal = %d, tx_coal = %d at queue = %d\n",
+ vf->abs_vf_id, rx_coal, tx_coal, qid);
+
+ if (rx_coal) {
+ p_cid = ecore_iov_get_vf_rx_queue_cid(&vf->vf_queues[qid]);
+
+ rc = ecore_set_rxq_coalesce(p_hwfn, p_ptt, rx_coal, p_cid);
+ if (rc != ECORE_SUCCESS) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "VF[%d]: Unable to set rx queue = %d coalesce\n",
+ vf->abs_vf_id, vf->vf_queues[qid].fw_rx_qid);
+ goto out;
+ }
+ vf->rx_coal = rx_coal;
+ }
+
+ /* TODO - in future, it might be possible to pass this in a per-cid
+ * granularity. For now, do this for all Tx queues.
+ */
+ if (tx_coal) {
+ struct ecore_vf_queue *p_queue = &vf->vf_queues[qid];
+
+ for (i = 0; i < MAX_QUEUES_PER_QZONE; i++) {
+ if (p_queue->cids[i].p_cid == OSAL_NULL)
+ continue;
+
+ if (!p_queue->cids[i].b_is_tx)
+ continue;
+
+ rc = ecore_set_txq_coalesce(p_hwfn, p_ptt, tx_coal,
+ p_queue->cids[i].p_cid);
+ if (rc != ECORE_SUCCESS) {
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "VF[%d]: Unable to set tx queue coalesce\n",
+ vf->abs_vf_id);
+ goto out;
+ }
+ }
+ vf->tx_coal = tx_coal;
+ }
+
+out:
+ ecore_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
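
A possible PF-side usage fragment for the new export (a sketch, not taken from the patch; the VF id, queue id and coalescing values are assumptions chosen for illustration):

enum _ecore_status_t rc;

/* Hypothetical call site - set Rx/Tx coalescing of 64/128 on queue 0 of VF 3 */
rc = ecore_iov_pf_configure_vf_queue_coalesce(p_hwfn, 64, 128, 3, 0);
if (rc != ECORE_SUCCESS)
    DP_NOTICE(p_hwfn, false, "Failed to configure VF queue coalescing\n");
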
+
static enum _ecore_status_t
ecore_iov_vf_flr_poll_dorq(struct ecore_hwfn *p_hwfn,
struct ecore_vf_info *p_vf, struct ecore_ptt *p_ptt)
@@ -3495,11 +3800,11 @@ static enum _ecore_status_t
ecore_iov_vf_flr_poll_pbf(struct ecore_hwfn *p_hwfn,
struct ecore_vf_info *p_vf, struct ecore_ptt *p_ptt)
{
- u32 cons[MAX_NUM_VOQS], distance[MAX_NUM_VOQS];
+ u32 cons[MAX_NUM_VOQS_E4], distance[MAX_NUM_VOQS_E4];
int i, cnt;
/* Read initial consumers & producers */
- for (i = 0; i < MAX_NUM_VOQS; i++) {
+ for (i = 0; i < MAX_NUM_VOQS_E4; i++) {
u32 prod;
cons[i] = ecore_rd(p_hwfn, p_ptt,
@@ -3514,7 +3819,7 @@ ecore_iov_vf_flr_poll_pbf(struct ecore_hwfn *p_hwfn,
/* Wait for consumers to pass the producers */
i = 0;
for (cnt = 0; cnt < 50; cnt++) {
- for (; i < MAX_NUM_VOQS; i++) {
+ for (; i < MAX_NUM_VOQS_E4; i++) {
u32 tmp;
tmp = ecore_rd(p_hwfn, p_ptt,
@@ -3524,7 +3829,7 @@ ecore_iov_vf_flr_poll_pbf(struct ecore_hwfn *p_hwfn,
break;
}
- if (i == MAX_NUM_VOQS)
+ if (i == MAX_NUM_VOQS_E4)
break;
OSAL_MSLEEP(20);
@@ -3623,8 +3928,7 @@ cleanup:
ack_vfs[vfid / 32] |= (1 << (vfid % 32));
p_hwfn->pf_iov_info->pending_flr[rel_vf_id / 64] &=
~(1ULL << (rel_vf_id % 64));
- p_hwfn->pf_iov_info->pending_events[rel_vf_id / 64] &=
- ~(1ULL << (rel_vf_id % 64));
+ p_vf->vf_mbx.b_pending_msg = false;
}
return rc;
@@ -3734,11 +4038,11 @@ void ecore_iov_get_link(struct ecore_hwfn *p_hwfn,
p_bulletin = p_vf->bulletin.p_virt;
if (p_params)
- __ecore_vf_get_link_params(p_hwfn, p_params, p_bulletin);
+ __ecore_vf_get_link_params(p_params, p_bulletin);
if (p_link)
- __ecore_vf_get_link_state(p_hwfn, p_link, p_bulletin);
+ __ecore_vf_get_link_state(p_link, p_bulletin);
if (p_caps)
- __ecore_vf_get_link_caps(p_hwfn, p_caps, p_bulletin);
+ __ecore_vf_get_link_caps(p_caps, p_bulletin);
}
void ecore_iov_process_mbx_req(struct ecore_hwfn *p_hwfn,
@@ -3754,12 +4058,22 @@ void ecore_iov_process_mbx_req(struct ecore_hwfn *p_hwfn,
mbx = &p_vf->vf_mbx;
/* ecore_iov_process_mbx_request */
- DP_VERBOSE(p_hwfn,
- ECORE_MSG_IOV,
- "VF[%02x]: Processing mailbox message\n", p_vf->abs_vf_id);
+#ifndef CONFIG_ECORE_SW_CHANNEL
+ if (!mbx->b_pending_msg) {
+ DP_NOTICE(p_hwfn, true,
+ "VF[%02x]: Trying to process mailbox message when none is pending\n",
+ p_vf->abs_vf_id);
+ return;
+ }
+ mbx->b_pending_msg = false;
+#endif
mbx->first_tlv = mbx->req_virt->first_tlv;
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "VF[%02x]: Processing mailbox message [type %04x]\n",
+ p_vf->abs_vf_id, mbx->first_tlv.tl.type);
+
OSAL_IOV_VF_MSG_TYPE(p_hwfn,
p_vf->relative_vf_id,
mbx->first_tlv.tl.type);
@@ -3820,6 +4134,9 @@ void ecore_iov_process_mbx_req(struct ecore_hwfn *p_hwfn,
case CHANNEL_TLV_COALESCE_UPDATE:
ecore_iov_vf_pf_set_coalesce(p_hwfn, p_ptt, p_vf);
break;
+ case CHANNEL_TLV_COALESCE_READ:
+ ecore_iov_vf_pf_get_coalesce(p_hwfn, p_ptt, p_vf);
+ break;
}
} else if (ecore_iov_tlv_supported(mbx->first_tlv.tl.type)) {
/* If we've received a message from a VF we consider malicious
@@ -3884,26 +4201,20 @@ void ecore_iov_process_mbx_req(struct ecore_hwfn *p_hwfn,
#endif
}
-void ecore_iov_pf_add_pending_events(struct ecore_hwfn *p_hwfn, u8 vfid)
+void ecore_iov_pf_get_pending_events(struct ecore_hwfn *p_hwfn,
+ u64 *events)
{
- u64 add_bit = 1ULL << (vfid % 64);
+ int i;
- /* TODO - add locking mechanisms [no atomics in ecore, so we can't
- * add the lock inside the ecore_pf_iov struct].
- */
- p_hwfn->pf_iov_info->pending_events[vfid / 64] |= add_bit;
-}
+ OSAL_MEM_ZERO(events, sizeof(u64) * ECORE_VF_ARRAY_LENGTH);
-void ecore_iov_pf_get_and_clear_pending_events(struct ecore_hwfn *p_hwfn,
- u64 *events)
-{
- u64 *p_pending_events = p_hwfn->pf_iov_info->pending_events;
+ ecore_for_each_vf(p_hwfn, i) {
+ struct ecore_vf_info *p_vf;
- /* TODO - Take a lock */
- OSAL_MEMCPY(events, p_pending_events,
- sizeof(u64) * ECORE_VF_ARRAY_LENGTH);
- OSAL_MEMSET(p_pending_events, 0,
- sizeof(u64) * ECORE_VF_ARRAY_LENGTH);
+ p_vf = &p_hwfn->pf_iov_info->vfs_array[i];
+ if (p_vf->vf_mbx.b_pending_msg)
+ events[i / 64] |= 1ULL << (i % 64);
+ }
}
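
With pending_events gone from struct ecore_pf_iov, the request bitmap handed to the caller is rebuilt on demand from each VF's b_pending_msg flag instead of being mutated from the EQ path. A standalone sketch of that rebuild (plain C, illustrative types):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Sketch: pack per-VF pending flags into a u64-per-64-VFs bitmap. */
static void build_pending_bitmap(const bool *pending, int num_vfs,
                                 uint64_t *events, int num_words)
{
    int i;

    memset(events, 0, num_words * sizeof(uint64_t));
    for (i = 0; i < num_vfs && (i / 64) < num_words; i++)
        if (pending[i])
            events[i / 64] |= 1ULL << (i % 64);
}
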
static struct ecore_vf_info *
@@ -3937,6 +4248,8 @@ static enum _ecore_status_t ecore_sriov_vfpf_msg(struct ecore_hwfn *p_hwfn,
*/
p_vf->vf_mbx.pending_req = (((u64)vf_msg->hi) << 32) | vf_msg->lo;
+ p_vf->vf_mbx.b_pending_msg = true;
+
return OSAL_PF_VF_MSG(p_hwfn, p_vf->relative_vf_id);
}
@@ -3945,24 +4258,31 @@ static void ecore_sriov_vfpf_malicious(struct ecore_hwfn *p_hwfn,
{
struct ecore_vf_info *p_vf;
- p_vf = ecore_sriov_get_vf_from_absid(p_hwfn, p_data->vfId);
+ p_vf = ecore_sriov_get_vf_from_absid(p_hwfn, p_data->vf_id);
if (!p_vf)
return;
- DP_INFO(p_hwfn,
- "VF [%d] - Malicious behavior [%02x]\n",
- p_vf->abs_vf_id, p_data->errId);
+ if (!p_vf->b_malicious) {
+ DP_NOTICE(p_hwfn, false,
+ "VF [%d] - Malicious behavior [%02x]\n",
+ p_vf->abs_vf_id, p_data->err_id);
- p_vf->b_malicious = true;
+ p_vf->b_malicious = true;
+ } else {
+ DP_INFO(p_hwfn,
+ "VF [%d] - Malicious behavior [%02x]\n",
+ p_vf->abs_vf_id, p_data->err_id);
+ }
OSAL_PF_VF_MALICIOUS(p_hwfn, p_vf->relative_vf_id);
}
-enum _ecore_status_t ecore_sriov_eqe_event(struct ecore_hwfn *p_hwfn,
- u8 opcode,
- __le16 echo,
- union event_ring_data *data)
+static enum _ecore_status_t ecore_sriov_eqe_event(struct ecore_hwfn *p_hwfn,
+ u8 opcode,
+ __le16 echo,
+ union event_ring_data *data,
+ u8 OSAL_UNUSED fw_return_code)
{
switch (opcode) {
case COMMON_EVENT_VF_PF_CHANNEL:
@@ -4001,7 +4321,7 @@ u16 ecore_iov_get_next_active_vf(struct ecore_hwfn *p_hwfn, u16 rel_vf_id)
return i;
out:
- return E4_MAX_NUM_VFS;
+ return MAX_NUM_VFS_E4;
}
enum _ecore_status_t ecore_iov_copy_vf_msg(struct ecore_hwfn *p_hwfn,
@@ -4370,6 +4690,7 @@ enum _ecore_status_t ecore_iov_configure_tx_rate(struct ecore_hwfn *p_hwfn,
struct ecore_ptt *p_ptt,
int vfid, int val)
{
+ struct ecore_mcp_link_state *p_link;
struct ecore_vf_info *vf;
u8 abs_vp_id = 0;
enum _ecore_status_t rc;
@@ -4383,7 +4704,10 @@ enum _ecore_status_t ecore_iov_configure_tx_rate(struct ecore_hwfn *p_hwfn,
if (rc != ECORE_SUCCESS)
return rc;
- return ecore_init_vport_rl(p_hwfn, p_ptt, abs_vp_id, (u32)val);
+ p_link = &ECORE_LEADING_HWFN(p_hwfn->p_dev)->mcp_info->link_output;
+
+ return ecore_init_vport_rl(p_hwfn, p_ptt, abs_vp_id, (u32)val,
+ p_link->speed);
}
enum _ecore_status_t ecore_iov_get_vf_stats(struct ecore_hwfn *p_hwfn,
@@ -4513,3 +4837,17 @@ ecore_iov_get_vf_min_rate(struct ecore_hwfn *p_hwfn, int vfid)
else
return 0;
}
+
+#ifdef CONFIG_ECORE_SW_CHANNEL
+void ecore_iov_set_vf_hw_channel(struct ecore_hwfn *p_hwfn, int vfid,
+ bool b_is_hw)
+{
+ struct ecore_vf_info *vf_info;
+
+ vf_info = ecore_iov_get_vf_info(p_hwfn, (u16)vfid, true);
+ if (!vf_info)
+ return;
+
+ vf_info->b_hw_channel = b_is_hw;
+}
+#endif
diff --git a/drivers/net/qede/base/ecore_sriov.h b/drivers/net/qede/base/ecore_sriov.h
index 3c2f58bd..850b1052 100644
--- a/drivers/net/qede/base/ecore_sriov.h
+++ b/drivers/net/qede/base/ecore_sriov.h
@@ -16,7 +16,7 @@
#include "ecore_l2.h"
#define ECORE_ETH_MAX_VF_NUM_VLAN_FILTERS \
- (E4_MAX_NUM_VFS * ECORE_ETH_VF_NUM_VLAN_FILTERS)
+ (MAX_NUM_VFS_E4 * ECORE_ETH_VF_NUM_VLAN_FILTERS)
/* Represents a full message. Both the request filled by VF
* and the response filled by the PF. The VF needs one copy
@@ -45,6 +45,9 @@ struct ecore_iov_vf_mbx {
/* Address in VF where a pending message is located */
dma_addr_t pending_req;
+ /* Message from VF awaits handling */
+ bool b_pending_msg;
+
u8 *offset;
#ifdef CONFIG_ECORE_SW_CHANNEL
@@ -63,6 +66,10 @@ struct ecore_iov_vf_mbx {
*/
};
+#define ECORE_IOV_LEGACY_QID_RX (0)
+#define ECORE_IOV_LEGACY_QID_TX (1)
+#define ECORE_IOV_QID_INVALID (0xFE)
+
struct ecore_vf_queue_cid {
bool b_is_tx;
struct ecore_queue_cid *p_cid;
@@ -110,6 +117,11 @@ struct ecore_vf_info {
struct ecore_bulletin bulletin;
dma_addr_t vf_bulletin;
+#ifdef CONFIG_ECORE_SW_CHANNEL
+ /* Determine whether PF communicates with VF using HW/SW channel */
+ bool b_hw_channel;
+#endif
+
/* PF saves a copy of the last VF acquire message */
struct vfpf_acquire_tlv acquire;
@@ -129,6 +141,9 @@ struct ecore_vf_info {
u8 num_rxqs;
u8 num_txqs;
+ u16 rx_coal;
+ u16 tx_coal;
+
u8 num_sbs;
u8 num_mac_filters;
@@ -160,8 +175,7 @@ struct ecore_vf_info {
* capability enabled.
*/
struct ecore_pf_iov {
- struct ecore_vf_info vfs_array[E4_MAX_NUM_VFS];
- u64 pending_events[ECORE_VF_ARRAY_LENGTH];
+ struct ecore_vf_info vfs_array[MAX_NUM_VFS_E4];
u64 pending_flr[ECORE_VF_ARRAY_LENGTH];
#ifndef REMOVE_DBG
@@ -197,17 +211,13 @@ enum _ecore_status_t ecore_iov_hw_info(struct ecore_hwfn *p_hwfn);
/**
* @brief ecore_add_tlv - place a given tlv on the tlv buffer at next offset
*
- * @param p_hwfn
- * @param p_iov
+ * @param offset
* @param type
* @param length
*
* @return pointer to the newly placed tlv
*/
-void *ecore_add_tlv(struct ecore_hwfn *p_hwfn,
- u8 **offset,
- u16 type,
- u16 length);
+void *ecore_add_tlv(u8 **offset, u16 type, u16 length);
/**
* @brief list the types and lengths of the tlvs on the buffer
@@ -231,10 +241,8 @@ enum _ecore_status_t ecore_iov_alloc(struct ecore_hwfn *p_hwfn);
* @brief ecore_iov_setup - setup sriov related resources
*
* @param p_hwfn
- * @param p_ptt
*/
-void ecore_iov_setup(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt);
+void ecore_iov_setup(struct ecore_hwfn *p_hwfn);
/**
* @brief ecore_iov_free - free sriov related resources
@@ -251,38 +259,12 @@ void ecore_iov_free(struct ecore_hwfn *p_hwfn);
void ecore_iov_free_hw_info(struct ecore_dev *p_dev);
/**
- * @brief ecore_sriov_eqe_event - handle async sriov event arrived on eqe.
- *
- * @param p_hwfn
- * @param opcode
- * @param echo
- * @param data
- */
-enum _ecore_status_t ecore_sriov_eqe_event(struct ecore_hwfn *p_hwfn,
- u8 opcode,
- __le16 echo,
- union event_ring_data *data);
-
-/**
- * @brief calculate CRC for bulletin board validation
- *
- * @param basic crc seed
- * @param ptr to beginning of buffer
- * @length in bytes of buffer
- *
- * @return calculated crc over buffer [with respect to seed].
- */
-u32 ecore_crc32(u32 crc,
- u8 *ptr,
- u32 length);
-
-/**
* @brief Mark structs of vfs that have been FLR-ed.
*
* @param p_hwfn
* @param disabled_vfs - bitmask of all VFs on path that were FLRed
*
- * @return 1 iff one of the PF's vfs got FLRed. 0 otherwise.
+ * @return true iff one of the PF's vfs got FLRed. false otherwise.
*/
bool ecore_iov_mark_vf_flr(struct ecore_hwfn *p_hwfn,
u32 *disabled_vfs);
diff --git a/drivers/net/qede/base/ecore_vf.c b/drivers/net/qede/base/ecore_vf.c
index f4d331cf..25109dbd 100644
--- a/drivers/net/qede/base/ecore_vf.c
+++ b/drivers/net/qede/base/ecore_vf.c
@@ -44,7 +44,7 @@ static void *ecore_vf_pf_prep(struct ecore_hwfn *p_hwfn, u16 type, u16 length)
OSAL_MEMSET(p_iov->pf2vf_reply, 0, sizeof(union pfvf_tlvs));
/* Init type and length */
- p_tlv = ecore_add_tlv(p_hwfn, &p_iov->offset, type, length);
+ p_tlv = ecore_add_tlv(&p_iov->offset, type, length);
/* Init first tlv header */
((struct vfpf_first_tlv *)p_tlv)->reply_address =
@@ -65,6 +65,14 @@ static void ecore_vf_pf_req_end(struct ecore_hwfn *p_hwfn,
OSAL_MUTEX_RELEASE(&p_hwfn->vf_iov_info->mutex);
}
+#ifdef CONFIG_ECORE_SW_CHANNEL
+/* The SW channel implementation of Windows needs to know the 'exact'
+ * response size of any given message. That means that for future
+ * messages we'd be unable to send TLVs to the PF if it couldn't answer
+ * them when |response| != |default response|.
+ * We'd need to handshake any such TLVs in the acquire capabilities.
+ */
+#endif
static enum _ecore_status_t
ecore_send_msg2pf(struct ecore_hwfn *p_hwfn,
u8 *done, u32 resp_size)
@@ -122,35 +130,118 @@ ecore_send_msg2pf(struct ecore_hwfn *p_hwfn,
}
if (!*done) {
- DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
- "VF <-- PF Timeout [Type %d]\n",
- p_req->first_tlv.tl.type);
+ DP_NOTICE(p_hwfn, true,
+ "VF <-- PF Timeout [Type %d]\n",
+ p_req->first_tlv.tl.type);
rc = ECORE_TIMEOUT;
} else {
- DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
- "PF response: %d [Type %d]\n",
- *done, p_req->first_tlv.tl.type);
+ if ((*done != PFVF_STATUS_SUCCESS) &&
+ (*done != PFVF_STATUS_NO_RESOURCE))
+ DP_NOTICE(p_hwfn, false,
+ "PF response: %d [Type %d]\n",
+ *done, p_req->first_tlv.tl.type);
+ else
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "PF response: %d [Type %d]\n",
+ *done, p_req->first_tlv.tl.type);
+ }
+
+ return rc;
+}
+
+static void ecore_vf_pf_add_qid(struct ecore_hwfn *p_hwfn,
+ struct ecore_queue_cid *p_cid)
+{
+ struct ecore_vf_iov *p_iov = p_hwfn->vf_iov_info;
+ struct vfpf_qid_tlv *p_qid_tlv;
+
+ /* Only add QIDs for the queue if it was negotiated with PF */
+ if (!(p_iov->acquire_resp.pfdev_info.capabilities &
+ PFVF_ACQUIRE_CAP_QUEUE_QIDS))
+ return;
+
+ p_qid_tlv = ecore_add_tlv(&p_iov->offset,
+ CHANNEL_TLV_QID, sizeof(*p_qid_tlv));
+ p_qid_tlv->qid = p_cid->qid_usage_idx;
+}
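
On the VF side the qid TLV is simply chained into the request buffer right before the list-end TLV, using the slimmed-down ecore_add_tlv(). A standalone mock of that chaining pattern (not the driver's implementation; the header layout is assumed for illustration):

#include <stdint.h>

struct tlv_hdr {
    uint16_t type;
    uint16_t length;
};

/* Sketch: place a TLV at the running offset and advance past it. */
static void *add_tlv(uint8_t **offset, uint16_t type, uint16_t length)
{
    struct tlv_hdr *tl = (struct tlv_hdr *)*offset;

    tl->type = type;
    tl->length = length;
    *offset += length;
    return tl;
}
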
+
+enum _ecore_status_t _ecore_vf_pf_release(struct ecore_hwfn *p_hwfn,
+ bool b_final)
+{
+ struct ecore_vf_iov *p_iov = p_hwfn->vf_iov_info;
+ struct pfvf_def_resp_tlv *resp;
+ struct vfpf_first_tlv *req;
+ u32 size;
+ enum _ecore_status_t rc;
+
+ /* clear mailbox and prep first tlv */
+ req = ecore_vf_pf_prep(p_hwfn, CHANNEL_TLV_RELEASE, sizeof(*req));
+
+ /* add list termination tlv */
+ ecore_add_tlv(&p_iov->offset,
+ CHANNEL_TLV_LIST_END,
+ sizeof(struct channel_list_end_tlv));
+
+ resp = &p_iov->pf2vf_reply->default_resp;
+ rc = ecore_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
+
+ if (rc == ECORE_SUCCESS && resp->hdr.status != PFVF_STATUS_SUCCESS)
+ rc = ECORE_AGAIN;
+
+ ecore_vf_pf_req_end(p_hwfn, rc);
+ if (!b_final)
+ return rc;
+
+ p_hwfn->b_int_enabled = 0;
+
+ if (p_iov->vf2pf_request)
+ OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev,
+ p_iov->vf2pf_request,
+ p_iov->vf2pf_request_phys,
+ sizeof(union vfpf_tlvs));
+ if (p_iov->pf2vf_reply)
+ OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev,
+ p_iov->pf2vf_reply,
+ p_iov->pf2vf_reply_phys,
+ sizeof(union pfvf_tlvs));
+
+ if (p_iov->bulletin.p_virt) {
+ size = sizeof(struct ecore_bulletin_content);
+ OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev,
+ p_iov->bulletin.p_virt,
+ p_iov->bulletin.phys,
+ size);
}
+#ifdef CONFIG_ECORE_LOCK_ALLOC
+ OSAL_MUTEX_DEALLOC(&p_iov->mutex);
+#endif
+
+ OSAL_FREE(p_hwfn->p_dev, p_hwfn->vf_iov_info);
+ p_hwfn->vf_iov_info = OSAL_NULL;
+
return rc;
}
+enum _ecore_status_t ecore_vf_pf_release(struct ecore_hwfn *p_hwfn)
+{
+ return _ecore_vf_pf_release(p_hwfn, true);
+}
+
#define VF_ACQUIRE_THRESH 3
static void ecore_vf_pf_acquire_reduce_resc(struct ecore_hwfn *p_hwfn,
struct vf_pf_resc_request *p_req,
struct pf_vf_resc *p_resp)
{
DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
- "PF unwilling to fullill resource request: rxq [%02x/%02x]"
- " txq [%02x/%02x] sbs [%02x/%02x] mac [%02x/%02x]"
- " vlan [%02x/%02x] mc [%02x/%02x]."
- " Try PF recommended amount\n",
+ "PF unwilling to fullill resource request: rxq [%02x/%02x] txq [%02x/%02x] sbs [%02x/%02x] mac [%02x/%02x] vlan [%02x/%02x] mc [%02x/%02x] cids [%02x/%02x]. Try PF recommended amount\n",
p_req->num_rxqs, p_resp->num_rxqs,
p_req->num_rxqs, p_resp->num_txqs,
p_req->num_sbs, p_resp->num_sbs,
p_req->num_mac_filters, p_resp->num_mac_filters,
p_req->num_vlan_filters, p_resp->num_vlan_filters,
- p_req->num_mc_filters, p_resp->num_mc_filters);
+ p_req->num_mc_filters, p_resp->num_mc_filters,
+ p_req->num_cids, p_resp->num_cids);
/* humble our request */
p_req->num_txqs = p_resp->num_txqs;
@@ -159,6 +250,7 @@ static void ecore_vf_pf_acquire_reduce_resc(struct ecore_hwfn *p_hwfn,
p_req->num_mac_filters = p_resp->num_mac_filters;
p_req->num_vlan_filters = p_resp->num_vlan_filters;
p_req->num_mc_filters = p_resp->num_mc_filters;
+ p_req->num_cids = p_resp->num_cids;
}
static enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn)
@@ -185,6 +277,7 @@ static enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn)
p_resc->num_sbs = ECORE_MAX_VF_CHAINS_PER_PF;
p_resc->num_mac_filters = ECORE_ETH_VF_NUM_MAC_FILTERS;
p_resc->num_vlan_filters = ECORE_ETH_VF_NUM_VLAN_FILTERS;
+ p_resc->num_cids = ECORE_ETH_VF_DEFAULT_NUM_CIDS;
OSAL_MEMSET(&vf_sw_info, 0, sizeof(vf_sw_info));
OSAL_VF_FILL_ACQUIRE_RESC_REQ(p_hwfn, &req->resc_request, &vf_sw_info);
@@ -201,12 +294,17 @@ static enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn)
/* Fill capability field with any non-deprecated config we support */
req->vfdev_info.capabilities |= VFPF_ACQUIRE_CAP_100G;
+ /* If we've mapped the doorbell bar, try using queue qids */
+ if (p_iov->b_doorbell_bar)
+ req->vfdev_info.capabilities |= VFPF_ACQUIRE_CAP_PHYSICAL_BAR |
+ VFPF_ACQUIRE_CAP_QUEUE_QIDS;
+
/* pf 2 vf bulletin board address */
req->bulletin_addr = p_iov->bulletin.phys;
req->bulletin_size = p_iov->bulletin.size;
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -221,10 +319,8 @@ static enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn)
/* send acquire request */
rc = ecore_send_msg2pf(p_hwfn,
&resp->hdr.status, sizeof(*resp));
-
- /* PF timeout */
- if (rc)
- return rc;
+ if (rc != ECORE_SUCCESS)
+ goto exit;
/* copy acquire response from buffer to p_hwfn */
OSAL_MEMCPY(&p_iov->acquire_resp,
@@ -310,6 +406,15 @@ static enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn)
VFPF_ACQUIRE_CAP_PRE_FP_HSI)
p_iov->b_pre_fp_hsi = true;
+ /* In case PF doesn't support multi-queue Tx, update the number of
+ * CIDs to reflect the number of queues [older PFs didn't fill that
+ * field].
+ */
+ if (!(resp->pfdev_info.capabilities &
+ PFVF_ACQUIRE_CAP_QUEUE_QIDS))
+ resp->resc.num_cids = resp->resc.num_rxqs +
+ resp->resc.num_txqs;
+
rc = OSAL_VF_UPDATE_ACQUIRE_RESC_RESP(p_hwfn, &resp->resc);
if (rc) {
DP_NOTICE(p_hwfn, true,
@@ -325,7 +430,7 @@ static enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn)
/* get HW info */
p_hwfn->p_dev->type = resp->pfdev_info.dev_type;
- p_hwfn->p_dev->chip_rev = resp->pfdev_info.chip_rev;
+ p_hwfn->p_dev->chip_rev = (u8)resp->pfdev_info.chip_rev;
DP_INFO(p_hwfn, "Chip details - %s%d\n",
ECORE_IS_BB(p_hwfn->p_dev) ? "BB" : "AH",
@@ -357,10 +462,28 @@ exit:
return rc;
}
+u32 ecore_vf_hw_bar_size(struct ecore_hwfn *p_hwfn,
+ enum BAR_ID bar_id)
+{
+ u32 bar_size;
+
+ /* Regview size is fixed */
+ if (bar_id == BAR_ID_0)
+ return 1 << 17;
+
+ /* Doorbell is received from PF */
+ bar_size = p_hwfn->vf_iov_info->acquire_resp.pfdev_info.bar_size;
+ if (bar_size)
+ return 1 << bar_size;
+ return 0;
+}
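
The regview window of a VF is fixed at 128KB (1 << 17), while the doorbell size is whatever log2 value the PF reported in pfdev_info.bar_size; zero means an older PF that never filled the field. A hedged sketch combining this with the CMT split used elsewhere in the patch (illustrative only):

#include <stdint.h>

/* Sketch: usable per-hwfn doorbell bytes from the PF-reported log2 size. */
static uint32_t vf_db_bytes_per_hwfn(uint8_t log2_bar_size, int num_hwfns)
{
    uint32_t bytes = log2_bar_size ? (1U << log2_bar_size) : 0;

    return (num_hwfns > 1) ? bytes / 2 : bytes;
}
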
+
enum _ecore_status_t ecore_vf_hw_prepare(struct ecore_hwfn *p_hwfn)
{
+ struct ecore_hwfn *p_lead = ECORE_LEADING_HWFN(p_hwfn->p_dev);
struct ecore_vf_iov *p_iov;
u32 reg;
+ enum _ecore_status_t rc;
/* Set number of hwfns - might be overridden once leading hwfn learns
* actual configuration from PF.
@@ -368,10 +491,6 @@ enum _ecore_status_t ecore_vf_hw_prepare(struct ecore_hwfn *p_hwfn)
if (IS_LEAD_HWFN(p_hwfn))
p_hwfn->p_dev->num_hwfns = 1;
- /* Set the doorbell bar. Assumption: regview is set */
- p_hwfn->doorbells = (u8 OSAL_IOMEM *)p_hwfn->regview +
- PXP_VF_BAR0_START_DQ;
-
reg = PXP_VF_BAR0_ME_OPAQUE_ADDRESS;
p_hwfn->hw_info.opaque_fid = (u16)REG_RD(p_hwfn, reg);
@@ -386,6 +505,31 @@ enum _ecore_status_t ecore_vf_hw_prepare(struct ecore_hwfn *p_hwfn)
return ECORE_NOMEM;
}
+ /* Doorbells are tricky; Upper-layer has already set the hwfn doorbell
+ * value, but there are several incompatibility scenarios where that
+ * would be incorrect and we'd need to override it.
+ */
+ if (p_hwfn->doorbells == OSAL_NULL) {
+ p_hwfn->doorbells = (u8 OSAL_IOMEM *)p_hwfn->regview +
+ PXP_VF_BAR0_START_DQ;
+ } else if (p_hwfn == p_lead) {
+ /* For leading hw-function, value is always correct, but need
+ * to handle scenario where legacy PF would not support 100g
+ * mapped bars later.
+ */
+ p_iov->b_doorbell_bar = true;
+ } else {
+ /* here, value would be correct ONLY if the leading hwfn
+ * received indication that mapped-bars are supported.
+ */
+ if (p_lead->vf_iov_info->b_doorbell_bar)
+ p_iov->b_doorbell_bar = true;
+ else
+ p_hwfn->doorbells = (u8 OSAL_IOMEM *)
+ p_hwfn->regview +
+ PXP_VF_BAR0_START_DQ;
+ }
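
The branch above boils down to a three-way choice: fall back to the fixed regview DQ window when the upper layer set nothing, trust the preset value on the leading hwfn, and on the second engine trust it only if the lead negotiated a mapped doorbell bar. A compact standalone restatement of that decision (a sketch, not the driver code):

#include <stdbool.h>

/* Sketch: true when the hwfn should use the regview-based doorbell window. */
static bool use_regview_doorbells(bool db_addr_preset, bool is_lead_hwfn,
                                  bool lead_has_db_bar)
{
    if (!db_addr_preset)
        return true;            /* nothing was set by the upper layer */
    if (is_lead_hwfn)
        return false;           /* preset value is always usable here */
    return !lead_has_db_bar;    /* 2nd engine relies on lead's result */
}
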
+
/* Allocate vf2pf msg */
p_iov->vf2pf_request = OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev,
&p_iov->
@@ -428,14 +572,44 @@ enum _ecore_status_t ecore_vf_hw_prepare(struct ecore_hwfn *p_hwfn)
p_iov->bulletin.p_virt, (unsigned long)p_iov->bulletin.phys,
p_iov->bulletin.size);
+#ifdef CONFIG_ECORE_LOCK_ALLOC
OSAL_MUTEX_ALLOC(p_hwfn, &p_iov->mutex);
+#endif
OSAL_MUTEX_INIT(&p_iov->mutex);
p_hwfn->vf_iov_info = p_iov;
p_hwfn->hw_info.personality = ECORE_PCI_ETH;
- return ecore_vf_pf_acquire(p_hwfn);
+ rc = ecore_vf_pf_acquire(p_hwfn);
+
+ /* If VF is 100g using a mapped bar and PF is too old to support that,
+ * acquisition would succeed - but the VF would have no way of knowing
+ * the size of the doorbell bar configured in HW and thus will not
+ * know how to split it for the 2nd hw-function.
+ * In this case we re-try without the indication of the mapped
+ * doorbell.
+ */
+ if (rc == ECORE_SUCCESS &&
+ p_iov->b_doorbell_bar &&
+ !ecore_vf_hw_bar_size(p_hwfn, BAR_ID_1) &&
+ ECORE_IS_CMT(p_hwfn->p_dev)) {
+ rc = _ecore_vf_pf_release(p_hwfn, false);
+ if (rc != ECORE_SUCCESS)
+ return rc;
+
+ p_iov->b_doorbell_bar = false;
+ p_hwfn->doorbells = (u8 OSAL_IOMEM *)p_hwfn->regview +
+ PXP_VF_BAR0_START_DQ;
+ rc = ecore_vf_pf_acquire(p_hwfn);
+ }
+
+ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
+ "Regview [%p], Doorbell [%p], Device-doorbell [%p]\n",
+ p_hwfn->regview, p_hwfn->doorbells,
+ p_hwfn->p_dev->doorbells);
+
+ return rc;
free_vf2pf_request:
OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_iov->vf2pf_request,
@@ -583,7 +757,7 @@ ecore_vf_pf_tunnel_param_update(struct ecore_hwfn *p_hwfn,
ECORE_MODE_IPGRE_TUNN, &p_req->ipgre_clss);
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -627,8 +801,8 @@ ecore_vf_pf_rxq_start(struct ecore_hwfn *p_hwfn,
req->cqe_pbl_addr = cqe_pbl_addr;
req->cqe_pbl_size = cqe_pbl_size;
req->rxq_addr = bd_chain_phys_addr;
- req->hw_sb = p_cid->rel.sb;
- req->sb_index = p_cid->rel.sb_idx;
+ req->hw_sb = p_cid->sb_igu_id;
+ req->sb_index = p_cid->sb_idx;
req->bd_max_bytes = bd_max_bytes;
req->stat_id = -1; /* Keep initialized, for future compatibility */
@@ -649,8 +823,10 @@ ecore_vf_pf_rxq_start(struct ecore_hwfn *p_hwfn,
(u32 *)(&init_prod_val));
}
+ ecore_vf_pf_add_qid(p_hwfn, p_cid);
+
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -704,8 +880,10 @@ enum _ecore_status_t ecore_vf_pf_rxq_stop(struct ecore_hwfn *p_hwfn,
req->num_rxqs = 1;
req->cqe_completion = cqe_completion;
+ ecore_vf_pf_add_qid(p_hwfn, p_cid);
+
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -745,11 +923,13 @@ ecore_vf_pf_txq_start(struct ecore_hwfn *p_hwfn,
/* Tx */
req->pbl_addr = pbl_addr;
req->pbl_size = pbl_size;
- req->hw_sb = p_cid->rel.sb;
- req->sb_index = p_cid->rel.sb_idx;
+ req->hw_sb = p_cid->sb_igu_id;
+ req->sb_index = p_cid->sb_idx;
+
+ ecore_vf_pf_add_qid(p_hwfn, p_cid);
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -799,8 +979,10 @@ enum _ecore_status_t ecore_vf_pf_txq_stop(struct ecore_hwfn *p_hwfn,
req->tx_qid = p_cid->rel.queue_id;
req->num_txqs = 1;
+ ecore_vf_pf_add_qid(p_hwfn, p_cid);
+
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -831,34 +1013,32 @@ enum _ecore_status_t ecore_vf_pf_rxqs_update(struct ecore_hwfn *p_hwfn,
struct vfpf_update_rxq_tlv *req;
enum _ecore_status_t rc;
- /* TODO - API is limited to assuming continuous regions of queues,
- * but VF queues might not fullfil this requirement.
- * Need to consider whether we need new TLVs for this, or whether
- * simply doing it iteratively is good enough.
+ /* Starting with CHANNEL_TLV_QID and the need for additional queue
+ * information, this API stopped supporting multiple rxqs.
+ * TODO - remove this and change the API to accept a single queue-cid
+ * in a follow-up patch.
*/
- if (!num_rxqs)
+ if (num_rxqs != 1) {
+ DP_NOTICE(p_hwfn, true,
+ "VFs can no longer update more than a single queue\n");
return ECORE_INVAL;
+ }
-again:
/* clear mailbox and prep first tlv */
req = ecore_vf_pf_prep(p_hwfn, CHANNEL_TLV_UPDATE_RXQ, sizeof(*req));
- /* Find the length of the current contagious range of queues beginning
- * at first queue's index.
- */
req->rx_qid = (*pp_cid)->rel.queue_id;
- for (req->num_rxqs = 1; req->num_rxqs < num_rxqs; req->num_rxqs++)
- if (pp_cid[req->num_rxqs]->rel.queue_id !=
- req->rx_qid + req->num_rxqs)
- break;
+ req->num_rxqs = 1;
if (comp_cqe_flg)
req->flags |= VFPF_RXQ_UPD_COMPLETE_CQE_FLAG;
if (comp_event_flg)
req->flags |= VFPF_RXQ_UPD_COMPLETE_EVENT_FLAG;
+ ecore_vf_pf_add_qid(p_hwfn, *pp_cid);
+
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -871,15 +1051,6 @@ again:
goto exit;
}
- /* Make sure we're done with all the queues */
- if (req->num_rxqs < num_rxqs) {
- num_rxqs -= req->num_rxqs;
- pp_cid += req->num_rxqs;
- /* TODO - should we give a non-locked variant instead? */
- ecore_vf_pf_req_end(p_hwfn, rc);
- goto again;
- }
-
exit:
ecore_vf_pf_req_end(p_hwfn, rc);
return rc;
@@ -908,12 +1079,15 @@ ecore_vf_pf_vport_start(struct ecore_hwfn *p_hwfn, u8 vport_id,
req->only_untagged = only_untagged;
/* status blocks */
- for (i = 0; i < p_hwfn->vf_iov_info->acquire_resp.resc.num_sbs; i++)
- if (p_hwfn->sbs_info[i])
- req->sb_addr[i] = p_hwfn->sbs_info[i]->sb_phys;
+ for (i = 0; i < p_hwfn->vf_iov_info->acquire_resp.resc.num_sbs; i++) {
+ struct ecore_sb_info *p_sb = p_hwfn->vf_iov_info->sbs_info[i];
+
+ if (p_sb)
+ req->sb_addr[i] = p_sb->sb_phys;
+ }
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -944,7 +1118,7 @@ enum _ecore_status_t ecore_vf_pf_vport_stop(struct ecore_hwfn *p_hwfn)
sizeof(struct vfpf_first_tlv));
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -1051,7 +1225,7 @@ ecore_vf_pf_vport_update(struct ecore_hwfn *p_hwfn,
struct vfpf_vport_update_activate_tlv *p_act_tlv;
size = sizeof(struct vfpf_vport_update_activate_tlv);
- p_act_tlv = ecore_add_tlv(p_hwfn, &p_iov->offset,
+ p_act_tlv = ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_VPORT_UPDATE_ACTIVATE,
size);
resp_size += sizeof(struct pfvf_def_resp_tlv);
@@ -1071,7 +1245,7 @@ ecore_vf_pf_vport_update(struct ecore_hwfn *p_hwfn,
struct vfpf_vport_update_vlan_strip_tlv *p_vlan_tlv;
size = sizeof(struct vfpf_vport_update_vlan_strip_tlv);
- p_vlan_tlv = ecore_add_tlv(p_hwfn, &p_iov->offset,
+ p_vlan_tlv = ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_VPORT_UPDATE_VLAN_STRIP,
size);
resp_size += sizeof(struct pfvf_def_resp_tlv);
@@ -1084,7 +1258,7 @@ ecore_vf_pf_vport_update(struct ecore_hwfn *p_hwfn,
size = sizeof(struct vfpf_vport_update_tx_switch_tlv);
tlv = CHANNEL_TLV_VPORT_UPDATE_TX_SWITCH;
- p_tx_switch_tlv = ecore_add_tlv(p_hwfn, &p_iov->offset,
+ p_tx_switch_tlv = ecore_add_tlv(&p_iov->offset,
tlv, size);
resp_size += sizeof(struct pfvf_def_resp_tlv);
@@ -1095,7 +1269,7 @@ ecore_vf_pf_vport_update(struct ecore_hwfn *p_hwfn,
struct vfpf_vport_update_mcast_bin_tlv *p_mcast_tlv;
size = sizeof(struct vfpf_vport_update_mcast_bin_tlv);
- p_mcast_tlv = ecore_add_tlv(p_hwfn, &p_iov->offset,
+ p_mcast_tlv = ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_VPORT_UPDATE_MCAST,
size);
resp_size += sizeof(struct pfvf_def_resp_tlv);
@@ -1113,7 +1287,7 @@ ecore_vf_pf_vport_update(struct ecore_hwfn *p_hwfn,
tlv = CHANNEL_TLV_VPORT_UPDATE_ACCEPT_PARAM;
size = sizeof(struct vfpf_vport_update_accept_param_tlv);
- p_accept_tlv = ecore_add_tlv(p_hwfn, &p_iov->offset, tlv, size);
+ p_accept_tlv = ecore_add_tlv(&p_iov->offset, tlv, size);
resp_size += sizeof(struct pfvf_def_resp_tlv);
if (update_rx) {
@@ -1135,7 +1309,7 @@ ecore_vf_pf_vport_update(struct ecore_hwfn *p_hwfn,
int i, table_size;
size = sizeof(struct vfpf_vport_update_rss_tlv);
- p_rss_tlv = ecore_add_tlv(p_hwfn, &p_iov->offset,
+ p_rss_tlv = ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_VPORT_UPDATE_RSS, size);
resp_size += sizeof(struct pfvf_def_resp_tlv);
@@ -1173,8 +1347,7 @@ ecore_vf_pf_vport_update(struct ecore_hwfn *p_hwfn,
size = sizeof(struct vfpf_vport_update_accept_any_vlan_tlv);
tlv = CHANNEL_TLV_VPORT_UPDATE_ACCEPT_ANY_VLAN;
- p_any_vlan_tlv = ecore_add_tlv(p_hwfn, &p_iov->offset,
- tlv, size);
+ p_any_vlan_tlv = ecore_add_tlv(&p_iov->offset, tlv, size);
resp_size += sizeof(struct pfvf_def_resp_tlv);
p_any_vlan_tlv->accept_any_vlan = p_params->accept_any_vlan;
@@ -1188,7 +1361,7 @@ ecore_vf_pf_vport_update(struct ecore_hwfn *p_hwfn,
sge_tpa_params = p_params->sge_tpa_params;
size = sizeof(struct vfpf_vport_update_sge_tpa_tlv);
- p_sge_tpa_tlv = ecore_add_tlv(p_hwfn, &p_iov->offset,
+ p_sge_tpa_tlv = ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_VPORT_UPDATE_SGE_TPA,
size);
resp_size += sizeof(struct pfvf_def_resp_tlv);
@@ -1226,7 +1399,7 @@ ecore_vf_pf_vport_update(struct ecore_hwfn *p_hwfn,
}
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -1258,7 +1431,7 @@ enum _ecore_status_t ecore_vf_pf_reset(struct ecore_hwfn *p_hwfn)
req = ecore_vf_pf_prep(p_hwfn, CHANNEL_TLV_CLOSE, sizeof(*req));
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -1280,55 +1453,6 @@ exit:
return rc;
}
-enum _ecore_status_t ecore_vf_pf_release(struct ecore_hwfn *p_hwfn)
-{
- struct ecore_vf_iov *p_iov = p_hwfn->vf_iov_info;
- struct pfvf_def_resp_tlv *resp;
- struct vfpf_first_tlv *req;
- u32 size;
- enum _ecore_status_t rc;
-
- /* clear mailbox and prep first tlv */
- req = ecore_vf_pf_prep(p_hwfn, CHANNEL_TLV_RELEASE, sizeof(*req));
-
- /* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
- CHANNEL_TLV_LIST_END,
- sizeof(struct channel_list_end_tlv));
-
- resp = &p_iov->pf2vf_reply->default_resp;
- rc = ecore_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
-
- if (rc == ECORE_SUCCESS && resp->hdr.status != PFVF_STATUS_SUCCESS)
- rc = ECORE_AGAIN;
-
- ecore_vf_pf_req_end(p_hwfn, rc);
-
- p_hwfn->b_int_enabled = 0;
-
- if (p_iov->vf2pf_request)
- OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev,
- p_iov->vf2pf_request,
- p_iov->vf2pf_request_phys,
- sizeof(union vfpf_tlvs));
- if (p_iov->pf2vf_reply)
- OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev,
- p_iov->pf2vf_reply,
- p_iov->pf2vf_reply_phys,
- sizeof(union pfvf_tlvs));
-
- if (p_iov->bulletin.p_virt) {
- size = sizeof(struct ecore_bulletin_content);
- OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev,
- p_iov->bulletin.p_virt,
- p_iov->bulletin.phys, size);
- }
-
- OSAL_FREE(p_hwfn->p_dev, p_hwfn->vf_iov_info);
-
- return rc;
-}
-
void ecore_vf_pf_filter_mcast(struct ecore_hwfn *p_hwfn,
struct ecore_filter_mcast *p_filter_cmd)
{
@@ -1374,7 +1498,7 @@ enum _ecore_status_t ecore_vf_pf_filter_ucast(struct ecore_hwfn *p_hwfn,
req->vlan = p_ucast->vlan;
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -1405,7 +1529,7 @@ enum _ecore_status_t ecore_vf_pf_int_cleanup(struct ecore_hwfn *p_hwfn)
sizeof(struct vfpf_first_tlv));
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset,
+ ecore_add_tlv(&p_iov->offset,
CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
@@ -1424,6 +1548,39 @@ exit:
return rc;
}
+enum _ecore_status_t ecore_vf_pf_get_coalesce(struct ecore_hwfn *p_hwfn,
+ u16 *p_coal,
+ struct ecore_queue_cid *p_cid)
+{
+ struct ecore_vf_iov *p_iov = p_hwfn->vf_iov_info;
+ struct pfvf_read_coal_resp_tlv *resp;
+ struct vfpf_read_coal_req_tlv *req;
+ enum _ecore_status_t rc;
+
+ /* clear mailbox and prep header tlv */
+ req = ecore_vf_pf_prep(p_hwfn, CHANNEL_TLV_COALESCE_READ,
+ sizeof(*req));
+ req->qid = p_cid->rel.queue_id;
+ req->is_rx = p_cid->b_is_rx ? 1 : 0;
+
+ ecore_add_tlv(&p_iov->offset, CHANNEL_TLV_LIST_END,
+ sizeof(struct channel_list_end_tlv));
+ resp = &p_iov->pf2vf_reply->read_coal_resp;
+
+ rc = ecore_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
+ if (rc != ECORE_SUCCESS)
+ goto exit;
+
+ if (resp->hdr.status != PFVF_STATUS_SUCCESS)
+ goto exit;
+
+ *p_coal = resp->coal;
+exit:
+ ecore_vf_pf_req_end(p_hwfn, rc);
+
+ return rc;
+}
+
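As a usage sketch (not part of the patch), a caller holding a queue-cid could read back the PF-configured coalescing value as follows; example_print_rx_coal() is a hypothetical name.

static void example_print_rx_coal(struct ecore_hwfn *p_hwfn,
				  struct ecore_queue_cid *p_cid)
{
	u16 coal = 0;

	/* On success the PF's configured value is returned in usec */
	if (ecore_vf_pf_get_coalesce(p_hwfn, &coal, p_cid) == ECORE_SUCCESS)
		DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
			   "Queue %d coalescing: %u usec\n",
			   p_cid->rel.queue_id, coal);
}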
enum _ecore_status_t
ecore_vf_pf_set_coalesce(struct ecore_hwfn *p_hwfn, u16 rx_coal, u16 tx_coal,
struct ecore_queue_cid *p_cid)
@@ -1446,7 +1603,7 @@ ecore_vf_pf_set_coalesce(struct ecore_hwfn *p_hwfn, u16 rx_coal, u16 tx_coal,
rx_coal, tx_coal, req->qid);
/* add list termination tlv */
- ecore_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END,
+ ecore_add_tlv(&p_iov->offset, CHANNEL_TLV_LIST_END,
sizeof(struct channel_list_end_tlv));
resp = &p_iov->pf2vf_reply->default_resp;
@@ -1479,6 +1636,24 @@ u16 ecore_vf_get_igu_sb_id(struct ecore_hwfn *p_hwfn,
return p_iov->acquire_resp.resc.hw_sbs[sb_id].hw_sb_id;
}
+void ecore_vf_set_sb_info(struct ecore_hwfn *p_hwfn,
+ u16 sb_id, struct ecore_sb_info *p_sb)
+{
+ struct ecore_vf_iov *p_iov = p_hwfn->vf_iov_info;
+
+ if (!p_iov) {
+ DP_NOTICE(p_hwfn, true, "vf_sriov_info isn't initialized\n");
+ return;
+ }
+
+ if (sb_id >= PFVF_MAX_SBS_PER_VF) {
+ DP_NOTICE(p_hwfn, true, "Can't configure SB %04x\n", sb_id);
+ return;
+ }
+
+ p_iov->sbs_info[sb_id] = p_sb;
+}
+
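A short usage sketch of the new sbs_info bookkeeping (the function and variable names here are hypothetical): each fastpath SB is registered before vport start so its physical address can be passed to the PF, and passing OSAL_NULL later removes an entry again.

static void example_register_sbs(struct ecore_hwfn *p_hwfn,
				 struct ecore_sb_info *sb_array, u16 num_sbs)
{
	u16 i;

	/* Store each fastpath SB so vport-start can report its address */
	for (i = 0; i < num_sbs; i++)
		ecore_vf_set_sb_info(p_hwfn, i, &sb_array[i]);
}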
enum _ecore_status_t ecore_vf_read_bulletin(struct ecore_hwfn *p_hwfn,
u8 *p_change)
{
@@ -1497,8 +1672,8 @@ enum _ecore_status_t ecore_vf_read_bulletin(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
/* Verify the bulletin we see is valid */
- crc = ecore_crc32(0, (u8 *)&shadow + crc_size,
- p_iov->bulletin.size - crc_size);
+ crc = OSAL_CRC32(0, (u8 *)&shadow + crc_size,
+ p_iov->bulletin.size - crc_size);
if (crc != shadow.crc)
return ECORE_AGAIN;
@@ -1513,8 +1688,7 @@ enum _ecore_status_t ecore_vf_read_bulletin(struct ecore_hwfn *p_hwfn,
return ECORE_SUCCESS;
}
-void __ecore_vf_get_link_params(struct ecore_hwfn *p_hwfn,
- struct ecore_mcp_link_params *p_params,
+void __ecore_vf_get_link_params(struct ecore_mcp_link_params *p_params,
struct ecore_bulletin_content *p_bulletin)
{
OSAL_MEMSET(p_params, 0, sizeof(*p_params));
@@ -1531,12 +1705,11 @@ void __ecore_vf_get_link_params(struct ecore_hwfn *p_hwfn,
void ecore_vf_get_link_params(struct ecore_hwfn *p_hwfn,
struct ecore_mcp_link_params *params)
{
- __ecore_vf_get_link_params(p_hwfn, params,
+ __ecore_vf_get_link_params(params,
&p_hwfn->vf_iov_info->bulletin_shadow);
}
-void __ecore_vf_get_link_state(struct ecore_hwfn *p_hwfn,
- struct ecore_mcp_link_state *p_link,
+void __ecore_vf_get_link_state(struct ecore_mcp_link_state *p_link,
struct ecore_bulletin_content *p_bulletin)
{
OSAL_MEMSET(p_link, 0, sizeof(*p_link));
@@ -1558,12 +1731,11 @@ void __ecore_vf_get_link_state(struct ecore_hwfn *p_hwfn,
void ecore_vf_get_link_state(struct ecore_hwfn *p_hwfn,
struct ecore_mcp_link_state *link)
{
- __ecore_vf_get_link_state(p_hwfn, link,
+ __ecore_vf_get_link_state(link,
&p_hwfn->vf_iov_info->bulletin_shadow);
}
-void __ecore_vf_get_link_caps(struct ecore_hwfn *p_hwfn,
- struct ecore_mcp_link_capabilities *p_link_caps,
+void __ecore_vf_get_link_caps(struct ecore_mcp_link_capabilities *p_link_caps,
struct ecore_bulletin_content *p_bulletin)
{
OSAL_MEMSET(p_link_caps, 0, sizeof(*p_link_caps));
@@ -1573,7 +1745,7 @@ void __ecore_vf_get_link_caps(struct ecore_hwfn *p_hwfn,
void ecore_vf_get_link_caps(struct ecore_hwfn *p_hwfn,
struct ecore_mcp_link_capabilities *p_link_caps)
{
- __ecore_vf_get_link_caps(p_hwfn, p_link_caps,
+ __ecore_vf_get_link_caps(p_link_caps,
&p_hwfn->vf_iov_info->bulletin_shadow);
}
@@ -1703,3 +1875,10 @@ void ecore_vf_get_fw_version(struct ecore_hwfn *p_hwfn,
*fw_rev = info->fw_rev;
*fw_eng = info->fw_eng;
}
+
+#ifdef CONFIG_ECORE_SW_CHANNEL
+void ecore_vf_set_hw_channel(struct ecore_hwfn *p_hwfn, bool b_is_hw)
+{
+ p_hwfn->vf_iov_info->b_hw_channel = b_is_hw;
+}
+#endif
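For orientation, every VF-to-PF request touched in this file follows the same mailbox pattern - prep the first TLV, fill the request, optionally append the qid TLV, terminate the list, send, and release. A condensed sketch using the update-rxq request; the function name and the error code chosen on a bad PF status are illustrative assumptions.

static enum _ecore_status_t
example_vf_channel_msg(struct ecore_hwfn *p_hwfn, struct ecore_queue_cid *p_cid)
{
	struct ecore_vf_iov *p_iov = p_hwfn->vf_iov_info;
	struct pfvf_def_resp_tlv *resp;
	struct vfpf_update_rxq_tlv *req;
	enum _ecore_status_t rc;

	/* 1. Clear the mailbox and place the first TLV */
	req = ecore_vf_pf_prep(p_hwfn, CHANNEL_TLV_UPDATE_RXQ, sizeof(*req));
	req->rx_qid = p_cid->rel.queue_id;
	req->num_rxqs = 1;
	req->flags |= VFPF_RXQ_UPD_COMPLETE_CQE_FLAG;

	/* 2. Newer PFs expect the extra qid TLV after each queue reference */
	ecore_vf_pf_add_qid(p_hwfn, p_cid);

	/* 3. Terminate the TLV list */
	ecore_add_tlv(&p_iov->offset, CHANNEL_TLV_LIST_END,
		      sizeof(struct channel_list_end_tlv));

	/* 4. Send and check the PF's status */
	resp = &p_iov->pf2vf_reply->default_resp;
	rc = ecore_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
	if (rc == ECORE_SUCCESS && resp->hdr.status != PFVF_STATUS_SUCCESS)
		rc = ECORE_INVAL;

	/* 5. Always release the mailbox mutex */
	ecore_vf_pf_req_end(p_hwfn, rc);
	return rc;
}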
diff --git a/drivers/net/qede/base/ecore_vf.h b/drivers/net/qede/base/ecore_vf.h
index f4713884..de2758cb 100644
--- a/drivers/net/qede/base/ecore_vf.h
+++ b/drivers/net/qede/base/ecore_vf.h
@@ -14,6 +14,11 @@
#include "ecore_l2_api.h"
#include "ecore_vfpf_if.h"
+/* Default number of CIDs [total of both Rx and Tx] to be requested. */
+#define ECORE_ETH_VF_DEFAULT_NUM_CIDS (32)
+
/* This data is held in the ecore_hwfn structure for VFs only. */
struct ecore_vf_iov {
union vfpf_tlvs *vf2pf_request;
@@ -36,25 +41,47 @@ struct ecore_vf_iov {
* this has to be propagated as it affects the fastpath.
*/
bool b_pre_fp_hsi;
-};
+ /* Current-day VFs pass the SB physical addresses on vport start, and
+ * as they lack an IGU mapping they need to store the addresses of
+ * previously registered SBs.
+ * Even if we changed the configuration flow, backward compatibility
+ * [with older PFs] would still require storing these.
+ */
+ struct ecore_sb_info *sbs_info[PFVF_MAX_SBS_PER_VF];
+
+#ifdef CONFIG_ECORE_SW_CHANNEL
+ /* Would be set if the VF is to try communicating with its PF
+ * using a hw channel.
+ */
+ bool b_hw_channel;
+#endif
+
+ /* Determines whether VF utilizes doorbells via limited register
+ * bar or via the doorbell bar.
+ */
+ bool b_doorbell_bar;
+};
-enum _ecore_status_t ecore_set_rxq_coalesce(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u16 coalesce,
- struct ecore_queue_cid *p_cid);
-enum _ecore_status_t ecore_set_txq_coalesce(struct ecore_hwfn *p_hwfn,
- struct ecore_ptt *p_ptt,
- u16 coalesce,
- struct ecore_queue_cid *p_cid);
+/**
+ * @brief VF - Get coalesce per VF's relative queue.
+ *
+ * @param p_hwfn
+ * @param p_coal - coalesce value in microseconds for the VF's queue.
+ * @param p_cid - queue cid
+ *
+ **/
+enum _ecore_status_t ecore_vf_pf_get_coalesce(struct ecore_hwfn *p_hwfn,
+ u16 *p_coal,
+ struct ecore_queue_cid *p_cid);
/**
* @brief VF - Set Rx/Tx coalesce per VF's relative queue.
- * Coalesce value '0' will omit the configuration.
+ * Coalesce value '0' will omit the configuration.
*
- * @param p_hwfn
- * @param rx_coal - coalesce value in micro second for rx queue
- * @param tx_coal - coalesce value in micro second for tx queue
- * @param queue_cid
+ * @param p_hwfn
+ * @param rx_coal - coalesce value in microseconds for the rx queue
+ * @param tx_coal - coalesce value in microseconds for the tx queue
+ * @param p_cid - queue cid
*
**/
enum _ecore_status_t ecore_vf_pf_set_coalesce(struct ecore_hwfn *p_hwfn,
@@ -200,6 +227,15 @@ enum _ecore_status_t ecore_vf_pf_release(struct ecore_hwfn *p_hwfn);
u16 ecore_vf_get_igu_sb_id(struct ecore_hwfn *p_hwfn,
u16 sb_id);
+/**
+ * @brief Stores [or removes] a configured sb_info.
+ *
+ * @param p_hwfn
+ * @param sb_id - zero-based SB index [for fastpath]
+ * @param p_sb - may be OSAL_NULL [during removal].
+ */
+void ecore_vf_set_sb_info(struct ecore_hwfn *p_hwfn,
+ u16 sb_id, struct ecore_sb_info *p_sb);
/**
* @brief ecore_vf_pf_vport_start - perform vport start for VF.
@@ -251,34 +287,28 @@ enum _ecore_status_t ecore_vf_pf_int_cleanup(struct ecore_hwfn *p_hwfn);
/**
* @brief - return the link params in a given bulletin board
*
- * @param p_hwfn
* @param p_params - pointer to a struct to fill with link params
* @param p_bulletin
*/
-void __ecore_vf_get_link_params(struct ecore_hwfn *p_hwfn,
- struct ecore_mcp_link_params *p_params,
+void __ecore_vf_get_link_params(struct ecore_mcp_link_params *p_params,
struct ecore_bulletin_content *p_bulletin);
/**
* @brief - return the link state in a given bulletin board
*
- * @param p_hwfn
* @param p_link - pointer to a struct to fill with link state
* @param p_bulletin
*/
-void __ecore_vf_get_link_state(struct ecore_hwfn *p_hwfn,
- struct ecore_mcp_link_state *p_link,
+void __ecore_vf_get_link_state(struct ecore_mcp_link_state *p_link,
struct ecore_bulletin_content *p_bulletin);
/**
* @brief - return the link capabilities in a given bulletin board
*
- * @param p_hwfn
* @param p_link - pointer to a struct to fill with link capabilities
* @param p_bulletin
*/
-void __ecore_vf_get_link_caps(struct ecore_hwfn *p_hwfn,
- struct ecore_mcp_link_capabilities *p_link_caps,
+void __ecore_vf_get_link_caps(struct ecore_mcp_link_capabilities *p_link_caps,
struct ecore_bulletin_content *p_bulletin);
enum _ecore_status_t
@@ -286,5 +316,8 @@ ecore_vf_pf_tunnel_param_update(struct ecore_hwfn *p_hwfn,
struct ecore_tunnel_info *p_tunn);
void ecore_vf_set_vf_start_tunn_update_param(struct ecore_tunnel_info *p_tun);
+
+u32 ecore_vf_hw_bar_size(struct ecore_hwfn *p_hwfn,
+ enum BAR_ID bar_id);
#endif
#endif /* __ECORE_VF_H__ */
diff --git a/drivers/net/qede/base/ecore_vf_api.h b/drivers/net/qede/base/ecore_vf_api.h
index be3a326b..9815cf8a 100644
--- a/drivers/net/qede/base/ecore_vf_api.h
+++ b/drivers/net/qede/base/ecore_vf_api.h
@@ -163,5 +163,18 @@ void ecore_vf_get_fw_version(struct ecore_hwfn *p_hwfn,
u16 *fw_eng);
void ecore_vf_bulletin_get_udp_ports(struct ecore_hwfn *p_hwfn,
u16 *p_vxlan_port, u16 *p_geneve_port);
+
+#ifdef CONFIG_ECORE_SW_CHANNEL
+/**
+ * @brief Set whether the VF uses a SW or HW channel when communicating
+ * with the PF.
+ * NOTICE: today the likely first place to call this from the VF
+ * would be OSAL_VF_FILL_ACQUIRE_RESC_REQ(); we might want to consider
+ * something more appropriate.
+ *
+ * @param p_hwfn
+ * @param b_is_hw - true iff VF is to use a HW-channel
+ */
+void ecore_vf_set_hw_channel(struct ecore_hwfn *p_hwfn, bool b_is_hw);
+#endif
#endif
#endif
diff --git a/drivers/net/qede/base/ecore_vfpf_if.h b/drivers/net/qede/base/ecore_vfpf_if.h
index 66184421..3ccc7665 100644
--- a/drivers/net/qede/base/ecore_vfpf_if.h
+++ b/drivers/net/qede/base/ecore_vfpf_if.h
@@ -19,13 +19,14 @@
*
**/
struct vf_pf_resc_request {
- u8 num_rxqs;
- u8 num_txqs;
- u8 num_sbs;
- u8 num_mac_filters;
- u8 num_vlan_filters;
- u8 num_mc_filters; /* No limit so superfluous */
- u16 padding;
+ u8 num_rxqs;
+ u8 num_txqs;
+ u8 num_sbs;
+ u8 num_mac_filters;
+ u8 num_vlan_filters;
+ u8 num_mc_filters; /* No limit so superfluous */
+ u8 num_cids;
+ u8 padding;
};
struct hw_sb_info {
@@ -92,6 +93,20 @@ struct vfpf_acquire_tlv {
/* VF pre-FP hsi version */
#define VFPF_ACQUIRE_CAP_PRE_FP_HSI (1 << 0)
#define VFPF_ACQUIRE_CAP_100G (1 << 1) /* VF can support 100g */
+
+ /* A requirement for supporting multiple Tx queues on a single
+ * queue-zone: the VF would pass qids as additional information
+ * whenever passing queue references.
+ * TODO - due to the CID limitations in Bar0, VFs currently don't pass
+ * this, and use the legacy CID scheme.
+ */
+#define VFPF_ACQUIRE_CAP_QUEUE_QIDS (1 << 2)
+
+ /* The VF is using the physical bar. While this is mostly internal
+ * to the VF, it might affect the number of CIDs supported, assuming
+ * QUEUE_QIDS is set.
+ */
+#define VFPF_ACQUIRE_CAP_PHYSICAL_BAR (1 << 3)
u64 capabilities;
u8 fw_major;
u8 fw_minor;
@@ -170,6 +185,9 @@ struct pfvf_acquire_resp_tlv {
#endif
#define PFVF_ACQUIRE_CAP_POST_FW_OVERRIDE (1 << 2)
+ /* PF expects queues to be received with additional qids */
+#define PFVF_ACQUIRE_CAP_QUEUE_QIDS (1 << 3)
+
u16 db_size;
u8 indices_per_sb;
u8 os_type;
@@ -178,7 +196,8 @@ struct pfvf_acquire_resp_tlv {
u16 chip_rev;
u8 dev_type;
- u8 padding;
+ /* Doorbell bar size configured in HW: log(size) or 0 */
+ u8 bar_size;
struct pfvf_stats_info stats_info;
@@ -210,7 +229,8 @@ struct pfvf_acquire_resp_tlv {
u8 num_mac_filters;
u8 num_vlan_filters;
u8 num_mc_filters;
- u8 padding[2];
+ u8 num_cids;
+ u8 padding;
} resc;
u32 bulletin_size;
@@ -223,6 +243,16 @@ struct pfvf_start_queue_resp_tlv {
u8 padding[4];
};
+/* Extended queue information - additional index for reference inside qzone.
+ * If communicated between VF/PF, each TLV relating to queues should be
+ * extended by one such [or have a future base TLV that already contains info].
+ */
+struct vfpf_qid_tlv {
+ struct channel_tlv tl;
+ u8 qid;
+ u8 padding[3];
+};
+
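The ecore_vf_pf_add_qid() calls added throughout ecore_vf.c are expected to append this TLV only when the PF advertised PFVF_ACQUIRE_CAP_QUEUE_QIDS. A rough sketch of that idea follows; the qid_usage_idx field and the function body are assumptions, not the patch's exact implementation.

static void example_add_qid(struct ecore_hwfn *p_hwfn,
			    struct ecore_queue_cid *p_cid)
{
	struct ecore_vf_iov *p_iov = p_hwfn->vf_iov_info;
	struct vfpf_qid_tlv *p_qid_tlv;

	/* Only PFs advertising QUEUE_QIDS expect the extra TLV */
	if (!(p_iov->acquire_resp.pfdev_info.capabilities &
	      PFVF_ACQUIRE_CAP_QUEUE_QIDS))
		return;

	p_qid_tlv = ecore_add_tlv(&p_iov->offset, CHANNEL_TLV_QID,
				  sizeof(*p_qid_tlv));
	/* qid_usage_idx (index inside the queue-zone) is assumed here */
	p_qid_tlv->qid = p_cid->qid_usage_idx;
}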
/* Setup Queue */
struct vfpf_start_rxq_tlv {
struct vfpf_first_tlv first_tlv;
@@ -265,7 +295,15 @@ struct vfpf_stop_rxqs_tlv {
struct vfpf_first_tlv first_tlv;
u16 rx_qid;
+
+ /* While the API supports multiple Rx-queues on a single TLV
+ * message, in practice older VFs [ecore] always used it for a single
+ * queue. And there are PFs [starting with the CHANNEL_TLV_QID] which
+ * assume this is always '1'. So in practice this field should be
+ * considered deprecated and *always* set to '1'.
+ */
u8 num_rxqs;
+
u8 cqe_completion;
u8 padding[4];
};
@@ -275,6 +313,13 @@ struct vfpf_stop_txqs_tlv {
struct vfpf_first_tlv first_tlv;
u16 tx_qid;
+
+ /* While the API supports multiple Tx-queues on a single TLV
+ * message, in practice older VFs [ecore] always used it for a single
+ * queue. And there are PFs [starting with the CHANNEL_TLV_QID] which
+ * assume this is always '1'. So in practice this field should be
+ * considered deprecated and *always* set to '1'.
+ */
u8 num_txqs;
u8 padding[5];
};
@@ -465,6 +510,19 @@ struct vfpf_update_coalesce {
u8 padding[2];
};
+struct vfpf_read_coal_req_tlv {
+ struct vfpf_first_tlv first_tlv;
+ u16 qid;
+ u8 is_rx;
+ u8 padding[5];
+};
+
+struct pfvf_read_coal_resp_tlv {
+ struct pfvf_tlv hdr;
+ u16 coal;
+ u8 padding[6];
+};
+
union vfpf_tlvs {
struct vfpf_first_tlv first_tlv;
struct vfpf_acquire_tlv acquire;
@@ -478,6 +536,7 @@ union vfpf_tlvs {
struct vfpf_ucast_filter_tlv ucast_filter;
struct vfpf_update_tunn_param_tlv tunn_param_update;
struct vfpf_update_coalesce update_coalesce;
+ struct vfpf_read_coal_req_tlv read_coal_req;
struct tlv_buffer_size tlv_buf_size;
};
@@ -487,6 +546,7 @@ union pfvf_tlvs {
struct tlv_buffer_size tlv_buf_size;
struct pfvf_start_queue_resp_tlv queue_start;
struct pfvf_update_tunn_param_tlv tunn_param_resp;
+ struct pfvf_read_coal_resp_tlv read_coal_resp;
};
/* This is a structure which is allocated in the VF, which the PF may update
@@ -605,6 +665,8 @@ enum {
CHANNEL_TLV_VPORT_UPDATE_SGE_TPA,
CHANNEL_TLV_UPDATE_TUNN_PARAM,
CHANNEL_TLV_COALESCE_UPDATE,
+ CHANNEL_TLV_QID,
+ CHANNEL_TLV_COALESCE_READ,
CHANNEL_TLV_MAX,
/* Required for iterating over vport-update tlvs.
diff --git a/drivers/net/qede/base/mcp_public.h b/drivers/net/qede/base/mcp_public.h
index 1ad8a962..81ca6634 100644
--- a/drivers/net/qede/base/mcp_public.h
+++ b/drivers/net/qede/base/mcp_public.h
@@ -28,19 +28,19 @@
typedef u32 offsize_t; /* In DWORDS !!! */
/* Offset from the beginning of the MCP scratchpad */
-#define OFFSIZE_OFFSET_SHIFT 0
+#define OFFSIZE_OFFSET_OFFSET 0
#define OFFSIZE_OFFSET_MASK 0x0000ffff
/* Size of specific element (not the whole array if any) */
-#define OFFSIZE_SIZE_SHIFT 16
+#define OFFSIZE_SIZE_OFFSET 16
#define OFFSIZE_SIZE_MASK 0xffff0000
/* SECTION_OFFSET is calculating the offset in bytes out of offsize */
#define SECTION_OFFSET(_offsize) \
- ((((_offsize & OFFSIZE_OFFSET_MASK) >> OFFSIZE_OFFSET_SHIFT) << 2))
+ ((((_offsize & OFFSIZE_OFFSET_MASK) >> OFFSIZE_OFFSET_OFFSET) << 2))
/* SECTION_SIZE is calculating the size in bytes out of offsize */
#define SECTION_SIZE(_offsize) \
- (((_offsize & OFFSIZE_SIZE_MASK) >> OFFSIZE_SIZE_SHIFT) << 2)
+ (((_offsize & OFFSIZE_SIZE_MASK) >> OFFSIZE_SIZE_OFFSET) << 2)
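The offsize encoding packs a dword offset in the low 16 bits and a dword element size in the high 16 bits; the trailing '<< 2' converts both to bytes. A small self-contained check of the two macros, with a made-up offsize value:

#include <stdint.h>
#include <stdio.h>

#define OFFSIZE_OFFSET_OFFSET	0
#define OFFSIZE_OFFSET_MASK	0x0000ffff
#define OFFSIZE_SIZE_OFFSET	16
#define OFFSIZE_SIZE_MASK	0xffff0000

#define SECTION_OFFSET(_offsize) \
	((((_offsize & OFFSIZE_OFFSET_MASK) >> OFFSIZE_OFFSET_OFFSET) << 2))
#define SECTION_SIZE(_offsize) \
	(((_offsize & OFFSIZE_SIZE_MASK) >> OFFSIZE_SIZE_OFFSET) << 2)

int main(void)
{
	/* offset = 0x0123 dwords, size = 0x0040 dwords (made-up values) */
	uint32_t offsize = (0x0040u << 16) | 0x0123u;

	/* Prints: offset 1164 bytes, size 256 bytes */
	printf("offset %u bytes, size %u bytes\n",
	       (unsigned int)SECTION_OFFSET(offsize),
	       (unsigned int)SECTION_SIZE(offsize));
	return 0;
}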
/* SECTION_ADDR returns the GRC addr of a section, given offsize and index
* within section
@@ -59,7 +59,7 @@ struct eth_phy_cfg {
/* 0 = autoneg, 1000/10000/20000/25000/40000/50000/100000 */
u32 speed;
#define ETH_SPEED_AUTONEG 0
-#define ETH_SPEED_SMARTLINQ 0x8
+#define ETH_SPEED_SMARTLINQ 0x8 /* deprecated - use link_modes field instead */
u32 pause; /* bitmask */
#define ETH_PAUSE_NONE 0x0
@@ -84,38 +84,28 @@ struct eth_phy_cfg {
/* Remote Serdes Loopback (RX to TX) */
#define ETH_LOOPBACK_INT_PHY_FEA_AH_ONLY (9)
- /* Used to configure the EEE Tx LPI timer, has several modes of
- * operation, according to bits 29:28
- * 2'b00: Timer will be configured by nvram, output will be the value
- * from nvram.
- * 2'b01: Timer will be configured by nvram, output will be in
- * 16xmicroseconds.
- * 2'b10: bits 1:0 contain an nvram value which will be used instead
- * of the one located in the nvram. Output will be that value.
- * 2'b11: bits 19:0 contain the idle timer in microseconds; output
- * will be in 16xmicroseconds.
- * Bits 31:30 should be 2'b11 in order for EEE to be enabled.
- */
- u32 eee_mode;
-#define EEE_MODE_TIMER_USEC_MASK (0x000fffff)
-#define EEE_MODE_TIMER_USEC_OFFSET (0)
-#define EEE_MODE_TIMER_USEC_BALANCED_TIME (0xa00)
-#define EEE_MODE_TIMER_USEC_AGGRESSIVE_TIME (0x100)
-#define EEE_MODE_TIMER_USEC_LATENCY_TIME (0x6000)
-/* Set by the driver to request status timer will be in microseconds and and not
- * in EEE policy definition
+ u32 eee_cfg;
+/* EEE is enabled (configuration). Refer to eee_status->active for negotiated
+ * status
*/
-#define EEE_MODE_OUTPUT_TIME (1 << 28)
-/* Set by the driver to override default nvm timer */
-#define EEE_MODE_OVERRIDE_NVRAM (1 << 29)
-#define EEE_MODE_ENABLE_LPI (1 << 30) /* Set when */
-#define EEE_MODE_ADV_LPI (1 << 31) /* Set when EEE is enabled */
+#define EEE_CFG_EEE_ENABLED (1 << 0)
+#define EEE_CFG_TX_LPI (1 << 1)
+#define EEE_CFG_ADV_SPEED_1G (1 << 2)
+#define EEE_CFG_ADV_SPEED_10G (1 << 3)
+#define EEE_TX_TIMER_USEC_MASK (0xfffffff0)
+#define EEE_TX_TIMER_USEC_OFFSET 4
+#define EEE_TX_TIMER_USEC_BALANCED_TIME (0xa00)
+#define EEE_TX_TIMER_USEC_AGGRESSIVE_TIME (0x100)
+#define EEE_TX_TIMER_USEC_LATENCY_TIME (0x6000)
+
+ u32 link_modes; /* Additional link modes */
+#define LINK_MODE_SMARTLINQ_ENABLE 0x1 /* XXX Deprecate */
};
struct port_mf_cfg {
u32 dynamic_cfg; /* device control channel */
#define PORT_MF_CFG_OV_TAG_MASK 0x0000ffff
-#define PORT_MF_CFG_OV_TAG_SHIFT 0
+#define PORT_MF_CFG_OV_TAG_OFFSET 0
#define PORT_MF_CFG_OV_TAG_DEFAULT PORT_MF_CFG_OV_TAG_MASK
u32 reserved[1];
@@ -274,11 +264,11 @@ struct couple_mode_teaming {
/**************************************
* LLDP and DCBX HSI structures
**************************************/
-#define LLDP_CHASSIS_ID_STAT_LEN 4
-#define LLDP_PORT_ID_STAT_LEN 4
+#define LLDP_CHASSIS_ID_STAT_LEN 4
+#define LLDP_PORT_ID_STAT_LEN 4
#define DCBX_MAX_APP_PROTOCOL 32
-#define MAX_SYSTEM_LLDP_TLV_DATA 32
-
+#define MAX_SYSTEM_LLDP_TLV_DATA 32 /* In dwords. 128 in bytes */
+#define MAX_TLV_BUFFER 128 /* In dwords. 512 in bytes */
typedef enum _lldp_agent_e {
LLDP_NEAREST_BRIDGE = 0,
LLDP_NEAREST_NON_TPMR_BRIDGE,
@@ -289,15 +279,15 @@ typedef enum _lldp_agent_e {
struct lldp_config_params_s {
u32 config;
#define LLDP_CONFIG_TX_INTERVAL_MASK 0x000000ff
-#define LLDP_CONFIG_TX_INTERVAL_SHIFT 0
+#define LLDP_CONFIG_TX_INTERVAL_OFFSET 0
#define LLDP_CONFIG_HOLD_MASK 0x00000f00
-#define LLDP_CONFIG_HOLD_SHIFT 8
+#define LLDP_CONFIG_HOLD_OFFSET 8
#define LLDP_CONFIG_MAX_CREDIT_MASK 0x0000f000
-#define LLDP_CONFIG_MAX_CREDIT_SHIFT 12
+#define LLDP_CONFIG_MAX_CREDIT_OFFSET 12
#define LLDP_CONFIG_ENABLE_RX_MASK 0x40000000
-#define LLDP_CONFIG_ENABLE_RX_SHIFT 30
+#define LLDP_CONFIG_ENABLE_RX_OFFSET 30
#define LLDP_CONFIG_ENABLE_TX_MASK 0x80000000
-#define LLDP_CONFIG_ENABLE_TX_SHIFT 31
+#define LLDP_CONFIG_ENABLE_TX_OFFSET 31
/* Holds local Chassis ID TLV header, subtype and 9B of payload.
* If firtst byte is 0, then we will use default chassis ID
*/
@@ -321,17 +311,17 @@ struct lldp_status_params_s {
struct dcbx_ets_feature {
u32 flags;
#define DCBX_ETS_ENABLED_MASK 0x00000001
-#define DCBX_ETS_ENABLED_SHIFT 0
+#define DCBX_ETS_ENABLED_OFFSET 0
#define DCBX_ETS_WILLING_MASK 0x00000002
-#define DCBX_ETS_WILLING_SHIFT 1
+#define DCBX_ETS_WILLING_OFFSET 1
#define DCBX_ETS_ERROR_MASK 0x00000004
-#define DCBX_ETS_ERROR_SHIFT 2
+#define DCBX_ETS_ERROR_OFFSET 2
#define DCBX_ETS_CBS_MASK 0x00000008
-#define DCBX_ETS_CBS_SHIFT 3
+#define DCBX_ETS_CBS_OFFSET 3
#define DCBX_ETS_MAX_TCS_MASK 0x000000f0
-#define DCBX_ETS_MAX_TCS_SHIFT 4
+#define DCBX_ETS_MAX_TCS_OFFSET 4
#define DCBX_OOO_TC_MASK 0x00000f00
-#define DCBX_OOO_TC_SHIFT 8
+#define DCBX_OOO_TC_OFFSET 8
/* Entries in tc table are orginized that the left most is pri 0, right most is
* prio 7
*/
@@ -363,7 +353,7 @@ struct dcbx_ets_feature {
struct dcbx_app_priority_entry {
u32 entry;
#define DCBX_APP_PRI_MAP_MASK 0x000000ff
-#define DCBX_APP_PRI_MAP_SHIFT 0
+#define DCBX_APP_PRI_MAP_OFFSET 0
#define DCBX_APP_PRI_0 0x01
#define DCBX_APP_PRI_1 0x02
#define DCBX_APP_PRI_2 0x04
@@ -373,11 +363,11 @@ struct dcbx_app_priority_entry {
#define DCBX_APP_PRI_6 0x40
#define DCBX_APP_PRI_7 0x80
#define DCBX_APP_SF_MASK 0x00000300
-#define DCBX_APP_SF_SHIFT 8
+#define DCBX_APP_SF_OFFSET 8
#define DCBX_APP_SF_ETHTYPE 0
#define DCBX_APP_SF_PORT 1
#define DCBX_APP_SF_IEEE_MASK 0x0000f000
-#define DCBX_APP_SF_IEEE_SHIFT 12
+#define DCBX_APP_SF_IEEE_OFFSET 12
#define DCBX_APP_SF_IEEE_RESERVED 0
#define DCBX_APP_SF_IEEE_ETHTYPE 1
#define DCBX_APP_SF_IEEE_TCP_PORT 2
@@ -385,7 +375,7 @@ struct dcbx_app_priority_entry {
#define DCBX_APP_SF_IEEE_TCP_UDP_PORT 4
#define DCBX_APP_PROTOCOL_ID_MASK 0xffff0000
-#define DCBX_APP_PROTOCOL_ID_SHIFT 16
+#define DCBX_APP_PROTOCOL_ID_OFFSET 16
};
@@ -393,19 +383,19 @@ struct dcbx_app_priority_entry {
struct dcbx_app_priority_feature {
u32 flags;
#define DCBX_APP_ENABLED_MASK 0x00000001
-#define DCBX_APP_ENABLED_SHIFT 0
+#define DCBX_APP_ENABLED_OFFSET 0
#define DCBX_APP_WILLING_MASK 0x00000002
-#define DCBX_APP_WILLING_SHIFT 1
+#define DCBX_APP_WILLING_OFFSET 1
#define DCBX_APP_ERROR_MASK 0x00000004
-#define DCBX_APP_ERROR_SHIFT 2
+#define DCBX_APP_ERROR_OFFSET 2
/* Not in use
#define DCBX_APP_DEFAULT_PRI_MASK 0x00000f00
- #define DCBX_APP_DEFAULT_PRI_SHIFT 8
+ #define DCBX_APP_DEFAULT_PRI_OFFSET 8
*/
#define DCBX_APP_MAX_TCS_MASK 0x0000f000
-#define DCBX_APP_MAX_TCS_SHIFT 12
+#define DCBX_APP_MAX_TCS_OFFSET 12
#define DCBX_APP_NUM_ENTRIES_MASK 0x00ff0000
-#define DCBX_APP_NUM_ENTRIES_SHIFT 16
+#define DCBX_APP_NUM_ENTRIES_OFFSET 16
struct dcbx_app_priority_entry app_pri_tbl[DCBX_MAX_APP_PROTOCOL];
};
@@ -416,7 +406,7 @@ struct dcbx_features {
/* PFC feature */
u32 pfc;
#define DCBX_PFC_PRI_EN_BITMAP_MASK 0x000000ff
-#define DCBX_PFC_PRI_EN_BITMAP_SHIFT 0
+#define DCBX_PFC_PRI_EN_BITMAP_OFFSET 0
#define DCBX_PFC_PRI_EN_BITMAP_PRI_0 0x01
#define DCBX_PFC_PRI_EN_BITMAP_PRI_1 0x02
#define DCBX_PFC_PRI_EN_BITMAP_PRI_2 0x04
@@ -427,17 +417,17 @@ struct dcbx_features {
#define DCBX_PFC_PRI_EN_BITMAP_PRI_7 0x80
#define DCBX_PFC_FLAGS_MASK 0x0000ff00
-#define DCBX_PFC_FLAGS_SHIFT 8
+#define DCBX_PFC_FLAGS_OFFSET 8
#define DCBX_PFC_CAPS_MASK 0x00000f00
-#define DCBX_PFC_CAPS_SHIFT 8
+#define DCBX_PFC_CAPS_OFFSET 8
#define DCBX_PFC_MBC_MASK 0x00004000
-#define DCBX_PFC_MBC_SHIFT 14
+#define DCBX_PFC_MBC_OFFSET 14
#define DCBX_PFC_WILLING_MASK 0x00008000
-#define DCBX_PFC_WILLING_SHIFT 15
+#define DCBX_PFC_WILLING_OFFSET 15
#define DCBX_PFC_ENABLED_MASK 0x00010000
-#define DCBX_PFC_ENABLED_SHIFT 16
+#define DCBX_PFC_ENABLED_OFFSET 16
#define DCBX_PFC_ERROR_MASK 0x00020000
-#define DCBX_PFC_ERROR_SHIFT 17
+#define DCBX_PFC_ERROR_OFFSET 17
/* APP feature */
struct dcbx_app_priority_feature app;
@@ -446,10 +436,12 @@ struct dcbx_features {
struct dcbx_local_params {
u32 config;
#define DCBX_CONFIG_VERSION_MASK 0x00000007
-#define DCBX_CONFIG_VERSION_SHIFT 0
+#define DCBX_CONFIG_VERSION_OFFSET 0
#define DCBX_CONFIG_VERSION_DISABLED 0
#define DCBX_CONFIG_VERSION_IEEE 1
#define DCBX_CONFIG_VERSION_CEE 2
+#define DCBX_CONFIG_VERSION_DYNAMIC \
+ (DCBX_CONFIG_VERSION_IEEE | DCBX_CONFIG_VERSION_CEE)
#define DCBX_CONFIG_VERSION_STATIC 4
u32 flags;
@@ -461,7 +453,7 @@ struct dcbx_mib {
u32 flags;
/*
#define DCBX_CONFIG_VERSION_MASK 0x00000007
- #define DCBX_CONFIG_VERSION_SHIFT 0
+ #define DCBX_CONFIG_VERSION_OFFSET 0
#define DCBX_CONFIG_VERSION_DISABLED 0
#define DCBX_CONFIG_VERSION_IEEE 1
#define DCBX_CONFIG_VERSION_CEE 2
@@ -472,19 +464,49 @@ struct dcbx_mib {
};
struct lldp_system_tlvs_buffer_s {
- u16 valid;
- u16 length;
+ u32 flags;
+#define LLDP_SYSTEM_TLV_VALID_MASK 0x1
+#define LLDP_SYSTEM_TLV_VALID_OFFSET 0
+/* This bit defines whether the system TLVs replace the mandatory TLVs or are
+ * sent in addition to them. Set to 1 to replace the mandatory TLVs.
+ */
+#define LLDP_SYSTEM_TLV_MANDATORY_MASK 0x2
+#define LLDP_SYSTEM_TLV_MANDATORY_OFFSET 1
+#define LLDP_SYSTEM_TLV_LENGTH_MASK 0xffff0000
+#define LLDP_SYSTEM_TLV_LENGTH_OFFSET 16
u32 data[MAX_SYSTEM_LLDP_TLV_DATA];
};
+/* Since this struct is written by the MFW and read by the driver, it needs
+ * sequence guards (as in the case of the DCBX MIB)
+ */
+struct lldp_received_tlvs_s {
+ u32 prefix_seq_num;
+ u32 length;
+ u32 tlvs_buffer[MAX_TLV_BUFFER];
+ u32 suffix_seq_num;
+};
+
struct dcb_dscp_map {
u32 flags;
#define DCB_DSCP_ENABLE_MASK 0x1
-#define DCB_DSCP_ENABLE_SHIFT 0
+#define DCB_DSCP_ENABLE_OFFSET 0
#define DCB_DSCP_ENABLE 1
u32 dscp_pri_map[8];
};
+/**************************************
+ * Attributes commands
+ **************************************/
+
+enum _attribute_commands_e {
+ ATTRIBUTE_CMD_READ = 0,
+ ATTRIBUTE_CMD_WRITE,
+ ATTRIBUTE_CMD_READ_CLEAR,
+ ATTRIBUTE_CMD_CLEAR,
+ ATTRIBUTE_NUM_OF_COMMANDS
+};
+
/**************************************/
/* */
/* P U B L I C G L O B A L */
@@ -512,12 +534,12 @@ struct public_global {
#define MDUMP_REASON_DUMP_AGED (1 << 2)
u32 ext_phy_upgrade_fw;
#define EXT_PHY_FW_UPGRADE_STATUS_MASK (0x0000ffff)
-#define EXT_PHY_FW_UPGRADE_STATUS_SHIFT (0)
+#define EXT_PHY_FW_UPGRADE_STATUS_OFFSET (0)
#define EXT_PHY_FW_UPGRADE_STATUS_IN_PROGRESS (1)
#define EXT_PHY_FW_UPGRADE_STATUS_FAILED (2)
#define EXT_PHY_FW_UPGRADE_STATUS_SUCCESS (3)
#define EXT_PHY_FW_UPGRADE_TYPE_MASK (0xffff0000)
-#define EXT_PHY_FW_UPGRADE_TYPE_SHIFT (16)
+#define EXT_PHY_FW_UPGRADE_TYPE_OFFSET (16)
};
/**************************************/
@@ -567,9 +589,9 @@ struct public_path {
/* Reset on mcp reset, and incremented for eveny process kill event. */
u32 process_kill;
#define PROCESS_KILL_COUNTER_MASK 0x0000ffff
-#define PROCESS_KILL_COUNTER_SHIFT 0
+#define PROCESS_KILL_COUNTER_OFFSET 0
#define PROCESS_KILL_GLOB_AEU_BIT_MASK 0xffff0000
-#define PROCESS_KILL_GLOB_AEU_BIT_SHIFT 16
+#define PROCESS_KILL_GLOB_AEU_BIT_OFFSET 16
#define GLOBAL_AEU_BIT(aeu_reg_id, aeu_bit) (aeu_reg_id * 32 + aeu_bit)
};
@@ -697,6 +719,8 @@ struct public_port {
#define LFA_SPEED_MISMATCH (1 << 3)
#define LFA_FLOW_CTRL_MISMATCH (1 << 4)
#define LFA_ADV_SPEED_MISMATCH (1 << 5)
+#define LFA_EEE_MISMATCH (1 << 6)
+#define LFA_LINK_MODES_MISMATCH (1 << 7)
#define LINK_FLAP_AVOIDANCE_COUNT_OFFSET 8
#define LINK_FLAP_AVOIDANCE_COUNT_MASK 0x0000ff00
#define LINK_FLAP_COUNT_OFFSET 16
@@ -721,13 +745,13 @@ struct public_port {
u32 fc_npiv_nvram_tbl_size;
u32 transceiver_data;
#define ETH_TRANSCEIVER_STATE_MASK 0x000000FF
-#define ETH_TRANSCEIVER_STATE_SHIFT 0x00000000
+#define ETH_TRANSCEIVER_STATE_OFFSET 0x00000000
#define ETH_TRANSCEIVER_STATE_UNPLUGGED 0x00000000
#define ETH_TRANSCEIVER_STATE_PRESENT 0x00000001
#define ETH_TRANSCEIVER_STATE_VALID 0x00000003
#define ETH_TRANSCEIVER_STATE_UPDATING 0x00000008
#define ETH_TRANSCEIVER_TYPE_MASK 0x0000FF00
-#define ETH_TRANSCEIVER_TYPE_SHIFT 0x00000008
+#define ETH_TRANSCEIVER_TYPE_OFFSET 0x00000008
#define ETH_TRANSCEIVER_TYPE_NONE 0x00000000
#define ETH_TRANSCEIVER_TYPE_UNKNOWN 0x000000FF
/* 1G Passive copper cable */
@@ -775,6 +799,7 @@ struct public_port {
#define ETH_TRANSCEIVER_TYPE_4x10G 0x1f
#define ETH_TRANSCEIVER_TYPE_4x25G_CR 0x20
+#define ETH_TRANSCEIVER_TYPE_1000BASET 0x21
#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_SR 0x30
#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_CR 0x31
#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_LR 0x32
@@ -787,38 +812,55 @@ struct public_port {
u32 wol_pkt_details;
struct dcb_dscp_map dcb_dscp_map;
- /* the status of EEE auto-negotiation
- * bits 19:0 the configured tx-lpi entry timer value. Depends on bit 31.
- * bits 23:20 the speeds advertised for EEE.
- * bits 27:24 the speeds the Link partner advertised for EEE.
- * The supported/adv. modes in bits 27:19 originate from the
- * SHMEM_EEE_XXX_ADV definitions (where XXX is replaced by speed).
- * bit 28 when 1'b1 EEE was requested.
- * bit 29 when 1'b1 tx lpi was requested.
- * bit 30 when 1'b1 EEE was negotiated. Tx lpi will be asserted if 30:29
- * are 2'b11.
- * bit 31 - When 1'b0 bits 15:0 contain
- * NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_XXX define as value.
- * When 1'b1 those bits contains a value times 16 microseconds.
- */
u32 eee_status;
-#define EEE_TIMER_MASK 0x000fffff
-#define EEE_ADV_STATUS_MASK 0x00f00000
-#define EEE_1G_ADV (1 << 1)
-#define EEE_10G_ADV (1 << 2)
-#define EEE_ADV_STATUS_SHIFT 20
-#define EEE_LP_ADV_STATUS_MASK 0x0f000000
-#define EEE_LP_ADV_STATUS_SHIFT 24
-#define EEE_REQUESTED_BIT 0x10000000
-#define EEE_LPI_REQUESTED_BIT 0x20000000
-#define EEE_ACTIVE_BIT 0x40000000
-#define EEE_TIME_OUTPUT_BIT 0x80000000
+/* Set when EEE negotiation is complete. */
+#define EEE_ACTIVE_BIT (1 << 0)
+
+/* Shows the Local Device EEE capabilities */
+#define EEE_LD_ADV_STATUS_MASK 0x000000f0
+#define EEE_LD_ADV_STATUS_OFFSET 4
+ #define EEE_1G_ADV (1 << 1)
+ #define EEE_10G_ADV (1 << 2)
+/* Same values as in EEE_LD_ADV, but for the Link Partner */
+#define EEE_LP_ADV_STATUS_MASK 0x00000f00
+#define EEE_LP_ADV_STATUS_OFFSET 8
+
+/* Supported speeds for EEE */
+#define EEE_SUPPORTED_SPEED_MASK 0x0000f000
+#define EEE_SUPPORTED_SPEED_OFFSET 12
+ #define EEE_1G_SUPPORTED (1 << 1)
+ #define EEE_10G_SUPPORTED (1 << 2)
u32 eee_remote; /* Used for EEE in LLDP */
#define EEE_REMOTE_TW_TX_MASK 0x0000ffff
-#define EEE_REMOTE_TW_TX_SHIFT 0
+#define EEE_REMOTE_TW_TX_OFFSET 0
#define EEE_REMOTE_TW_RX_MASK 0xffff0000
-#define EEE_REMOTE_TW_RX_SHIFT 16
+#define EEE_REMOTE_TW_RX_OFFSET 16
+
+ u32 module_info;
+#define ETH_TRANSCEIVER_MONITORING_TYPE_MASK 0x000000FF
+#define ETH_TRANSCEIVER_MONITORING_TYPE_OFFSET 0
+#define ETH_TRANSCEIVER_ADDR_CHNG_REQUIRED (1 << 2)
+#define ETH_TRANSCEIVER_RCV_PWR_MEASURE_TYPE (1 << 3)
+#define ETH_TRANSCEIVER_EXTERNALLY_CALIBRATED (1 << 4)
+#define ETH_TRANSCEIVER_INTERNALLY_CALIBRATED (1 << 5)
+#define ETH_TRANSCEIVER_HAS_DIAGNOSTIC (1 << 6)
+#define ETH_TRANSCEIVER_IDENT_MASK 0x0000ff00
+#define ETH_TRANSCEIVER_IDENT_OFFSET 8
+
+ u32 oem_cfg_port;
+#define OEM_CFG_CHANNEL_TYPE_MASK 0x00000003
+#define OEM_CFG_CHANNEL_TYPE_OFFSET 0
+#define OEM_CFG_CHANNEL_TYPE_VLAN_PARTITION 0x1
+#define OEM_CFG_CHANNEL_TYPE_STAGGED 0x2
+
+#define OEM_CFG_SCHED_TYPE_MASK 0x0000000C
+#define OEM_CFG_SCHED_TYPE_OFFSET 2
+#define OEM_CFG_SCHED_TYPE_ETS 0x1
+#define OEM_CFG_SCHED_TYPE_VNIC_BW 0x2
+
+ struct lldp_received_tlvs_s lldp_received_tlvs[LLDP_MAX_LLDP_AGENTS];
+ u32 system_lldp_tlvs_buf2[MAX_SYSTEM_LLDP_TLV_DATA];
};
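Because the eee_status layout changes here (the LPI timer field is dropped and the local/partner advertisement plus supported-speed nibbles move), a self-contained decode using the new masks may help; the sample register value is made up.

#include <stdint.h>
#include <stdio.h>

#define EEE_ACTIVE_BIT			(1 << 0)
#define EEE_LD_ADV_STATUS_MASK		0x000000f0
#define EEE_LD_ADV_STATUS_OFFSET	4
#define EEE_1G_ADV			(1 << 1)
#define EEE_10G_ADV			(1 << 2)
#define EEE_LP_ADV_STATUS_MASK		0x00000f00
#define EEE_LP_ADV_STATUS_OFFSET	8
#define EEE_SUPPORTED_SPEED_MASK	0x0000f000
#define EEE_SUPPORTED_SPEED_OFFSET	12

int main(void)
{
	/* Made-up value: active, LD advertises 1G+10G, LP advertises 10G,
	 * both speeds supported.
	 */
	uint32_t eee_status = EEE_ACTIVE_BIT |
		((EEE_1G_ADV | EEE_10G_ADV) << EEE_LD_ADV_STATUS_OFFSET) |
		(EEE_10G_ADV << EEE_LP_ADV_STATUS_OFFSET) |
		((EEE_1G_ADV | EEE_10G_ADV) << EEE_SUPPORTED_SPEED_OFFSET);
	uint32_t ld = (eee_status & EEE_LD_ADV_STATUS_MASK) >>
		      EEE_LD_ADV_STATUS_OFFSET;
	uint32_t lp = (eee_status & EEE_LP_ADV_STATUS_MASK) >>
		      EEE_LP_ADV_STATUS_OFFSET;

	printf("EEE %s, LD adv 0x%x, LP adv 0x%x\n",
	       (eee_status & EEE_ACTIVE_BIT) ? "active" : "inactive",
	       (unsigned int)ld, (unsigned int)lp);
	return 0;
}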
/**************************************/
@@ -857,11 +899,11 @@ struct public_func {
/* function 0 of each port cannot be hidden */
#define FUNC_MF_CFG_FUNC_HIDE 0x00000001
#define FUNC_MF_CFG_PAUSE_ON_HOST_RING 0x00000002
-#define FUNC_MF_CFG_PAUSE_ON_HOST_RING_SHIFT 0x00000001
+#define FUNC_MF_CFG_PAUSE_ON_HOST_RING_OFFSET 0x00000001
#define FUNC_MF_CFG_PROTOCOL_MASK 0x000000f0
-#define FUNC_MF_CFG_PROTOCOL_SHIFT 4
+#define FUNC_MF_CFG_PROTOCOL_OFFSET 4
#define FUNC_MF_CFG_PROTOCOL_ETHERNET 0x00000000
#define FUNC_MF_CFG_PROTOCOL_ISCSI 0x00000010
#define FUNC_MF_CFG_PROTOCOL_FCOE 0x00000020
@@ -871,18 +913,20 @@ struct public_func {
/* MINBW, MAXBW */
/* value range - 0..100, increments in 1 % */
#define FUNC_MF_CFG_MIN_BW_MASK 0x0000ff00
-#define FUNC_MF_CFG_MIN_BW_SHIFT 8
+#define FUNC_MF_CFG_MIN_BW_OFFSET 8
#define FUNC_MF_CFG_MIN_BW_DEFAULT 0x00000000
#define FUNC_MF_CFG_MAX_BW_MASK 0x00ff0000
-#define FUNC_MF_CFG_MAX_BW_SHIFT 16
+#define FUNC_MF_CFG_MAX_BW_OFFSET 16
#define FUNC_MF_CFG_MAX_BW_DEFAULT 0x00640000
u32 status;
-#define FUNC_STATUS_VLINK_DOWN 0x00000001
+#define FUNC_STATUS_VIRTUAL_LINK_UP 0x00000001
+#define FUNC_STATUS_LOGICAL_LINK_UP 0x00000002
+#define FUNC_STATUS_FORCED_LINK 0x00000004
u32 mac_upper; /* MAC */
#define FUNC_MF_CFG_UPPERMAC_MASK 0x0000ffff
-#define FUNC_MF_CFG_UPPERMAC_SHIFT 0
+#define FUNC_MF_CFG_UPPERMAC_OFFSET 0
#define FUNC_MF_CFG_UPPERMAC_DEFAULT FUNC_MF_CFG_UPPERMAC_MASK
u32 mac_lower;
#define FUNC_MF_CFG_LOWERMAC_DEFAULT 0xffffffff
@@ -895,7 +939,7 @@ struct public_func {
u32 ovlan_stag; /* tags */
#define FUNC_MF_CFG_OV_STAG_MASK 0x0000ffff
-#define FUNC_MF_CFG_OV_STAG_SHIFT 0
+#define FUNC_MF_CFG_OV_STAG_OFFSET 0
#define FUNC_MF_CFG_OV_STAG_DEFAULT FUNC_MF_CFG_OV_STAG_MASK
u32 pf_allocation; /* vf per pf */
@@ -912,29 +956,46 @@ struct public_func {
u32 drv_id;
#define DRV_ID_PDA_COMP_VER_MASK 0x0000ffff
-#define DRV_ID_PDA_COMP_VER_SHIFT 0
+#define DRV_ID_PDA_COMP_VER_OFFSET 0
#define LOAD_REQ_HSI_VERSION 2
#define DRV_ID_MCP_HSI_VER_MASK 0x00ff0000
-#define DRV_ID_MCP_HSI_VER_SHIFT 16
+#define DRV_ID_MCP_HSI_VER_OFFSET 16
#define DRV_ID_MCP_HSI_VER_CURRENT (LOAD_REQ_HSI_VERSION << \
- DRV_ID_MCP_HSI_VER_SHIFT)
+ DRV_ID_MCP_HSI_VER_OFFSET)
#define DRV_ID_DRV_TYPE_MASK 0x7f000000
-#define DRV_ID_DRV_TYPE_SHIFT 24
-#define DRV_ID_DRV_TYPE_UNKNOWN (0 << DRV_ID_DRV_TYPE_SHIFT)
-#define DRV_ID_DRV_TYPE_LINUX (1 << DRV_ID_DRV_TYPE_SHIFT)
-#define DRV_ID_DRV_TYPE_WINDOWS (2 << DRV_ID_DRV_TYPE_SHIFT)
-#define DRV_ID_DRV_TYPE_DIAG (3 << DRV_ID_DRV_TYPE_SHIFT)
-#define DRV_ID_DRV_TYPE_PREBOOT (4 << DRV_ID_DRV_TYPE_SHIFT)
-#define DRV_ID_DRV_TYPE_SOLARIS (5 << DRV_ID_DRV_TYPE_SHIFT)
-#define DRV_ID_DRV_TYPE_VMWARE (6 << DRV_ID_DRV_TYPE_SHIFT)
-#define DRV_ID_DRV_TYPE_FREEBSD (7 << DRV_ID_DRV_TYPE_SHIFT)
-#define DRV_ID_DRV_TYPE_AIX (8 << DRV_ID_DRV_TYPE_SHIFT)
+#define DRV_ID_DRV_TYPE_OFFSET 24
+#define DRV_ID_DRV_TYPE_UNKNOWN (0 << DRV_ID_DRV_TYPE_OFFSET)
+#define DRV_ID_DRV_TYPE_LINUX (1 << DRV_ID_DRV_TYPE_OFFSET)
+#define DRV_ID_DRV_TYPE_WINDOWS (2 << DRV_ID_DRV_TYPE_OFFSET)
+#define DRV_ID_DRV_TYPE_DIAG (3 << DRV_ID_DRV_TYPE_OFFSET)
+#define DRV_ID_DRV_TYPE_PREBOOT (4 << DRV_ID_DRV_TYPE_OFFSET)
+#define DRV_ID_DRV_TYPE_SOLARIS (5 << DRV_ID_DRV_TYPE_OFFSET)
+#define DRV_ID_DRV_TYPE_VMWARE (6 << DRV_ID_DRV_TYPE_OFFSET)
+#define DRV_ID_DRV_TYPE_FREEBSD (7 << DRV_ID_DRV_TYPE_OFFSET)
+#define DRV_ID_DRV_TYPE_AIX (8 << DRV_ID_DRV_TYPE_OFFSET)
#define DRV_ID_DRV_INIT_HW_MASK 0x80000000
-#define DRV_ID_DRV_INIT_HW_SHIFT 31
-#define DRV_ID_DRV_INIT_HW_FLAG (1 << DRV_ID_DRV_INIT_HW_SHIFT)
+#define DRV_ID_DRV_INIT_HW_OFFSET 31
+#define DRV_ID_DRV_INIT_HW_FLAG (1 << DRV_ID_DRV_INIT_HW_OFFSET)
+
+ u32 oem_cfg_func;
+#define OEM_CFG_FUNC_TC_MASK 0x0000000F
+#define OEM_CFG_FUNC_TC_OFFSET 0
+#define OEM_CFG_FUNC_TC_0 0x0
+#define OEM_CFG_FUNC_TC_1 0x1
+#define OEM_CFG_FUNC_TC_2 0x2
+#define OEM_CFG_FUNC_TC_3 0x3
+#define OEM_CFG_FUNC_TC_4 0x4
+#define OEM_CFG_FUNC_TC_5 0x5
+#define OEM_CFG_FUNC_TC_6 0x6
+#define OEM_CFG_FUNC_TC_7 0x7
+
+#define OEM_CFG_FUNC_HOST_PRI_CTRL_MASK 0x00000030
+#define OEM_CFG_FUNC_HOST_PRI_CTRL_OFFSET 4
+#define OEM_CFG_FUNC_HOST_PRI_CTRL_VNIC 0x1
+#define OEM_CFG_FUNC_HOST_PRI_CTRL_OS 0x2
};
/**************************************/
@@ -1019,13 +1080,13 @@ struct ocbb_data_stc {
#define MFW_SENSOR_LOCATION_EXTERNAL 2
#define MFW_SENSOR_LOCATION_SFP 3
-#define SENSOR_LOCATION_SHIFT 0
+#define SENSOR_LOCATION_OFFSET 0
#define SENSOR_LOCATION_MASK 0x000000ff
-#define THRESHOLD_HIGH_SHIFT 8
+#define THRESHOLD_HIGH_OFFSET 8
#define THRESHOLD_HIGH_MASK 0x0000ff00
-#define CRITICAL_TEMPERATURE_SHIFT 16
+#define CRITICAL_TEMPERATURE_OFFSET 16
#define CRITICAL_TEMPERATURE_MASK 0x00ff0000
-#define CURRENT_TEMP_SHIFT 24
+#define CURRENT_TEMP_OFFSET 24
#define CURRENT_TEMP_MASK 0xff000000
struct temperature_status_stc {
u32 num_of_sensors;
@@ -1090,18 +1151,18 @@ struct load_req_stc {
u32 fw_ver;
u32 misc0;
#define LOAD_REQ_ROLE_MASK 0x000000FF
-#define LOAD_REQ_ROLE_SHIFT 0
+#define LOAD_REQ_ROLE_OFFSET 0
#define LOAD_REQ_LOCK_TO_MASK 0x0000FF00
-#define LOAD_REQ_LOCK_TO_SHIFT 8
+#define LOAD_REQ_LOCK_TO_OFFSET 8
#define LOAD_REQ_LOCK_TO_DEFAULT 0
#define LOAD_REQ_LOCK_TO_NONE 255
#define LOAD_REQ_FORCE_MASK 0x000F0000
-#define LOAD_REQ_FORCE_SHIFT 16
+#define LOAD_REQ_FORCE_OFFSET 16
#define LOAD_REQ_FORCE_NONE 0
#define LOAD_REQ_FORCE_PF 1
#define LOAD_REQ_FORCE_ALL 2
#define LOAD_REQ_FLAGS0_MASK 0x00F00000
-#define LOAD_REQ_FLAGS0_SHIFT 20
+#define LOAD_REQ_FLAGS0_OFFSET 20
#define LOAD_REQ_FLAGS0_AVOID_RESET (0x1 << 0)
};
@@ -1111,14 +1172,27 @@ struct load_rsp_stc {
u32 fw_ver;
u32 misc0;
#define LOAD_RSP_ROLE_MASK 0x000000FF
-#define LOAD_RSP_ROLE_SHIFT 0
+#define LOAD_RSP_ROLE_OFFSET 0
#define LOAD_RSP_HSI_MASK 0x0000FF00
-#define LOAD_RSP_HSI_SHIFT 8
+#define LOAD_RSP_HSI_OFFSET 8
#define LOAD_RSP_FLAGS0_MASK 0x000F0000
-#define LOAD_RSP_FLAGS0_SHIFT 16
+#define LOAD_RSP_FLAGS0_OFFSET 16
#define LOAD_RSP_FLAGS0_DRV_EXISTS (0x1 << 0)
};
+struct mdump_retain_data_stc {
+ u32 valid;
+ u32 epoch;
+ u32 pf;
+ u32 status;
+};
+
+struct attribute_cmd_write_stc {
+ u32 val;
+ u32 mask;
+ u32 offset;
+};
+
union drv_union_data {
struct mcp_mac wol_mac; /* UNLOAD_DONE */
@@ -1149,6 +1223,8 @@ union drv_union_data {
struct load_req_stc load_req;
struct load_rsp_stc load_rsp;
+ struct mdump_retain_data_stc mdump_retain;
+ struct attribute_cmd_write_stc attribute_cmd_write;
/* ... */
};
@@ -1166,8 +1242,8 @@ struct public_drv_mb {
/* - DONT_CARE - Don't flap the link if up */
#define DRV_MSG_CODE_LINK_RESET 0x23000000
- /* Vitaly: LLDP commands */
#define DRV_MSG_CODE_SET_LLDP 0x24000000
+#define DRV_MSG_CODE_REGISTER_LLDP_TLVS_RX 0x24100000
#define DRV_MSG_CODE_SET_DCBX 0x25000000
/* OneView feature driver HSI*/
#define DRV_MSG_CODE_OV_UPDATE_CURR_CFG 0x26000000
@@ -1189,18 +1265,19 @@ struct public_drv_mb {
#define DRV_MSG_CODE_INITIATE_PF_FLR 0x02010000
#define DRV_MSG_CODE_VF_DISABLED_DONE 0xc0000000
#define DRV_MSG_CODE_CFG_VF_MSIX 0xc0010000
+#define DRV_MSG_CODE_CFG_PF_VFS_MSIX 0xc0020000
/* Param is either DRV_MB_PARAM_NVM_PUT_FILE_BEGIN_MFW/IMAGE */
#define DRV_MSG_CODE_NVM_PUT_FILE_BEGIN 0x00010000
/* Param should be set to the transaction size (up to 64 bytes) */
#define DRV_MSG_CODE_NVM_PUT_FILE_DATA 0x00020000
/* MFW will place the file offset and len in file_att struct */
#define DRV_MSG_CODE_NVM_GET_FILE_ATT 0x00030000
-/* Read 32bytes of nvram data. Param is [0:23] – Offset [24:31] –
- * Len in Bytes
+/* Read 32bytes of nvram data. Param is [0:23] - Offset [24:31] -
+ * Len in Bytes
*/
#define DRV_MSG_CODE_NVM_READ_NVRAM 0x00050000
-/* Writes up to 32Bytes to nvram. Param is [0:23] – Offset [24:31] –
- * Len in Bytes. In case this address is in the range of secured file in
+/* Writes up to 32Bytes to nvram. Param is [0:23] - Offset [24:31] -
+ * Len in Bytes. In case this address is in the range of secured file in
* secured mode, the operation will fail
*/
#define DRV_MSG_CODE_NVM_WRITE_NVRAM 0x00060000
@@ -1242,7 +1319,7 @@ struct public_drv_mb {
* [3:0] - func, drv_data[7:0] - MAC/WWNN/WWPN
*/
#define DRV_MSG_CODE_GET_VMAC 0x00120000
-#define DRV_MSG_CODE_VMAC_TYPE_SHIFT 4
+#define DRV_MSG_CODE_VMAC_TYPE_OFFSET 4
#define DRV_MSG_CODE_VMAC_TYPE_MASK 0x30
#define DRV_MSG_CODE_VMAC_TYPE_MAC 1
#define DRV_MSG_CODE_VMAC_TYPE_WWNN 2
@@ -1270,9 +1347,9 @@ struct public_drv_mb {
/* Set function BW, params[15:8] - min, params[7:0] - max */
#define DRV_MSG_CODE_SET_BW 0x00190000
#define BW_MAX_MASK 0x000000ff
-#define BW_MAX_SHIFT 0
+#define BW_MAX_OFFSET 0
#define BW_MIN_MASK 0x0000ff00
-#define BW_MIN_SHIFT 8
+#define BW_MIN_OFFSET 8
/* When param is set to 1, all parities will be masked(disabled). When params
* are set to 0, parities will be unmasked again.
@@ -1305,9 +1382,9 @@ struct public_drv_mb {
#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000
#define RESOURCE_CMD_REQ_RESC_MASK 0x0000001F
-#define RESOURCE_CMD_REQ_RESC_SHIFT 0
+#define RESOURCE_CMD_REQ_RESC_OFFSET 0
#define RESOURCE_CMD_REQ_OPCODE_MASK 0x000000E0
-#define RESOURCE_CMD_REQ_OPCODE_SHIFT 5
+#define RESOURCE_CMD_REQ_OPCODE_OFFSET 5
/* request resource ownership with default aging */
#define RESOURCE_OPCODE_REQ 1
/* request resource ownership without aging */
@@ -1318,12 +1395,12 @@ struct public_drv_mb {
/* force resource release */
#define RESOURCE_OPCODE_FORCE_RELEASE 5
#define RESOURCE_CMD_REQ_AGE_MASK 0x0000FF00
-#define RESOURCE_CMD_REQ_AGE_SHIFT 8
+#define RESOURCE_CMD_REQ_AGE_OFFSET 8
#define RESOURCE_CMD_RSP_OWNER_MASK 0x000000FF
-#define RESOURCE_CMD_RSP_OWNER_SHIFT 0
+#define RESOURCE_CMD_RSP_OWNER_OFFSET 0
#define RESOURCE_CMD_RSP_OPCODE_MASK 0x00000700
-#define RESOURCE_CMD_RSP_OPCODE_SHIFT 8
+#define RESOURCE_CMD_RSP_OPCODE_OFFSET 8
/* resource is free and granted to requester */
#define RESOURCE_OPCODE_GNT 1
/* resource is busy, param[7:0] indicates owner as follow 0-15 = PF0-15,
@@ -1361,6 +1438,8 @@ struct public_drv_mb {
#define DRV_MSG_CODE_MDUMP_SET_ENABLE 0x05
/* Clear all logs */
#define DRV_MSG_CODE_MDUMP_CLEAR_LOGS 0x06
+#define DRV_MSG_CODE_MDUMP_GET_RETAIN 0x07 /* Get retained data */
+#define DRV_MSG_CODE_MDUMP_CLR_RETAIN 0x08 /* Clear retain data */
#define DRV_MSG_CODE_MEM_ECC_EVENTS 0x00260000 /* Param: None */
/* Param: [0:15] - gpio number */
#define DRV_MSG_CODE_GPIO_INFO 0x00270000
@@ -1368,14 +1447,28 @@ struct public_drv_mb {
#define DRV_MSG_CODE_EXT_PHY_READ 0x00280000
/* Value should be placed in union */
#define DRV_MSG_CODE_EXT_PHY_WRITE 0x00290000
-#define DRV_MB_PARAM_ADDR_SHIFT 0
+#define DRV_MB_PARAM_ADDR_OFFSET 0
#define DRV_MB_PARAM_ADDR_MASK 0x0000FFFF
-#define DRV_MB_PARAM_DEVAD_SHIFT 16
+#define DRV_MB_PARAM_DEVAD_OFFSET 16
#define DRV_MB_PARAM_DEVAD_MASK 0x001F0000
-#define DRV_MB_PARAM_PORT_SHIFT 21
+#define DRV_MB_PARAM_PORT_OFFSET 21
#define DRV_MB_PARAM_PORT_MASK 0x00600000
#define DRV_MSG_CODE_EXT_PHY_FW_UPGRADE 0x002a0000
+#define DRV_MSG_CODE_GET_TLV_DONE 0x002f0000 /* Param: None */
+/* Param: Set DRV_MB_PARAM_FEATURE_SUPPORT_* */
+#define DRV_MSG_CODE_FEATURE_SUPPORT 0x00300000
+/* return FW_MB_PARAM_FEATURE_SUPPORT_* */
+#define DRV_MSG_CODE_GET_MFW_FEATURE_SUPPORT 0x00310000
+#define DRV_MSG_CODE_READ_WOL_REG 0X00320000
+#define DRV_MSG_CODE_WRITE_WOL_REG 0X00330000
+#define DRV_MSG_CODE_GET_WOL_BUFFER 0X00340000
+/* Param: [0:23] Attribute key, [24:31] Attribute sub command */
+#define DRV_MSG_CODE_ATTRIBUTE 0x00350000
+
+/* Param: Password len. Union: Plain Password */
+#define DRV_MSG_CODE_ENCRYPT_PASSWORD 0x00360000
+
#define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff
u32 drv_mb_param;
@@ -1393,45 +1486,56 @@ struct public_drv_mb {
#define DRV_MB_PARAM_INIT_PHY_DONT_CARE 0x00000002
/* LLDP / DCBX params*/
+ /* To be used with SET_LLDP command */
#define DRV_MB_PARAM_LLDP_SEND_MASK 0x00000001
-#define DRV_MB_PARAM_LLDP_SEND_SHIFT 0
+#define DRV_MB_PARAM_LLDP_SEND_OFFSET 0
+ /* To be used with SET_LLDP and REGISTER_LLDP_TLVS_RX commands */
#define DRV_MB_PARAM_LLDP_AGENT_MASK 0x00000006
-#define DRV_MB_PARAM_LLDP_AGENT_SHIFT 1
+#define DRV_MB_PARAM_LLDP_AGENT_OFFSET 1
+ /* To be used with REGISTER_LLDP_TLVS_RX command */
+#define DRV_MB_PARAM_LLDP_TLV_RX_VALID_MASK 0x00000001
+#define DRV_MB_PARAM_LLDP_TLV_RX_VALID_OFFSET 0
+#define DRV_MB_PARAM_LLDP_TLV_RX_TYPE_MASK 0x000007f0
+#define DRV_MB_PARAM_LLDP_TLV_RX_TYPE_OFFSET 4
+ /* To be used with SET_DCBX command */
#define DRV_MB_PARAM_DCBX_NOTIFY_MASK 0x00000008
-#define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT 3
+#define DRV_MB_PARAM_DCBX_NOTIFY_OFFSET 3
#define DRV_MB_PARAM_NIG_DRAIN_PERIOD_MS_MASK 0x000000FF
-#define DRV_MB_PARAM_NIG_DRAIN_PERIOD_MS_SHIFT 0
+#define DRV_MB_PARAM_NIG_DRAIN_PERIOD_MS_OFFSET 0
#define DRV_MB_PARAM_NVM_PUT_FILE_BEGIN_MFW 0x1
#define DRV_MB_PARAM_NVM_PUT_FILE_BEGIN_IMAGE 0x2
-#define DRV_MB_PARAM_NVM_OFFSET_SHIFT 0
+#define DRV_MB_PARAM_NVM_OFFSET_OFFSET 0
#define DRV_MB_PARAM_NVM_OFFSET_MASK 0x00FFFFFF
-#define DRV_MB_PARAM_NVM_LEN_SHIFT 24
+#define DRV_MB_PARAM_NVM_LEN_OFFSET 24
#define DRV_MB_PARAM_NVM_LEN_MASK 0xFF000000
-#define DRV_MB_PARAM_PHY_ADDR_SHIFT 0
+#define DRV_MB_PARAM_PHY_ADDR_OFFSET 0
#define DRV_MB_PARAM_PHY_ADDR_MASK 0x1FF0FFFF
-#define DRV_MB_PARAM_PHY_LANE_SHIFT 16
+#define DRV_MB_PARAM_PHY_LANE_OFFSET 16
#define DRV_MB_PARAM_PHY_LANE_MASK 0x000F0000
-#define DRV_MB_PARAM_PHY_SELECT_PORT_SHIFT 29
+#define DRV_MB_PARAM_PHY_SELECT_PORT_OFFSET 29
#define DRV_MB_PARAM_PHY_SELECT_PORT_MASK 0x20000000
-#define DRV_MB_PARAM_PHY_PORT_SHIFT 30
+#define DRV_MB_PARAM_PHY_PORT_OFFSET 30
#define DRV_MB_PARAM_PHY_PORT_MASK 0xc0000000
-#define DRV_MB_PARAM_PHYMOD_LANE_SHIFT 0
+#define DRV_MB_PARAM_PHYMOD_LANE_OFFSET 0
#define DRV_MB_PARAM_PHYMOD_LANE_MASK 0x000000FF
-#define DRV_MB_PARAM_PHYMOD_SIZE_SHIFT 8
+#define DRV_MB_PARAM_PHYMOD_SIZE_OFFSET 8
#define DRV_MB_PARAM_PHYMOD_SIZE_MASK 0x000FFF00
- /* configure vf MSIX params*/
-#define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_SHIFT 0
+ /* configure vf MSIX params BB */
+#define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_OFFSET 0
#define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_MASK 0x000000FF
-#define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT 8
+#define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_OFFSET 8
#define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_MASK 0x0000FF00
+ /* configure vf MSIX for PF params AH*/
+#define DRV_MB_PARAM_CFG_PF_VFS_MSIX_SB_NUM_OFFSET 0
+#define DRV_MB_PARAM_CFG_PF_VFS_MSIX_SB_NUM_MASK 0x000000FF
/* OneView configuration parametres */
-#define DRV_MB_PARAM_OV_CURR_CFG_SHIFT 0
+#define DRV_MB_PARAM_OV_CURR_CFG_OFFSET 0
#define DRV_MB_PARAM_OV_CURR_CFG_MASK 0x0000000F
#define DRV_MB_PARAM_OV_CURR_CFG_NONE 0
#define DRV_MB_PARAM_OV_CURR_CFG_OS 1
@@ -1442,7 +1546,7 @@ struct public_drv_mb {
#define DRV_MB_PARAM_OV_CURR_CFG_DCI 6
#define DRV_MB_PARAM_OV_CURR_CFG_HII 7
-#define DRV_MB_PARAM_OV_UPDATE_BOOT_PROG_SHIFT 0
+#define DRV_MB_PARAM_OV_UPDATE_BOOT_PROG_OFFSET 0
#define DRV_MB_PARAM_OV_UPDATE_BOOT_PROG_MASK 0x000000FF
#define DRV_MB_PARAM_OV_UPDATE_BOOT_PROG_NONE (1 << 0)
#define DRV_MB_PARAM_OV_UPDATE_BOOT_PROG_ISCSI_IP_ACQUIRED (1 << 1)
@@ -1455,17 +1559,17 @@ struct public_drv_mb {
#define DRV_MB_PARAM_OV_UPDATE_BOOT_PROG_OS_HANDOFF (1 << 6)
#define DRV_MB_PARAM_OV_UPDATE_BOOT_COMPLETED 0
-#define DRV_MB_PARAM_OV_PCI_BUS_NUM_SHIFT 0
+#define DRV_MB_PARAM_OV_PCI_BUS_NUM_OFFSET 0
#define DRV_MB_PARAM_OV_PCI_BUS_NUM_MASK 0x000000FF
-#define DRV_MB_PARAM_OV_STORM_FW_VER_SHIFT 0
+#define DRV_MB_PARAM_OV_STORM_FW_VER_OFFSET 0
#define DRV_MB_PARAM_OV_STORM_FW_VER_MASK 0xFFFFFFFF
#define DRV_MB_PARAM_OV_STORM_FW_VER_MAJOR_MASK 0xFF000000
#define DRV_MB_PARAM_OV_STORM_FW_VER_MINOR_MASK 0x00FF0000
#define DRV_MB_PARAM_OV_STORM_FW_VER_BUILD_MASK 0x0000FF00
#define DRV_MB_PARAM_OV_STORM_FW_VER_DROP_MASK 0x000000FF
-#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_SHIFT 0
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_OFFSET 0
#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_MASK 0xF
#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_UNKNOWN 0x1
/* Not Installed*/
@@ -1476,36 +1580,36 @@ struct public_drv_mb {
/* installed and active */
#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE 0x5
-#define DRV_MB_PARAM_OV_MTU_SIZE_SHIFT 0
+#define DRV_MB_PARAM_OV_MTU_SIZE_OFFSET 0
#define DRV_MB_PARAM_OV_MTU_SIZE_MASK 0xFFFFFFFF
#define DRV_MB_PARAM_SET_LED_MODE_OPER 0x0
#define DRV_MB_PARAM_SET_LED_MODE_ON 0x1
#define DRV_MB_PARAM_SET_LED_MODE_OFF 0x2
-#define DRV_MB_PARAM_TRANSCEIVER_PORT_SHIFT 0
+#define DRV_MB_PARAM_TRANSCEIVER_PORT_OFFSET 0
#define DRV_MB_PARAM_TRANSCEIVER_PORT_MASK 0x00000003
-#define DRV_MB_PARAM_TRANSCEIVER_SIZE_SHIFT 2
+#define DRV_MB_PARAM_TRANSCEIVER_SIZE_OFFSET 2
#define DRV_MB_PARAM_TRANSCEIVER_SIZE_MASK 0x000000FC
-#define DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_SHIFT 8
+#define DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_OFFSET 8
#define DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK 0x0000FF00
-#define DRV_MB_PARAM_TRANSCEIVER_OFFSET_SHIFT 16
+#define DRV_MB_PARAM_TRANSCEIVER_OFFSET_OFFSET 16
#define DRV_MB_PARAM_TRANSCEIVER_OFFSET_MASK 0xFFFF0000
-#define DRV_MB_PARAM_GPIO_NUMBER_SHIFT 0
+#define DRV_MB_PARAM_GPIO_NUMBER_OFFSET 0
#define DRV_MB_PARAM_GPIO_NUMBER_MASK 0x0000FFFF
-#define DRV_MB_PARAM_GPIO_VALUE_SHIFT 16
+#define DRV_MB_PARAM_GPIO_VALUE_OFFSET 16
#define DRV_MB_PARAM_GPIO_VALUE_MASK 0xFFFF0000
-#define DRV_MB_PARAM_GPIO_DIRECTION_SHIFT 16
+#define DRV_MB_PARAM_GPIO_DIRECTION_OFFSET 16
#define DRV_MB_PARAM_GPIO_DIRECTION_MASK 0x00FF0000
-#define DRV_MB_PARAM_GPIO_CTRL_SHIFT 24
+#define DRV_MB_PARAM_GPIO_CTRL_OFFSET 24
#define DRV_MB_PARAM_GPIO_CTRL_MASK 0xFF000000
/* Resource Allocation params - Driver version support*/
#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000
-#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_OFFSET 16
#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_MASK 0x0000FFFF
-#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT 0
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_OFFSET 0
#define DRV_MB_PARAM_BIST_UNKNOWN_TEST 0
#define DRV_MB_PARAM_BIST_REGISTER_TEST 1
@@ -1518,11 +1622,27 @@ struct public_drv_mb {
#define DRV_MB_PARAM_BIST_RC_FAILED 2
#define DRV_MB_PARAM_BIST_RC_INVALID_PARAMETER 3
-#define DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT 0
+#define DRV_MB_PARAM_BIST_TEST_INDEX_OFFSET 0
#define DRV_MB_PARAM_BIST_TEST_INDEX_MASK 0x000000FF
-#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT 8
+#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_OFFSET 8
#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_MASK 0x0000FF00
+#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_MASK 0x0000FFFF
+#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_OFFSET 0
+/* driver supports SmartLinQ parameter */
+#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_SMARTLINQ 0x00000001
+/* driver supports EEE parameter */
+#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE 0x00000002
+#define DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_MASK 0xFFFF0000
+#define DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_OFFSET 16
+/* driver supports virtual link parameter */
+#define DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK 0x00010000
+ /* Driver attributes params */
+#define DRV_MB_PARAM_ATTRIBUTE_KEY_OFFSET 0
+#define DRV_MB_PARAM_ATTRIBUTE_KEY_MASK 0x00FFFFFF
+#define DRV_MB_PARAM_ATTRIBUTE_CMD_OFFSET 24
+#define DRV_MB_PARAM_ATTRIBUTE_CMD_MASK 0xFF000000
+
u32 fw_mb_header;
#define FW_MSG_CODE_MASK 0xffff0000
#define FW_MSG_CODE_UNSUPPORTED 0x00000000
@@ -1545,6 +1665,7 @@ struct public_drv_mb {
#define FW_MSG_CODE_LINK_RESET_DONE 0x23000000
#define FW_MSG_CODE_SET_LLDP_DONE 0x24000000
#define FW_MSG_CODE_SET_LLDP_UNSUPPORTED_AGENT 0x24010000
+#define FW_MSG_CODE_REGISTER_LLDP_TLVS_RX_DONE 0x24100000
#define FW_MSG_CODE_SET_DCBX_DONE 0x25000000
#define FW_MSG_CODE_UPDATE_CURR_CFG_DONE 0x26000000
#define FW_MSG_CODE_UPDATE_BUS_NUM_DONE 0x27000000
@@ -1597,6 +1718,7 @@ struct public_drv_mb {
#define FW_MSG_CODE_SET_SECURE_MODE_OK 0x00140000
#define FW_MSG_MODE_PHY_PRIVILEGE_ERROR 0x00150000
#define FW_MSG_CODE_OK 0x00160000
+#define FW_MSG_CODE_ERROR 0x00170000
#define FW_MSG_CODE_LED_MODE_INVALID 0x00170000
#define FW_MSG_CODE_PHY_DIAG_OK 0x00160000
#define FW_MSG_CODE_PHY_DIAG_ERROR 0x00170000
@@ -1628,16 +1750,37 @@ struct public_drv_mb {
#define FW_MSG_CODE_MDUMP_IN_PROGRESS 0x00040000
#define FW_MSG_CODE_MDUMP_WRITE_FAILED 0x00050000
+
+#define FW_MSG_CODE_DRV_CFG_PF_VFS_MSIX_DONE 0x00870000
+#define FW_MSG_CODE_DRV_CFG_PF_VFS_MSIX_BAD_ASIC 0x00880000
+
+#define FW_MSG_CODE_WOL_READ_WRITE_OK 0x00820000
+#define FW_MSG_CODE_WOL_READ_WRITE_INVALID_VAL 0x00830000
+#define FW_MSG_CODE_WOL_READ_WRITE_INVALID_ADDR 0x00840000
+#define FW_MSG_CODE_WOL_READ_BUFFER_OK 0x00850000
+#define FW_MSG_CODE_WOL_READ_BUFFER_INVALID_VAL 0x00860000
+
#define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff
+#define FW_MSG_CODE_ATTRIBUTE_INVALID_KEY 0x00020000
+#define FW_MSG_CODE_ATTRIBUTE_INVALID_CMD 0x00030000
u32 fw_mb_param;
/* Resource Allocation params - MFW version support */
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000
-#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16
+#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_OFFSET 16
#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_MASK 0x0000FFFF
-#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT 0
+#define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_OFFSET 0
+
+/* get MFW feature support response */
+/* MFW supports SmartLinQ */
+#define FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ 0x00000001
+/* MFW supports EEE */
+#define FW_MB_PARAM_FEATURE_SUPPORT_EEE 0x00000002
+/* MFW supports virtual link */
+#define FW_MB_PARAM_FEATURE_SUPPORT_VLINK 0x00010000
+#define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR (1 << 0)
u32 drv_pulse_mb;
#define DRV_PULSE_SEQ_MASK 0x00007fff
@@ -1702,6 +1845,9 @@ enum MFW_DRV_MSG_TYPE {
MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED,
MFW_DRV_MSG_EEE_NEGOTIATION_COMPLETE,
+ MFW_DRV_MSG_GET_TLV_REQ,
+ MFW_DRV_MSG_OEM_CFG_UPDATE,
+ MFW_DRV_MSG_LLDP_RECEIVED_TLVS_UPDATED,
MFW_DRV_MSG_MAX
};
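
The SHIFT -> OFFSET renames above keep the usual convention that every mailbox field is described by a *_MASK/*_OFFSET pair, used together when packing a request into drv_mb_param or decoding fw_mb_param. Below is a minimal standalone sketch of that pattern; the MB_SET_FIELD/MB_GET_FIELD helpers are hypothetical stand-ins for the driver's own accessors, and only the two LLDP field definitions are taken from the header above.

/* Sketch only: pack/unpack a mailbox parameter via MASK/OFFSET pairs. */
#include <stdint.h>
#include <stdio.h>

#define DRV_MB_PARAM_LLDP_SEND_MASK    0x00000001
#define DRV_MB_PARAM_LLDP_SEND_OFFSET  0
#define DRV_MB_PARAM_LLDP_AGENT_MASK   0x00000006
#define DRV_MB_PARAM_LLDP_AGENT_OFFSET 1

/* Hypothetical helpers: insert/extract a field using its MASK/OFFSET pair */
#define MB_SET_FIELD(reg, name, val) \
	((reg) = ((reg) & ~name##_MASK) | (((val) << name##_OFFSET) & name##_MASK))
#define MB_GET_FIELD(reg, name) \
	(((reg) & name##_MASK) >> name##_OFFSET)

int main(void)
{
	uint32_t drv_mb_param = 0;

	MB_SET_FIELD(drv_mb_param, DRV_MB_PARAM_LLDP_SEND, 1);  /* send LLDP */
	MB_SET_FIELD(drv_mb_param, DRV_MB_PARAM_LLDP_AGENT, 2); /* agent id  */

	printf("param=0x%08x agent=%u\n", (unsigned int)drv_mb_param,
	       (unsigned int)MB_GET_FIELD(drv_mb_param, DRV_MB_PARAM_LLDP_AGENT));
	return 0;
}
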
diff --git a/drivers/net/qede/base/nvm_cfg.h b/drivers/net/qede/base/nvm_cfg.h
index 4e588350..c99e805d 100644
--- a/drivers/net/qede/base/nvm_cfg.h
+++ b/drivers/net/qede/base/nvm_cfg.h
@@ -13,20 +13,20 @@
* Description: NVM config file - Generated file from nvm cfg excel.
* DO NOT MODIFY !!!
*
- * Created: 12/15/2016
+ * Created: 5/8/2017
*
****************************************************************************/
#ifndef NVM_CFG_H
#define NVM_CFG_H
-#define NVM_CFG_version 0x81805
+#define NVM_CFG_version 0x83000
-#define NVM_CFG_new_option_seq 15
+#define NVM_CFG_new_option_seq 23
-#define NVM_CFG_removed_option_seq 0
+#define NVM_CFG_removed_option_seq 1
-#define NVM_CFG_updated_value_seq 1
+#define NVM_CFG_updated_value_seq 4
struct nvm_cfg_mac_address {
u32 mac_addr_hi;
@@ -342,9 +342,8 @@ struct nvm_cfg1_glob {
#define NVM_CFG1_GLOB_VENDOR_ID_MASK 0x0000FFFF
#define NVM_CFG1_GLOB_VENDOR_ID_OFFSET 0
/* Set caution temperature */
- #define NVM_CFG1_GLOB_CAUTION_THRESHOLD_TEMPERATURE_MASK \
- 0x00FF0000
- #define NVM_CFG1_GLOB_CAUTION_THRESHOLD_TEMPERATURE_OFFSET 16
+ #define NVM_CFG1_GLOB_DEAD_TEMP_TH_TEMPERATURE_MASK 0x00FF0000
+ #define NVM_CFG1_GLOB_DEAD_TEMP_TH_TEMPERATURE_OFFSET 16
/* Set external thermal sensor I2C address */
#define NVM_CFG1_GLOB_EXTERNAL_THERMAL_SENSOR_ADDRESS_MASK \
0xFF000000
@@ -509,6 +508,10 @@ struct nvm_cfg1_glob {
#define NVM_CFG1_GLOB_PF_MAPPING_OFFSET 26
#define NVM_CFG1_GLOB_PF_MAPPING_CONTINUOUS 0x0
#define NVM_CFG1_GLOB_PF_MAPPING_FIXED 0x1
+ #define NVM_CFG1_GLOB_VOLTAGE_REGULATOR_TYPE_MASK 0x30000000
+ #define NVM_CFG1_GLOB_VOLTAGE_REGULATOR_TYPE_OFFSET 28
+ #define NVM_CFG1_GLOB_VOLTAGE_REGULATOR_TYPE_DISABLED 0x0
+ #define NVM_CFG1_GLOB_VOLTAGE_REGULATOR_TYPE_TI 0x1
u32 led_global_settings; /* 0x74 */
#define NVM_CFG1_GLOB_LED_SWAP_0_MASK 0x0000000F
#define NVM_CFG1_GLOB_LED_SWAP_0_OFFSET 0
@@ -1036,7 +1039,13 @@ struct nvm_cfg1_glob {
#define NVM_CFG1_GLOB_THERMAL_ALARM_GPIO_GPIO29 0x1E
#define NVM_CFG1_GLOB_THERMAL_ALARM_GPIO_GPIO30 0x1F
#define NVM_CFG1_GLOB_THERMAL_ALARM_GPIO_GPIO31 0x20
- u32 reserved[58]; /* 0x140 */
+ u32 preboot_debug_mode_std; /* 0x140 */
+ u32 preboot_debug_mode_ext; /* 0x144 */
+ u32 ext_phy_cfg1; /* 0x148 */
+ /* Ext PHY MDI pair swap value */
+ #define NVM_CFG1_GLOB_EXT_PHY_MDI_PAIR_SWAP_MASK 0x0000FFFF
+ #define NVM_CFG1_GLOB_EXT_PHY_MDI_PAIR_SWAP_OFFSET 0
+ u32 reserved[55]; /* 0x14C */
};
struct nvm_cfg1_path {
@@ -1134,6 +1143,7 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_OFFSET 0
#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G 0x1
#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G 0x2
+ #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G 0x4
#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G 0x8
#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G 0x10
#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G 0x20
@@ -1142,6 +1152,7 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_OFFSET 16
#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_1G 0x1
#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_10G 0x2
+ #define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_20G 0x4
#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_25G 0x8
#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_40G 0x10
#define NVM_CFG1_PORT_MFW_SPEED_CAPABILITY_MASK_50G 0x20
@@ -1152,11 +1163,11 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_DRV_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_DRV_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_DRV_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_DRV_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_DRV_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_DRV_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_DRV_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_DRV_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_DRV_LINK_SPEED_SMARTLINQ 0x8
#define NVM_CFG1_PORT_DRV_FLOW_CONTROL_MASK 0x00000070
#define NVM_CFG1_PORT_DRV_FLOW_CONTROL_OFFSET 4
#define NVM_CFG1_PORT_DRV_FLOW_CONTROL_AUTONEG 0x1
@@ -1167,11 +1178,11 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MFW_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_MFW_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_MFW_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_MFW_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_MFW_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_MFW_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_MFW_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_MFW_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_MFW_LINK_SPEED_SMARTLINQ 0x8
#define NVM_CFG1_PORT_MFW_FLOW_CONTROL_MASK 0x00003800
#define NVM_CFG1_PORT_MFW_FLOW_CONTROL_OFFSET 11
#define NVM_CFG1_PORT_MFW_FLOW_CONTROL_AUTONEG 0x1
@@ -1203,6 +1214,14 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_FEC_AN_MODE_25G_RS 0x4
#define NVM_CFG1_PORT_FEC_AN_MODE_25G_FIRECODE_AND_RS 0x5
#define NVM_CFG1_PORT_FEC_AN_MODE_ALL 0x6
+ #define NVM_CFG1_PORT_SMARTLINQ_MODE_MASK 0x00800000
+ #define NVM_CFG1_PORT_SMARTLINQ_MODE_OFFSET 23
+ #define NVM_CFG1_PORT_SMARTLINQ_MODE_DISABLED 0x0
+ #define NVM_CFG1_PORT_SMARTLINQ_MODE_ENABLED 0x1
+ #define NVM_CFG1_PORT_RESERVED_WAS_MFW_SMARTLINQ_MASK 0x01000000
+ #define NVM_CFG1_PORT_RESERVED_WAS_MFW_SMARTLINQ_OFFSET 24
+ #define NVM_CFG1_PORT_RESERVED_WAS_MFW_SMARTLINQ_DISABLED 0x0
+ #define NVM_CFG1_PORT_RESERVED_WAS_MFW_SMARTLINQ_ENABLED 0x1
u32 phy_cfg; /* 0x1C */
#define NVM_CFG1_PORT_OPTIONAL_LINK_MODES_MASK 0x0000FFFF
#define NVM_CFG1_PORT_OPTIONAL_LINK_MODES_OFFSET 0
@@ -1243,6 +1262,7 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_EXTERNAL_PHY_TYPE_OFFSET 0
#define NVM_CFG1_PORT_EXTERNAL_PHY_TYPE_NONE 0x0
#define NVM_CFG1_PORT_EXTERNAL_PHY_TYPE_BCM8485X 0x1
+ #define NVM_CFG1_PORT_EXTERNAL_PHY_TYPE_BCM5422X 0x2
#define NVM_CFG1_PORT_EXTERNAL_PHY_ADDRESS_MASK 0x0000FF00
#define NVM_CFG1_PORT_EXTERNAL_PHY_ADDRESS_OFFSET 8
/* EEE power saving mode */
@@ -1276,19 +1296,27 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_PREBOOT_LINK_SPEED_SMARTLINQ 0x8
#define NVM_CFG1_PORT_RESERVED__M_MBA_BOOT_RETRY_COUNT_MASK \
0x00E00000
#define NVM_CFG1_PORT_RESERVED__M_MBA_BOOT_RETRY_COUNT_OFFSET 21
+ #define NVM_CFG1_PORT_RESERVED_WAS_PREBOOT_SMARTLINQ_MASK \
+ 0x01000000
+ #define NVM_CFG1_PORT_RESERVED_WAS_PREBOOT_SMARTLINQ_OFFSET 24
+ #define NVM_CFG1_PORT_RESERVED_WAS_PREBOOT_SMARTLINQ_DISABLED \
+ 0x0
+ #define NVM_CFG1_PORT_RESERVED_WAS_PREBOOT_SMARTLINQ_ENABLED 0x1
u32 mba_cfg2; /* 0x2C */
#define NVM_CFG1_PORT_RESERVED65_MASK 0x0000FFFF
#define NVM_CFG1_PORT_RESERVED65_OFFSET 0
#define NVM_CFG1_PORT_RESERVED66_MASK 0x00010000
#define NVM_CFG1_PORT_RESERVED66_OFFSET 16
+ #define NVM_CFG1_PORT_PREBOOT_LINK_UP_DELAY_MASK 0x01FE0000
+ #define NVM_CFG1_PORT_PREBOOT_LINK_UP_DELAY_OFFSET 17
u32 vf_cfg; /* 0x30 */
#define NVM_CFG1_PORT_RESERVED8_MASK 0x0000FFFF
#define NVM_CFG1_PORT_RESERVED8_OFFSET 0
@@ -1304,9 +1332,12 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_LANE_LED_SPD_2_SEL_OFFSET 16
#define NVM_CFG1_PORT_LANE_LED_SPD__SEL_1G 0x1
#define NVM_CFG1_PORT_LANE_LED_SPD__SEL_10G 0x2
- #define NVM_CFG1_PORT_LANE_LED_SPD__SEL_25G 0x8
- #define NVM_CFG1_PORT_LANE_LED_SPD__SEL_40G 0x10
- #define NVM_CFG1_PORT_LANE_LED_SPD__SEL_50G 0x20
+ #define NVM_CFG1_PORT_LANE_LED_SPD__SEL_AH_25G 0x4
+ #define NVM_CFG1_PORT_LANE_LED_SPD__SEL_BB_25G 0x8
+ #define NVM_CFG1_PORT_LANE_LED_SPD__SEL_AH_40G 0x8
+ #define NVM_CFG1_PORT_LANE_LED_SPD__SEL_BB_40G 0x10
+ #define NVM_CFG1_PORT_LANE_LED_SPD__SEL_AH_50G 0x10
+ #define NVM_CFG1_PORT_LANE_LED_SPD__SEL_BB_50G 0x20
#define NVM_CFG1_PORT_LANE_LED_SPD__SEL_BB_100G 0x40
u32 transceiver_00; /* 0x40 */
/* Define for mapping of transceiver signal module absent */
@@ -1412,6 +1443,7 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_10G_DRV_SPEED_CAPABILITY_MASK_OFFSET 0
#define NVM_CFG1_PORT_MNM_10G_DRV_SPEED_CAPABILITY_MASK_1G 0x1
#define NVM_CFG1_PORT_MNM_10G_DRV_SPEED_CAPABILITY_MASK_10G 0x2
+ #define NVM_CFG1_PORT_MNM_10G_DRV_SPEED_CAPABILITY_MASK_20G 0x4
#define NVM_CFG1_PORT_MNM_10G_DRV_SPEED_CAPABILITY_MASK_25G 0x8
#define NVM_CFG1_PORT_MNM_10G_DRV_SPEED_CAPABILITY_MASK_40G 0x10
#define NVM_CFG1_PORT_MNM_10G_DRV_SPEED_CAPABILITY_MASK_50G 0x20
@@ -1423,6 +1455,7 @@ struct nvm_cfg1_port {
16
#define NVM_CFG1_PORT_MNM_10G_MFW_SPEED_CAPABILITY_MASK_1G 0x1
#define NVM_CFG1_PORT_MNM_10G_MFW_SPEED_CAPABILITY_MASK_10G 0x2
+ #define NVM_CFG1_PORT_MNM_10G_MFW_SPEED_CAPABILITY_MASK_20G 0x4
#define NVM_CFG1_PORT_MNM_10G_MFW_SPEED_CAPABILITY_MASK_25G 0x8
#define NVM_CFG1_PORT_MNM_10G_MFW_SPEED_CAPABILITY_MASK_40G 0x10
#define NVM_CFG1_PORT_MNM_10G_MFW_SPEED_CAPABILITY_MASK_50G 0x20
@@ -1434,21 +1467,21 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_10G_DRV_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_MNM_10G_DRV_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_MNM_10G_DRV_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_MNM_10G_DRV_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_MNM_10G_DRV_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_MNM_10G_DRV_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_MNM_10G_DRV_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_MNM_10G_DRV_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_MNM_10G_DRV_LINK_SPEED_SMARTLINQ 0x8
#define NVM_CFG1_PORT_MNM_10G_MFW_LINK_SPEED_MASK 0x000000F0
#define NVM_CFG1_PORT_MNM_10G_MFW_LINK_SPEED_OFFSET 4
#define NVM_CFG1_PORT_MNM_10G_MFW_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_MNM_10G_MFW_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_MNM_10G_MFW_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_MNM_10G_MFW_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_MNM_10G_MFW_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_MNM_10G_MFW_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_MNM_10G_MFW_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_MNM_10G_MFW_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_MNM_10G_MFW_LINK_SPEED_SMARTLINQ 0x8
/* This field defines the board technology
 * (backplane, transceiver, external PHY)
*/
@@ -1490,6 +1523,7 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_25G_DRV_SPEED_CAPABILITY_MASK_OFFSET 0
#define NVM_CFG1_PORT_MNM_25G_DRV_SPEED_CAPABILITY_MASK_1G 0x1
#define NVM_CFG1_PORT_MNM_25G_DRV_SPEED_CAPABILITY_MASK_10G 0x2
+ #define NVM_CFG1_PORT_MNM_25G_DRV_SPEED_CAPABILITY_MASK_20G 0x4
#define NVM_CFG1_PORT_MNM_25G_DRV_SPEED_CAPABILITY_MASK_25G 0x8
#define NVM_CFG1_PORT_MNM_25G_DRV_SPEED_CAPABILITY_MASK_40G 0x10
#define NVM_CFG1_PORT_MNM_25G_DRV_SPEED_CAPABILITY_MASK_50G 0x20
@@ -1501,6 +1535,7 @@ struct nvm_cfg1_port {
16
#define NVM_CFG1_PORT_MNM_25G_MFW_SPEED_CAPABILITY_MASK_1G 0x1
#define NVM_CFG1_PORT_MNM_25G_MFW_SPEED_CAPABILITY_MASK_10G 0x2
+ #define NVM_CFG1_PORT_MNM_25G_MFW_SPEED_CAPABILITY_MASK_20G 0x4
#define NVM_CFG1_PORT_MNM_25G_MFW_SPEED_CAPABILITY_MASK_25G 0x8
#define NVM_CFG1_PORT_MNM_25G_MFW_SPEED_CAPABILITY_MASK_40G 0x10
#define NVM_CFG1_PORT_MNM_25G_MFW_SPEED_CAPABILITY_MASK_50G 0x20
@@ -1512,21 +1547,21 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_25G_DRV_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_MNM_25G_DRV_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_MNM_25G_DRV_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_MNM_25G_DRV_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_MNM_25G_DRV_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_MNM_25G_DRV_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_MNM_25G_DRV_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_MNM_25G_DRV_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_MNM_25G_DRV_LINK_SPEED_SMARTLINQ 0x8
#define NVM_CFG1_PORT_MNM_25G_MFW_LINK_SPEED_MASK 0x000000F0
#define NVM_CFG1_PORT_MNM_25G_MFW_LINK_SPEED_OFFSET 4
#define NVM_CFG1_PORT_MNM_25G_MFW_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_MNM_25G_MFW_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_MNM_25G_MFW_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_MNM_25G_MFW_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_MNM_25G_MFW_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_MNM_25G_MFW_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_MNM_25G_MFW_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_MNM_25G_MFW_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_MNM_25G_MFW_LINK_SPEED_SMARTLINQ 0x8
/* This field defines the board technology
 * (backplane, transceiver, external PHY)
*/
@@ -1568,6 +1603,7 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_40G_DRV_SPEED_CAPABILITY_MASK_OFFSET 0
#define NVM_CFG1_PORT_MNM_40G_DRV_SPEED_CAPABILITY_MASK_1G 0x1
#define NVM_CFG1_PORT_MNM_40G_DRV_SPEED_CAPABILITY_MASK_10G 0x2
+ #define NVM_CFG1_PORT_MNM_40G_DRV_SPEED_CAPABILITY_MASK_20G 0x4
#define NVM_CFG1_PORT_MNM_40G_DRV_SPEED_CAPABILITY_MASK_25G 0x8
#define NVM_CFG1_PORT_MNM_40G_DRV_SPEED_CAPABILITY_MASK_40G 0x10
#define NVM_CFG1_PORT_MNM_40G_DRV_SPEED_CAPABILITY_MASK_50G 0x20
@@ -1579,6 +1615,7 @@ struct nvm_cfg1_port {
16
#define NVM_CFG1_PORT_MNM_40G_MFW_SPEED_CAPABILITY_MASK_1G 0x1
#define NVM_CFG1_PORT_MNM_40G_MFW_SPEED_CAPABILITY_MASK_10G 0x2
+ #define NVM_CFG1_PORT_MNM_40G_MFW_SPEED_CAPABILITY_MASK_20G 0x4
#define NVM_CFG1_PORT_MNM_40G_MFW_SPEED_CAPABILITY_MASK_25G 0x8
#define NVM_CFG1_PORT_MNM_40G_MFW_SPEED_CAPABILITY_MASK_40G 0x10
#define NVM_CFG1_PORT_MNM_40G_MFW_SPEED_CAPABILITY_MASK_50G 0x20
@@ -1590,21 +1627,21 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_40G_DRV_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_MNM_40G_DRV_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_MNM_40G_DRV_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_MNM_40G_DRV_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_MNM_40G_DRV_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_MNM_40G_DRV_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_MNM_40G_DRV_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_MNM_40G_DRV_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_MNM_40G_DRV_LINK_SPEED_SMARTLINQ 0x8
#define NVM_CFG1_PORT_MNM_40G_MFW_LINK_SPEED_MASK 0x000000F0
#define NVM_CFG1_PORT_MNM_40G_MFW_LINK_SPEED_OFFSET 4
#define NVM_CFG1_PORT_MNM_40G_MFW_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_MNM_40G_MFW_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_MNM_40G_MFW_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_MNM_40G_MFW_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_MNM_40G_MFW_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_MNM_40G_MFW_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_MNM_40G_MFW_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_MNM_40G_MFW_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_MNM_40G_MFW_LINK_SPEED_SMARTLINQ 0x8
/* This field defines the board technology
 * (backplane, transceiver, external PHY)
*/
@@ -1646,6 +1683,7 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_50G_DRV_SPEED_CAPABILITY_MASK_OFFSET 0
#define NVM_CFG1_PORT_MNM_50G_DRV_SPEED_CAPABILITY_MASK_1G 0x1
#define NVM_CFG1_PORT_MNM_50G_DRV_SPEED_CAPABILITY_MASK_10G 0x2
+ #define NVM_CFG1_PORT_MNM_50G_DRV_SPEED_CAPABILITY_MASK_20G 0x4
#define NVM_CFG1_PORT_MNM_50G_DRV_SPEED_CAPABILITY_MASK_25G 0x8
#define NVM_CFG1_PORT_MNM_50G_DRV_SPEED_CAPABILITY_MASK_40G 0x10
#define NVM_CFG1_PORT_MNM_50G_DRV_SPEED_CAPABILITY_MASK_50G 0x20
@@ -1658,6 +1696,7 @@ struct nvm_cfg1_port {
16
#define NVM_CFG1_PORT_MNM_50G_MFW_SPEED_CAPABILITY_MASK_1G 0x1
#define NVM_CFG1_PORT_MNM_50G_MFW_SPEED_CAPABILITY_MASK_10G 0x2
+ #define NVM_CFG1_PORT_MNM_50G_MFW_SPEED_CAPABILITY_MASK_20G 0x4
#define NVM_CFG1_PORT_MNM_50G_MFW_SPEED_CAPABILITY_MASK_25G 0x8
#define NVM_CFG1_PORT_MNM_50G_MFW_SPEED_CAPABILITY_MASK_40G 0x10
#define NVM_CFG1_PORT_MNM_50G_MFW_SPEED_CAPABILITY_MASK_50G 0x20
@@ -1670,21 +1709,21 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_50G_DRV_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_MNM_50G_DRV_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_MNM_50G_DRV_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_MNM_50G_DRV_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_MNM_50G_DRV_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_MNM_50G_DRV_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_MNM_50G_DRV_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_MNM_50G_DRV_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_MNM_50G_DRV_LINK_SPEED_SMARTLINQ 0x8
#define NVM_CFG1_PORT_MNM_50G_MFW_LINK_SPEED_MASK 0x000000F0
#define NVM_CFG1_PORT_MNM_50G_MFW_LINK_SPEED_OFFSET 4
#define NVM_CFG1_PORT_MNM_50G_MFW_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_MNM_50G_MFW_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_MNM_50G_MFW_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_MNM_50G_MFW_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_MNM_50G_MFW_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_MNM_50G_MFW_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_MNM_50G_MFW_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_MNM_50G_MFW_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_MNM_50G_MFW_LINK_SPEED_SMARTLINQ 0x8
/* This field defines the board technology
 * (backplane, transceiver, external PHY)
*/
@@ -1726,6 +1765,7 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_100G_DRV_SPEED_CAP_MASK_OFFSET 0
#define NVM_CFG1_PORT_MNM_100G_DRV_SPEED_CAP_MASK_1G 0x1
#define NVM_CFG1_PORT_MNM_100G_DRV_SPEED_CAP_MASK_10G 0x2
+ #define NVM_CFG1_PORT_MNM_100G_DRV_SPEED_CAP_MASK_20G 0x4
#define NVM_CFG1_PORT_MNM_100G_DRV_SPEED_CAP_MASK_25G 0x8
#define NVM_CFG1_PORT_MNM_100G_DRV_SPEED_CAP_MASK_40G 0x10
#define NVM_CFG1_PORT_MNM_100G_DRV_SPEED_CAP_MASK_50G 0x20
@@ -1735,6 +1775,7 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_100G_MFW_SPEED_CAP_MASK_OFFSET 16
#define NVM_CFG1_PORT_MNM_100G_MFW_SPEED_CAP_MASK_1G 0x1
#define NVM_CFG1_PORT_MNM_100G_MFW_SPEED_CAP_MASK_10G 0x2
+ #define NVM_CFG1_PORT_MNM_100G_MFW_SPEED_CAP_MASK_20G 0x4
#define NVM_CFG1_PORT_MNM_100G_MFW_SPEED_CAP_MASK_25G 0x8
#define NVM_CFG1_PORT_MNM_100G_MFW_SPEED_CAP_MASK_40G 0x10
#define NVM_CFG1_PORT_MNM_100G_MFW_SPEED_CAP_MASK_50G 0x20
@@ -1745,21 +1786,21 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_100G_DRV_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_MNM_100G_DRV_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_MNM_100G_DRV_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_MNM_100G_DRV_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_MNM_100G_DRV_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_MNM_100G_DRV_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_MNM_100G_DRV_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_MNM_100G_DRV_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_MNM_100G_DRV_LINK_SPEED_SMARTLINQ 0x8
#define NVM_CFG1_PORT_MNM_100G_MFW_LINK_SPEED_MASK 0x000000F0
#define NVM_CFG1_PORT_MNM_100G_MFW_LINK_SPEED_OFFSET 4
#define NVM_CFG1_PORT_MNM_100G_MFW_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_MNM_100G_MFW_LINK_SPEED_1G 0x1
#define NVM_CFG1_PORT_MNM_100G_MFW_LINK_SPEED_10G 0x2
+ #define NVM_CFG1_PORT_MNM_100G_MFW_LINK_SPEED_20G 0x3
#define NVM_CFG1_PORT_MNM_100G_MFW_LINK_SPEED_25G 0x4
#define NVM_CFG1_PORT_MNM_100G_MFW_LINK_SPEED_40G 0x5
#define NVM_CFG1_PORT_MNM_100G_MFW_LINK_SPEED_50G 0x6
#define NVM_CFG1_PORT_MNM_100G_MFW_LINK_SPEED_BB_100G 0x7
- #define NVM_CFG1_PORT_MNM_100G_MFW_LINK_SPEED_SMARTLINQ 0x8
/* This field defines the board technology
 * (backplane, transceiver, external PHY)
*/
@@ -1795,7 +1836,13 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_MNM_100G_FEC_FORCE_MODE_FIRECODE 0x1
#define NVM_CFG1_PORT_MNM_100G_FEC_FORCE_MODE_RS 0x2
#define NVM_CFG1_PORT_MNM_100G_FEC_FORCE_MODE_AUTO 0x7
- u32 reserved[116]; /* 0x88 */
+ u32 temperature; /* 0x88 */
+ #define NVM_CFG1_PORT_PHY_MODULE_DEAD_TEMP_TH_MASK 0x000000FF
+ #define NVM_CFG1_PORT_PHY_MODULE_DEAD_TEMP_TH_OFFSET 0
+ #define NVM_CFG1_PORT_PHY_MODULE_ALOM_FAN_ON_TEMP_TH_MASK \
+ 0x0000FF00
+ #define NVM_CFG1_PORT_PHY_MODULE_ALOM_FAN_ON_TEMP_TH_OFFSET 8
+ u32 reserved[115]; /* 0x8C */
};
struct nvm_cfg1_func {
@@ -1910,6 +1957,7 @@ struct nvm_cfg1_func {
#define NVM_CFG1_FUNC_NPAR_ENABLED_PROTOCOL_ETHERNET 0x1
#define NVM_CFG1_FUNC_NPAR_ENABLED_PROTOCOL_FCOE 0x2
#define NVM_CFG1_FUNC_NPAR_ENABLED_PROTOCOL_ISCSI 0x4
+ #define NVM_CFG1_FUNC_NPAR_ENABLED_PROTOCOL_RDMA 0x8
u32 reserved[8]; /* 0x30 */
};
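
The nvm_cfg1 words follow the same MASK/OFFSET convention as the mailbox parameters. A minimal sketch of decoding the driver link-speed selection from one raw word, assuming illustrative mask/offset values and a made-up word value; only the *_DRV_LINK_SPEED enum values (0x0, 0x2, 0x4) come from the header above.

/* Sketch only: decode one nvm_cfg word with its MASK/OFFSET pair. */
#include <stdint.h>
#include <stdio.h>

#define NVM_CFG1_PORT_DRV_LINK_SPEED_MASK    0x0000000F /* assumed layout */
#define NVM_CFG1_PORT_DRV_LINK_SPEED_OFFSET  0          /* assumed layout */
#define NVM_CFG1_PORT_DRV_LINK_SPEED_AUTONEG 0x0
#define NVM_CFG1_PORT_DRV_LINK_SPEED_10G     0x2
#define NVM_CFG1_PORT_DRV_LINK_SPEED_25G     0x4

int main(void)
{
	uint32_t link_settings = 0x00000004; /* example raw NVM word */
	uint32_t speed = (link_settings & NVM_CFG1_PORT_DRV_LINK_SPEED_MASK) >>
			 NVM_CFG1_PORT_DRV_LINK_SPEED_OFFSET;

	if (speed == NVM_CFG1_PORT_DRV_LINK_SPEED_AUTONEG)
		printf("drv link speed: autoneg\n");
	else if (speed == NVM_CFG1_PORT_DRV_LINK_SPEED_25G)
		printf("drv link speed: 25G\n");
	else
		printf("drv link speed: enum value %u\n", (unsigned int)speed);
	return 0;
}
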
diff --git a/drivers/net/qede/base/reg_addr.h b/drivers/net/qede/base/reg_addr.h
index 60286545..ad15d28a 100644
--- a/drivers/net/qede/base/reg_addr.h
+++ b/drivers/net/qede/base/reg_addr.h
@@ -1205,3 +1205,20 @@
#define NIG_REG_LLH_FUNC_FILTER_EN_BB_K2 0x501a80UL
#define NIG_REG_LLH_FUNC_FILTER_MODE_BB_K2 0x501ac0UL
#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_BB_K2 0x501b00UL
+
+#define PSWRQ2_REG_WR_MBS0 0x240400UL
+#define PGLUE_B_REG_MASTER_WRITE_PAD_ENABLE 0x2aae30UL
+#define DORQ_REG_PF_USAGE_CNT 0x1009c0UL
+#define DORQ_REG_DPM_FORCE_ABORT 0x1009d8UL
+#define DORQ_REG_PF_OVFL_STICKY 0x1009d0UL
+#define DORQ_REG_INT_STS 0x100180UL
+ #define DORQ_REG_INT_STS_DB_DROP (0x1 << 1)
+ #define DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR (0x1 << 2)
+ #define DORQ_REG_INT_STS_DORQ_FIFO_AFULL (0x1 << 3)
+#define DORQ_REG_DB_DROP_DETAILS_REL 0x100a28UL
+#define DORQ_REG_INT_STS_WR 0x100188UL
+#define DORQ_REG_DB_DROP_DETAILS_REASON 0x100a20UL
+#define MCP_REG_CPU_PROGRAM_COUNTER 0xe0501cUL
+ #define MCP_REG_CPU_STATE_SOFT_HALTED (0x1 << 10)
+#define PRS_REG_SEARCH_TENANT_ID 0x1f044cUL
+#define PGLUE_B_REG_VF_BAR1_SIZE 0x2aae68UL
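
The DORQ_REG_INT_STS_* values added above are single-bit flags within the DORQ interrupt status register. A minimal sketch of testing them, with reg_rd() as a hypothetical stand-in for the driver's register-read path and a simulated status value:

/* Sketch only: check DORQ interrupt status bits. */
#include <stdint.h>
#include <stdio.h>

#define DORQ_REG_INT_STS                    0x100180UL
#define DORQ_REG_INT_STS_DB_DROP            (0x1 << 1)
#define DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR (0x1 << 2)

/* Hypothetical register read; the real driver uses its own access helpers. */
static uint32_t reg_rd(unsigned long addr)
{
	(void)addr;
	return DORQ_REG_INT_STS_DB_DROP; /* simulated status value */
}

int main(void)
{
	uint32_t sts = reg_rd(DORQ_REG_INT_STS);

	if (sts & DORQ_REG_INT_STS_DB_DROP)
		printf("doorbell drop detected\n");
	if (sts & DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)
		printf("DORQ FIFO overflow\n");
	return 0;
}
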
diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c
index 0e059898..88321451 100644
--- a/drivers/net/qede/qede_ethdev.c
+++ b/drivers/net/qede/qede_ethdev.c
@@ -453,6 +453,12 @@ int qede_activate_vport(struct rte_eth_dev *eth_dev, bool flg)
params.update_vport_active_tx_flg = 1;
params.vport_active_rx_flg = flg;
params.vport_active_tx_flg = flg;
+#ifndef RTE_LIBRTE_QEDE_VF_TX_SWITCH
+ if (IS_VF(edev)) {
+ params.update_tx_switching_flg = 1;
+ params.tx_switching_flg = !flg;
+ }
+#endif
for_each_hwfn(edev, i) {
p_hwfn = &edev->hwfns[i];
params.opaque_fid = p_hwfn->hw_info.opaque_fid;
@@ -463,7 +469,8 @@ int qede_activate_vport(struct rte_eth_dev *eth_dev, bool flg)
break;
}
}
- DP_INFO(edev, "vport %s\n", flg ? "activated" : "deactivated");
+ DP_INFO(edev, "vport %s VF tx-switch %s\n", flg ? "activated" : "deactivated",
+ params.tx_switching_flg ? "enabled" : "disabled");
return rc;
}
@@ -520,7 +527,7 @@ int qede_enable_tpa(struct rte_eth_dev *eth_dev, bool flg)
return -1;
}
}
-
+ qdev->enable_lro = flg;
DP_INFO(edev, "LRO is %s\n", flg ? "enabled" : "disabled");
return 0;
@@ -602,15 +609,53 @@ qed_configure_filter_rx_mode(struct rte_eth_dev *eth_dev,
return ecore_filter_accept_cmd(edev, 0, flags, false, false,
ECORE_SPQ_MODE_CB, NULL);
}
-static void qede_set_cmn_tunn_param(struct ecore_tunnel_info *p_tunn,
- uint8_t clss, bool mode, bool mask)
+
+static int
+qede_vxlan_enable(struct rte_eth_dev *eth_dev, uint8_t clss,
+ bool enable, bool mask)
{
- memset(p_tunn, 0, sizeof(struct ecore_tunnel_info));
- p_tunn->vxlan.b_update_mode = mode;
- p_tunn->vxlan.b_mode_enabled = mask;
- p_tunn->b_update_rx_cls = true;
- p_tunn->b_update_tx_cls = true;
- p_tunn->vxlan.tun_cls = clss;
+ struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+ struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+ enum _ecore_status_t rc = ECORE_INVAL;
+ struct ecore_ptt *p_ptt;
+ struct ecore_tunnel_info tunn;
+ struct ecore_hwfn *p_hwfn;
+ int i;
+
+ memset(&tunn, 0, sizeof(struct ecore_tunnel_info));
+ tunn.vxlan.b_update_mode = enable;
+ tunn.vxlan.b_mode_enabled = mask;
+ tunn.b_update_rx_cls = true;
+ tunn.b_update_tx_cls = true;
+ tunn.vxlan.tun_cls = clss;
+
+ for_each_hwfn(edev, i) {
+ p_hwfn = &edev->hwfns[i];
+ if (IS_PF(edev)) {
+ p_ptt = ecore_ptt_acquire(p_hwfn);
+ if (!p_ptt)
+ return -EAGAIN;
+ } else {
+ p_ptt = NULL;
+ }
+ rc = ecore_sp_pf_update_tunn_cfg(p_hwfn, p_ptt,
+ &tunn, ECORE_SPQ_MODE_CB, NULL);
+ if (rc != ECORE_SUCCESS) {
+ DP_ERR(edev, "Failed to update tunn_clss %u\n",
+ tunn.vxlan.tun_cls);
+ if (IS_PF(edev))
+ ecore_ptt_release(p_hwfn, p_ptt);
+ break;
+ }
+ }
+
+ if (rc == ECORE_SUCCESS) {
+ qdev->vxlan.enable = enable;
+ qdev->vxlan.udp_port = (enable) ? QEDE_VXLAN_DEF_PORT : 0;
+ DP_INFO(edev, "vxlan is %s\n", enable ? "enabled" : "disabled");
+ }
+
+ return rc;
}
static int
@@ -975,7 +1020,7 @@ static int qede_vlan_filter_set(struct rte_eth_dev *eth_dev,
return rc;
}
-static void qede_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
+static int qede_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
{
struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
@@ -1013,6 +1058,8 @@ static void qede_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
DP_INFO(edev, "vlan offload mask %d vlan-strip %d vlan-filter %d\n",
mask, rxmode->hw_vlan_strip, rxmode->hw_vlan_filter);
+
+ return 0;
}
static void qede_prandom_bytes(uint32_t *buff)
@@ -1078,6 +1125,7 @@ static void qede_fastpath_start(struct ecore_dev *edev)
static int qede_dev_start(struct rte_eth_dev *eth_dev)
{
+ struct rte_eth_rxmode *rxmode = &eth_dev->data->dev_conf.rxmode;
struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
@@ -1088,10 +1136,15 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev)
if (qede_update_mtu(eth_dev, qdev->new_mtu))
goto err;
qdev->mtu = qdev->new_mtu;
- /* If MTU has changed then update TPA too */
- if (qdev->enable_lro)
- if (qede_enable_tpa(eth_dev, true))
- goto err;
+ }
+
+ /* Configure TPA parameters */
+ if (rxmode->enable_lro) {
+ if (qede_enable_tpa(eth_dev, true))
+ return -EINVAL;
+ /* Enable scatter mode for LRO */
+ if (!rxmode->enable_scatter)
+ eth_dev->data->scattered_rx = 1;
}
/* Start queues */
@@ -1103,7 +1156,7 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev)
* Also, we would like to retain similar behavior in PF case, so we
* don't do PF/VF specific check here.
*/
- if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS)
+ if (rxmode->mq_mode == ETH_MQ_RX_RSS)
if (qede_config_rss(eth_dev))
goto err;
@@ -1114,6 +1167,9 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev)
/* Bring-up the link */
qede_dev_set_link_state(eth_dev, true);
+ /* Update link status */
+ qede_link_update(eth_dev, 0);
+
/* Start/resume traffic */
qede_fastpath_start(edev);
@@ -1139,7 +1195,6 @@ static void qede_dev_stop(struct rte_eth_dev *eth_dev)
if (qdev->enable_lro)
qede_enable_tpa(eth_dev, false);
- /* TODO: Do we need disable LRO or RSS */
/* Stop queues */
qede_stop_queues(eth_dev);
@@ -1157,11 +1212,12 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev)
struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
struct rte_eth_rxmode *rxmode = &eth_dev->data->dev_conf.rxmode;
+ int ret;
PMD_INIT_FUNC_TRACE(edev);
/* Check requirements for 100G mode */
- if (edev->num_hwfns > 1) {
+ if (ECORE_IS_CMT(edev)) {
if (eth_dev->data->nb_rx_queues < 2 ||
eth_dev->data->nb_tx_queues < 2) {
DP_ERR(edev, "100G mode needs min. 2 RX/TX queues\n");
@@ -1226,20 +1282,12 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev)
qdev->mtu = rxmode->max_rx_pkt_len;
qdev->new_mtu = qdev->mtu;
- /* Configure TPA parameters */
- if (rxmode->enable_lro) {
- if (qede_enable_tpa(eth_dev, true))
- return -EINVAL;
- /* Enable scatter mode for LRO */
- if (!rxmode->enable_scatter)
- eth_dev->data->scattered_rx = 1;
- }
- qdev->enable_lro = rxmode->enable_lro;
-
/* Enable VLAN offloads by default */
- qede_vlan_offload_set(eth_dev, ETH_VLAN_STRIP_MASK |
+ ret = qede_vlan_offload_set(eth_dev, ETH_VLAN_STRIP_MASK |
ETH_VLAN_FILTER_MASK |
ETH_VLAN_EXTEND_MASK);
+ if (ret)
+ return ret;
DP_INFO(edev, "Device configured with RSS=%d TSS=%d\n",
QEDE_RSS_COUNT(qdev), QEDE_TSS_COUNT(qdev));
@@ -1330,7 +1378,7 @@ qede_dev_info_get(struct rte_eth_dev *eth_dev,
}
/* return 0 means link status changed, -1 means not changed */
-static int
+int
qede_link_update(struct rte_eth_dev *eth_dev, __rte_unused int wait_to_complete)
{
struct qede_dev *qdev = eth_dev->data->dev_private;
@@ -1456,11 +1504,11 @@ static void qede_dev_close(struct rte_eth_dev *eth_dev)
rte_intr_disable(&pci_dev->intr_handle);
rte_intr_callback_unregister(&pci_dev->intr_handle,
qede_interrupt_handler, (void *)eth_dev);
- if (edev->num_hwfns > 1)
+ if (ECORE_IS_CMT(edev))
rte_eal_alarm_cancel(qede_poll_sp_sb_cb, (void *)eth_dev);
}
-static void
+static int
qede_get_stats(struct rte_eth_dev *eth_dev, struct rte_eth_stats *eth_stats)
{
struct qede_dev *qdev = eth_dev->data->dev_private;
@@ -1544,6 +1592,8 @@ qede_get_stats(struct rte_eth_dev *eth_dev, struct rte_eth_stats *eth_stats)
if (j == txq_stat_cntrs)
break;
}
+
+ return 0;
}
static unsigned
@@ -1806,8 +1856,22 @@ static const uint32_t *
qede_dev_supported_ptypes_get(struct rte_eth_dev *eth_dev)
{
static const uint32_t ptypes[] = {
+ RTE_PTYPE_L2_ETHER,
+ RTE_PTYPE_L2_ETHER_VLAN,
RTE_PTYPE_L3_IPV4,
RTE_PTYPE_L3_IPV6,
+ RTE_PTYPE_L4_TCP,
+ RTE_PTYPE_L4_UDP,
+ RTE_PTYPE_TUNNEL_VXLAN,
+ RTE_PTYPE_L4_FRAG,
+ /* Inner */
+ RTE_PTYPE_INNER_L2_ETHER,
+ RTE_PTYPE_INNER_L2_ETHER_VLAN,
+ RTE_PTYPE_INNER_L3_IPV4,
+ RTE_PTYPE_INNER_L3_IPV6,
+ RTE_PTYPE_INNER_L4_TCP,
+ RTE_PTYPE_INNER_L4_UDP,
+ RTE_PTYPE_INNER_L4_FRAG,
RTE_PTYPE_UNKNOWN
};
@@ -2012,6 +2076,10 @@ int qede_rss_reta_update(struct rte_eth_dev *eth_dev,
memset(&vport_update_params, 0, sizeof(vport_update_params));
params = rte_zmalloc("qede_rss", sizeof(*params) * edev->num_hwfns,
RTE_CACHE_LINE_SIZE);
+ if (params == NULL) {
+ DP_ERR(edev, "failed to allocate memory\n");
+ return -ENOMEM;
+ }
for (i = 0; i < reta_size; i++) {
idx = i / RTE_RETA_GROUP_SIZE;
@@ -2031,7 +2099,7 @@ int qede_rss_reta_update(struct rte_eth_dev *eth_dev,
params->update_rss_config = 1;
/* Fix up RETA for CMT mode device */
- if (edev->num_hwfns > 1)
+ if (ECORE_IS_CMT(edev))
qdev->rss_enable = qede_update_rss_parm_cmt(edev,
params);
vport_update_params.vport_id = 0;
@@ -2152,25 +2220,76 @@ qede_conf_udp_dst_port(struct rte_eth_dev *eth_dev,
struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
struct ecore_tunnel_info tunn; /* @DPDK */
struct ecore_hwfn *p_hwfn;
+ struct ecore_ptt *p_ptt;
+ uint16_t udp_port;
int rc, i;
PMD_INIT_FUNC_TRACE(edev);
memset(&tunn, 0, sizeof(tunn));
if (tunnel_udp->prot_type == RTE_TUNNEL_TYPE_VXLAN) {
+ /* Enable VxLAN tunnel if needed before UDP port update using
+ * default MAC/VLAN classification.
+ */
+ if (add) {
+ if (qdev->vxlan.udp_port == tunnel_udp->udp_port) {
+ DP_INFO(edev,
+ "UDP port %u was already configured\n",
+ tunnel_udp->udp_port);
+ return ECORE_SUCCESS;
+ }
+ /* Enable VXLAN if it was not enabled while adding
+ * VXLAN filter.
+ */
+ if (!qdev->vxlan.enable) {
+ rc = qede_vxlan_enable(eth_dev,
+ ECORE_TUNN_CLSS_MAC_VLAN, true, true);
+ if (rc != ECORE_SUCCESS) {
+ DP_ERR(edev, "Failed to enable VXLAN "
+ "prior to updating UDP port\n");
+ return rc;
+ }
+ }
+ udp_port = tunnel_udp->udp_port;
+ } else {
+ if (qdev->vxlan.udp_port != tunnel_udp->udp_port) {
+ DP_ERR(edev, "UDP port %u doesn't exist\n",
+ tunnel_udp->udp_port);
+ return ECORE_INVAL;
+ }
+ udp_port = 0;
+ }
+
tunn.vxlan_port.b_update_port = true;
- tunn.vxlan_port.port = (add) ? tunnel_udp->udp_port :
- QEDE_VXLAN_DEF_PORT;
+ tunn.vxlan_port.port = udp_port;
for_each_hwfn(edev, i) {
p_hwfn = &edev->hwfns[i];
- rc = ecore_sp_pf_update_tunn_cfg(p_hwfn, &tunn,
+ if (IS_PF(edev)) {
+ p_ptt = ecore_ptt_acquire(p_hwfn);
+ if (!p_ptt)
+ return -EAGAIN;
+ } else {
+ p_ptt = NULL;
+ }
+ rc = ecore_sp_pf_update_tunn_cfg(p_hwfn, p_ptt, &tunn,
ECORE_SPQ_MODE_CB, NULL);
if (rc != ECORE_SUCCESS) {
DP_ERR(edev, "Unable to config UDP port %u\n",
tunn.vxlan_port.port);
+ if (IS_PF(edev))
+ ecore_ptt_release(p_hwfn, p_ptt);
return rc;
}
}
+
+ qdev->vxlan.udp_port = udp_port;
+ /* If the request is to delete UDP port and if the number of
+ * VXLAN filters has reached 0 then VxLAN offload can be
+ * disabled.
+ */
+ if (!add && qdev->vxlan.enable && qdev->vxlan.num_filters == 0)
+ return qede_vxlan_enable(eth_dev,
+ ECORE_TUNN_CLSS_MAC_VLAN, false, true);
}
return 0;
@@ -2260,35 +2379,38 @@ static int qede_vxlan_tunn_config(struct rte_eth_dev *eth_dev,
{
struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
- struct ecore_tunnel_info tunn;
- struct ecore_hwfn *p_hwfn;
enum ecore_filter_ucast_type type;
- enum ecore_tunn_clss clss;
- struct ecore_filter_ucast ucast;
+ enum ecore_tunn_clss clss = MAX_ECORE_TUNN_CLSS;
+ struct ecore_filter_ucast ucast = {0};
char str[80];
- uint16_t filter_type;
- int rc, i;
+ uint16_t filter_type = 0;
+ int rc;
PMD_INIT_FUNC_TRACE(edev);
- filter_type = conf->filter_type | qdev->vxlan_filter_type;
- /* First determine if the given filter classification is supported */
- qede_get_ecore_tunn_params(filter_type, &type, &clss, str);
- if (clss == MAX_ECORE_TUNN_CLSS) {
- DP_ERR(edev, "Wrong filter type\n");
- return -EINVAL;
- }
- /* Init tunnel ucast params */
- rc = qede_set_ucast_tunn_cmn_param(&ucast, conf, type);
- if (rc != ECORE_SUCCESS) {
- DP_ERR(edev, "Unsupported VxLAN filter type 0x%x\n",
- conf->filter_type);
- return rc;
- }
- DP_INFO(edev, "Rule: \"%s\", op %d, type 0x%x\n",
- str, filter_op, ucast.type);
switch (filter_op) {
case RTE_ETH_FILTER_ADD:
+ if (IS_VF(edev))
+ return qede_vxlan_enable(eth_dev,
+ ECORE_TUNN_CLSS_MAC_VLAN, true, true);
+
+ filter_type = conf->filter_type;
+ /* Determine if the given filter classification is supported */
+ qede_get_ecore_tunn_params(filter_type, &type, &clss, str);
+ if (clss == MAX_ECORE_TUNN_CLSS) {
+ DP_ERR(edev, "Unsupported filter type\n");
+ return -EINVAL;
+ }
+ /* Init tunnel ucast params */
+ rc = qede_set_ucast_tunn_cmn_param(&ucast, conf, type);
+ if (rc != ECORE_SUCCESS) {
+ DP_ERR(edev, "Unsupported VxLAN filter type 0x%x\n",
+ conf->filter_type);
+ return rc;
+ }
+ DP_INFO(edev, "Rule: \"%s\", op %d, type 0x%x\n",
+ str, filter_op, ucast.type);
+
ucast.opcode = ECORE_FILTER_ADD;
/* Skip MAC/VLAN if filter is based on VNI */
@@ -2308,22 +2430,34 @@ static int qede_vxlan_tunn_config(struct rte_eth_dev *eth_dev,
if (rc != ECORE_SUCCESS)
return rc;
- qdev->vxlan_filter_type = filter_type;
+ qdev->vxlan.num_filters++;
+ qdev->vxlan.filter_type = filter_type;
+ if (!qdev->vxlan.enable)
+ return qede_vxlan_enable(eth_dev, clss, true, true);
- DP_INFO(edev, "Enabling VXLAN tunneling\n");
- qede_set_cmn_tunn_param(&tunn, clss, true, true);
- for_each_hwfn(edev, i) {
- p_hwfn = &edev->hwfns[i];
- rc = ecore_sp_pf_update_tunn_cfg(p_hwfn,
- &tunn, ECORE_SPQ_MODE_CB, NULL);
- if (rc != ECORE_SUCCESS) {
- DP_ERR(edev, "Failed to update tunn_clss %u\n",
- tunn.vxlan.tun_cls);
- }
- }
- qdev->num_tunn_filters++; /* Filter added successfully */
break;
case RTE_ETH_FILTER_DELETE:
+ if (IS_VF(edev))
+ return qede_vxlan_enable(eth_dev,
+ ECORE_TUNN_CLSS_MAC_VLAN, false, true);
+
+ filter_type = conf->filter_type;
+ /* Determine if the given filter classification is supported */
+ qede_get_ecore_tunn_params(filter_type, &type, &clss, str);
+ if (clss == MAX_ECORE_TUNN_CLSS) {
+ DP_ERR(edev, "Unsupported filter type\n");
+ return -EINVAL;
+ }
+ /* Init tunnel ucast params */
+ rc = qede_set_ucast_tunn_cmn_param(&ucast, conf, type);
+ if (rc != ECORE_SUCCESS) {
+ DP_ERR(edev, "Unsupported VxLAN filter type 0x%x\n",
+ conf->filter_type);
+ return rc;
+ }
+ DP_INFO(edev, "Rule: \"%s\", op %d, type 0x%x\n",
+ str, filter_op, ucast.type);
+
ucast.opcode = ECORE_FILTER_REMOVE;
if (!(filter_type & ETH_TUNNEL_FILTER_TENID)) {
@@ -2337,33 +2471,16 @@ static int qede_vxlan_tunn_config(struct rte_eth_dev *eth_dev,
if (rc != ECORE_SUCCESS)
return rc;
- qdev->vxlan_filter_type = filter_type;
- qdev->num_tunn_filters--;
+ qdev->vxlan.num_filters--;
/* Disable VXLAN if VXLAN filters become 0 */
- if (qdev->num_tunn_filters == 0) {
- DP_INFO(edev, "Disabling VXLAN tunneling\n");
-
- /* Use 0 as tunnel mode */
- qede_set_cmn_tunn_param(&tunn, clss, false, true);
- for_each_hwfn(edev, i) {
- p_hwfn = &edev->hwfns[i];
- rc = ecore_sp_pf_update_tunn_cfg(p_hwfn, &tunn,
- ECORE_SPQ_MODE_CB, NULL);
- if (rc != ECORE_SUCCESS) {
- DP_ERR(edev,
- "Failed to update tunn_clss %u\n",
- tunn.vxlan.tun_cls);
- break;
- }
- }
- }
+ if (qdev->vxlan.num_filters == 0)
+ return qede_vxlan_enable(eth_dev, clss, false, true);
break;
default:
DP_ERR(edev, "Unsupported operation %d\n", filter_op);
return -EINVAL;
}
- DP_INFO(edev, "Current VXLAN filters %d\n", qdev->num_tunn_filters);
return 0;
}
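
The ADD/DELETE paths above keep a per-port count of VXLAN filters and toggle the tunnel classification only at the edges: the first filter added enables it, the last one removed disables it. A minimal sketch of that reference-count pattern under hypothetical names, separate from the driver's actual qede_vxlan_enable() flow:

/* Sketch only: enable on first filter, disable when the count hits zero. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct tunn_state {
	bool enable;
	uint16_t num_filters;
};

static void tunn_set(struct tunn_state *t, bool enable)
{
	t->enable = enable;
	printf("vxlan %s\n", enable ? "enabled" : "disabled");
}

static void filter_add(struct tunn_state *t)
{
	t->num_filters++;
	if (!t->enable)
		tunn_set(t, true);  /* first filter turns the tunnel on */
}

static void filter_del(struct tunn_state *t)
{
	if (t->num_filters && --t->num_filters == 0)
		tunn_set(t, false); /* last filter turns it back off */
}

int main(void)
{
	struct tunn_state t = { .enable = false, .num_filters = 0 };

	filter_add(&t); /* vxlan enabled */
	filter_add(&t);
	filter_del(&t);
	filter_del(&t); /* vxlan disabled */
	return 0;
}
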
@@ -2491,6 +2608,8 @@ static const struct eth_dev_ops qede_eth_vf_dev_ops = {
.reta_update = qede_rss_reta_update,
.reta_query = qede_rss_reta_query,
.mtu_set = qede_set_mtu,
+ .udp_tunnel_port_add = qede_udp_dst_port_add,
+ .udp_tunnel_port_del = qede_udp_dst_port_del,
};
static void qede_update_pf_params(struct ecore_dev *edev)
@@ -2523,6 +2642,7 @@ static int qede_common_dev_init(struct rte_eth_dev *eth_dev, bool is_vf)
/* Extract key data structures */
adapter = eth_dev->data->dev_private;
+ adapter->ethdev = eth_dev;
edev = &adapter->edev;
pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
pci_addr = pci_dev->addr;
@@ -2583,7 +2703,7 @@ static int qede_common_dev_init(struct rte_eth_dev *eth_dev, bool is_vf)
* This is required since uio device uses only one MSI-x
* interrupt vector but we need one for each engine.
*/
- if (edev->num_hwfns > 1 && IS_PF(edev)) {
+ if (ECORE_IS_CMT(edev) && IS_PF(edev)) {
rc = rte_eal_alarm_set(timer_period * US_PER_S,
qede_poll_sp_sb_cb,
(void *)eth_dev);
diff --git a/drivers/net/qede/qede_ethdev.h b/drivers/net/qede/qede_ethdev.h
index a3254b12..021de5c0 100644
--- a/drivers/net/qede/qede_ethdev.h
+++ b/drivers/net/qede/qede_ethdev.h
@@ -29,15 +29,11 @@
#include "base/ecore_chain.h"
#include "base/ecore_status.h"
#include "base/ecore_hsi_eth.h"
-#include "base/ecore_dev_api.h"
#include "base/ecore_iov_api.h"
#include "base/ecore_cxt.h"
#include "base/nvm_cfg.h"
-#include "base/ecore_iov_api.h"
#include "base/ecore_sp_commands.h"
#include "base/ecore_l2.h"
-#include "base/ecore_dev_api.h"
-#include "base/ecore_l2.h"
#include "qede_logs.h"
#include "qede_if.h"
@@ -49,8 +45,8 @@
/* Driver versions */
#define QEDE_PMD_VER_PREFIX "QEDE PMD"
#define QEDE_PMD_VERSION_MAJOR 2
-#define QEDE_PMD_VERSION_MINOR 5
-#define QEDE_PMD_VERSION_REVISION 2
+#define QEDE_PMD_VERSION_MINOR 6
+#define QEDE_PMD_VERSION_REVISION 0
#define QEDE_PMD_VERSION_PATCH 1
#define QEDE_PMD_VERSION qede_stringify(QEDE_PMD_VERSION_MAJOR) "." \
@@ -122,7 +118,6 @@
#define PCI_DEVICE_ID_QLOGIC_AH_IOV CHIP_NUM_AH_IOV
-#define QEDE_VXLAN_DEF_PORT 8472
extern char fw_file[];
@@ -171,6 +166,13 @@ struct qede_fdir_info {
SLIST_HEAD(fdir_list_head, qede_fdir_entry)fdir_list_head;
};
+struct qede_vxlan_tunn {
+ bool enable;
+ uint16_t num_filters;
+ uint16_t filter_type;
+#define QEDE_VXLAN_DEF_PORT (4789)
+ uint16_t udp_port;
+};
/*
* Structure to store private data for each port.
@@ -200,11 +202,11 @@ struct qede_dev {
SLIST_HEAD(uc_list_head, qede_ucast_entry) uc_list_head;
uint16_t num_uc_addr;
bool handle_hw_err;
- uint16_t num_tunn_filters;
- uint16_t vxlan_filter_type;
+ struct qede_vxlan_tunn vxlan;
struct qede_fdir_info fdir_info;
bool vlan_strip_flg;
char drv_ver[QEDE_PMD_DRV_VER_STR_SIZE];
+ void *ethdev;
};
/* Non-static functions */
@@ -221,6 +223,9 @@ int qed_fill_eth_dev_info(struct ecore_dev *edev,
struct qed_dev_eth_info *info);
int qede_dev_set_link_state(struct rte_eth_dev *eth_dev, bool link_up);
+int qede_link_update(struct rte_eth_dev *eth_dev,
+ __rte_unused int wait_to_complete);
+
int qede_dev_filter_ctrl(struct rte_eth_dev *dev, enum rte_filter_type type,
enum rte_filter_op op, void *arg);
diff --git a/drivers/net/qede/qede_fdir.c b/drivers/net/qede/qede_fdir.c
index 7bd5c5d6..da6364ee 100644
--- a/drivers/net/qede/qede_fdir.c
+++ b/drivers/net/qede/qede_fdir.c
@@ -53,7 +53,7 @@ int qede_check_fdir_support(struct rte_eth_dev *eth_dev)
DP_INFO(edev, "flowdir is disabled\n");
break;
case RTE_FDIR_MODE_PERFECT:
- if (edev->num_hwfns > 1) {
+ if (ECORE_IS_CMT(edev)) {
DP_ERR(edev, "flowdir is not supported in 100G mode\n");
qdev->fdir_info.arfs.arfs_enable = false;
return -ENOTSUP;
@@ -171,8 +171,8 @@ qede_config_cmn_fdir_filter(struct rte_eth_dev *eth_dev,
&qdev->fdir_info.arfs);
}
/* configure filter with ECORE_SPQ_MODE_EBLOCK */
- rc = ecore_configure_rfs_ntuple_filter(p_hwfn, p_hwfn->p_arfs_ptt, NULL,
- (dma_addr_t)mz->phys_addr,
+ rc = ecore_configure_rfs_ntuple_filter(p_hwfn, NULL,
+ (dma_addr_t)mz->iova,
pkt_len,
fdir_filter->action.rx_queue,
0, add);
@@ -386,7 +386,7 @@ qede_fdir_filter_conf(struct rte_eth_dev *eth_dev,
switch (filter_op) {
case RTE_ETH_FILTER_NOP:
/* Typically used to query flowdir support */
- if (edev->num_hwfns > 1) {
+ if (ECORE_IS_CMT(edev)) {
DP_ERR(edev, "flowdir is not supported in 100G mode\n");
return -ENOTSUP;
}
@@ -425,7 +425,7 @@ int qede_ntuple_filter_conf(struct rte_eth_dev *eth_dev,
switch (filter_op) {
case RTE_ETH_FILTER_NOP:
/* Typically used to query fdir support */
- if (edev->num_hwfns > 1) {
+ if (ECORE_IS_CMT(edev)) {
DP_ERR(edev, "flowdir is not supported in 100G mode\n");
return -ENOTSUP;
}
diff --git a/drivers/net/qede/qede_if.h b/drivers/net/qede/qede_if.h
index 9864bb44..246f0fd3 100644
--- a/drivers/net/qede/qede_if.h
+++ b/drivers/net/qede/qede_if.h
@@ -40,14 +40,19 @@ struct qed_dev_info {
#define QED_MFW_VERSION_3_OFFSET 24
uint32_t flash_size;
- uint8_t mf_mode;
+ bool b_arfs_capable;
+ bool b_inter_pf_switch;
bool tx_switching;
u16 mtu;
+ bool smart_an;
+
/* Out param for qede */
bool vxlan_enable;
bool gre_enable;
bool geneve_enable;
+
+ enum ecore_dev_type dev_type;
};
struct qed_dev_eth_info {
@@ -79,6 +84,7 @@ struct qed_link_params {
#define QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS (1 << 1)
#define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED (1 << 2)
#define QED_LINK_OVERRIDE_PAUSE_CONFIG (1 << 3)
+#define QED_LINK_OVERRIDE_EEE_CONFIG (1 << 5)
uint32_t override_flags;
bool autoneg;
uint32_t adv_speeds;
@@ -87,6 +93,7 @@ struct qed_link_params {
#define QED_LINK_PAUSE_RX_ENABLE (1 << 1)
#define QED_LINK_PAUSE_TX_ENABLE (1 << 2)
uint32_t pause_config;
+ struct ecore_link_eee_params eee;
};
struct qed_link_output {
@@ -97,9 +104,15 @@ struct qed_link_output {
uint32_t speed; /* In Mb/s */
uint32_t adv_speed; /* Speed mask */
uint8_t duplex; /* In DUPLEX defs */
- uint8_t port; /* In PORT defs */
+ uint16_t port; /* In PORT defs */
bool autoneg;
uint32_t pause_config;
+
+ /* EEE - capability & param */
+ bool eee_supported;
+ bool eee_active;
+ u8 sup_caps;
+ struct ecore_link_eee_params eee;
};
struct qed_slowpath_params {
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index a6ff7af2..ae187321 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -19,7 +19,7 @@
char fw_file[PATH_MAX];
const char *QEDE_DEFAULT_FIRMWARE =
- "/lib/firmware/qed/qed_init_values-8.20.0.0.bin";
+ "/lib/firmware/qed/qed_init_values-8.30.12.0.bin";
static void
qed_update_pf_params(struct ecore_dev *edev, struct ecore_pf_params *params)
@@ -36,6 +36,7 @@ static void qed_init_pci(struct ecore_dev *edev, struct rte_pci_device *pci_dev)
{
edev->regview = pci_dev->mem_resource[0].addr;
edev->doorbells = pci_dev->mem_resource[2].addr;
+ edev->db_size = pci_dev->mem_resource[2].len;
}
static int
@@ -221,10 +222,11 @@ static void qed_stop_iov_task(struct ecore_dev *edev)
static int qed_slowpath_start(struct ecore_dev *edev,
struct qed_slowpath_params *params)
{
+ struct ecore_drv_load_params drv_load_params;
+ struct ecore_hw_init_params hw_init_params;
+ struct ecore_mcp_drv_version drv_version;
const uint8_t *data = NULL;
struct ecore_hwfn *hwfn;
- struct ecore_mcp_drv_version drv_version;
- struct ecore_hw_init_params hw_init_params;
struct ecore_ptt *p_ptt;
int rc;
@@ -280,8 +282,13 @@ static int qed_slowpath_start(struct ecore_dev *edev,
hw_init_params.int_mode = ECORE_INT_MODE_MSIX;
hw_init_params.allow_npar_tx_switch = true;
hw_init_params.bin_fw_data = data;
- hw_init_params.mfw_timeout_val = ECORE_LOAD_REQ_LOCK_TO_DEFAULT;
- hw_init_params.avoid_eng_reset = false;
+
+ memset(&drv_load_params, 0, sizeof(drv_load_params));
+ drv_load_params.mfw_timeout_val = ECORE_LOAD_REQ_LOCK_TO_DEFAULT;
+ drv_load_params.avoid_eng_reset = false;
+ drv_load_params.override_force_load = ECORE_OVERRIDE_FORCE_LOAD_ALWAYS;
+ hw_init_params.p_drv_load_params = &drv_load_params;
+
rc = ecore_hw_init(edev, &hw_init_params);
if (rc) {
DP_ERR(edev, "ecore_hw_init failed\n");
@@ -335,6 +342,7 @@ err:
static int
qed_fill_dev_info(struct ecore_dev *edev, struct qed_dev_info *dev_info)
{
+ struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(edev);
struct ecore_ptt *ptt = NULL;
struct ecore_tunnel_info *tun = &edev->tunnel;
@@ -357,6 +365,7 @@ qed_fill_dev_info(struct ecore_dev *edev, struct qed_dev_info *dev_info)
dev_info->num_hwfns = edev->num_hwfns;
dev_info->is_mf_default = IS_MF_DEFAULT(&edev->hwfns[0]);
dev_info->mtu = ECORE_LEADING_HWFN(edev)->hw_info.mtu;
+ dev_info->dev_type = edev->type;
rte_memcpy(&dev_info->hw_mac, &edev->hwfns[0].hw_info.hw_mac_addr,
ETHER_ADDR_LEN);
@@ -367,9 +376,14 @@ qed_fill_dev_info(struct ecore_dev *edev, struct qed_dev_info *dev_info)
dev_info->fw_eng = FW_ENGINEERING_VERSION;
if (IS_PF(edev)) {
- dev_info->mf_mode = edev->mf_mode;
+ dev_info->b_inter_pf_switch =
+ OSAL_TEST_BIT(ECORE_MF_INTER_PF_SWITCH, &edev->mf_bits);
+ if (!OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS, &edev->mf_bits))
+ dev_info->b_arfs_capable = true;
dev_info->tx_switching = false;
+ dev_info->smart_an = ecore_mcp_is_smart_an_supported(p_hwfn);
+
ptt = ecore_ptt_acquire(ECORE_LEADING_HWFN(edev));
if (ptt) {
ecore_mcp_get_mfw_ver(ECORE_LEADING_HWFN(edev), ptt,
@@ -412,7 +426,7 @@ qed_fill_eth_dev_info(struct ecore_dev *edev, struct qed_dev_eth_info *info)
info->num_queues +=
FEAT_NUM(&edev->hwfns[i], ECORE_PF_L2_QUE);
- if (edev->p_iov_info)
+ if (IS_ECORE_SRIOV(edev))
max_vf_vlan_filters = edev->p_iov_info->total_vfs *
ECORE_ETH_VF_NUM_VLAN_FILTERS;
info->num_vlan_filters = RESC_NUM(&edev->hwfns[0], ECORE_VLAN) -
@@ -423,7 +437,7 @@ qed_fill_eth_dev_info(struct ecore_dev *edev, struct qed_dev_eth_info *info)
} else {
ecore_vf_get_num_rxqs(ECORE_LEADING_HWFN(edev),
&info->num_queues);
- if (edev->num_hwfns > 1) {
+ if (ECORE_IS_CMT(edev)) {
ecore_vf_get_num_rxqs(&edev->hwfns[1], &queues);
info->num_queues += queues;
}
@@ -479,6 +493,7 @@ qed_sb_init(struct ecore_dev *edev, struct ecore_sb_info *sb_info,
}
static void qed_fill_link(struct ecore_hwfn *hwfn,
+ __rte_unused struct ecore_ptt *ptt,
struct qed_link_output *if_link)
{
struct ecore_mcp_link_params params;
@@ -529,17 +544,42 @@ static void qed_fill_link(struct ecore_hwfn *hwfn,
if (params.pause.forced_tx)
if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE;
+
+ if (link_caps.default_eee == ECORE_MCP_EEE_UNSUPPORTED) {
+ if_link->eee_supported = false;
+ } else {
+ if_link->eee_supported = true;
+ if_link->eee_active = link.eee_active;
+ if_link->sup_caps = link_caps.eee_speed_caps;
+ /* MFW clears adv_caps on eee disable; use configured value */
+ if_link->eee.adv_caps = link.eee_adv_caps ? link.eee_adv_caps :
+ params.eee.adv_caps;
+ if_link->eee.lp_adv_caps = link.eee_lp_adv_caps;
+ if_link->eee.enable = params.eee.enable;
+ if_link->eee.tx_lpi_enable = params.eee.tx_lpi_enable;
+ if_link->eee.tx_lpi_timer = params.eee.tx_lpi_timer;
+ }
}
static void
qed_get_current_link(struct ecore_dev *edev, struct qed_link_output *if_link)
{
- qed_fill_link(&edev->hwfns[0], if_link);
+ struct ecore_hwfn *hwfn;
+ struct ecore_ptt *ptt;
-#ifdef CONFIG_QED_SRIOV
- for_each_hwfn(cdev, i)
- qed_inform_vf_link_state(&cdev->hwfns[i]);
-#endif
+ hwfn = &edev->hwfns[0];
+ if (IS_PF(edev)) {
+ ptt = ecore_ptt_acquire(hwfn);
+ if (!ptt)
+ DP_NOTICE(hwfn, true, "Failed to fill link; No PTT\n");
+
+ qed_fill_link(hwfn, ptt, if_link);
+
+ if (ptt)
+ ecore_ptt_release(hwfn, ptt);
+ } else {
+ qed_fill_link(hwfn, NULL, if_link);
+ }
}
static int qed_set_link(struct ecore_dev *edev, struct qed_link_params *params)
@@ -578,6 +618,10 @@ static int qed_set_link(struct ecore_dev *edev, struct qed_link_params *params)
link_params->pause.forced_tx = false;
}
+ if (params->override_flags & QED_LINK_OVERRIDE_EEE_CONFIG)
+ memcpy(&link_params->eee, &params->eee,
+ sizeof(link_params->eee));
+
rc = ecore_mcp_set_link(hwfn, ptt, params->link_up);
ecore_ptt_release(hwfn, ptt);
@@ -587,9 +631,10 @@ static int qed_set_link(struct ecore_dev *edev, struct qed_link_params *params)
void qed_link_update(struct ecore_hwfn *hwfn)
{
- struct qed_link_output if_link;
+ struct ecore_dev *edev = hwfn->p_dev;
+ struct qede_dev *qdev = (struct qede_dev *)edev;
- qed_fill_link(hwfn, &if_link);
+ qede_link_update((struct rte_eth_dev *)qdev->ethdev, 0);
}
static int qed_drain(struct ecore_dev *edev)
diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index 5c3613c7..8e8536f8 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -28,7 +28,7 @@ static inline int qede_alloc_rx_buffer(struct qede_rx_queue *rxq)
}
rxq->sw_rx_ring[idx].mbuf = new_mb;
rxq->sw_rx_ring[idx].page_offset = 0;
- mapping = rte_mbuf_data_dma_addr_default(new_mb);
+ mapping = rte_mbuf_data_iova_default(new_mb);
/* Advance PROD and get BD pointer */
rx_bd = (struct eth_rx_bd *)ecore_chain_produce(&rxq->rx_bd_ring);
rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
@@ -364,12 +364,12 @@ qede_alloc_mem_sb(struct qede_dev *qdev, struct ecore_sb_info *sb_info,
uint16_t sb_id)
{
struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
- struct status_block *sb_virt;
+ struct status_block_e4 *sb_virt;
dma_addr_t sb_phys;
int rc;
sb_virt = OSAL_DMA_ALLOC_COHERENT(edev, &sb_phys,
- sizeof(struct status_block));
+ sizeof(struct status_block_e4));
if (!sb_virt) {
DP_ERR(edev, "Status block allocation failed\n");
return -ENOMEM;
@@ -379,7 +379,7 @@ qede_alloc_mem_sb(struct qede_dev *qdev, struct ecore_sb_info *sb_info,
if (rc) {
DP_ERR(edev, "Status block initialization failed\n");
OSAL_DMA_FREE_COHERENT(edev, sb_virt, sb_phys,
- sizeof(struct status_block));
+ sizeof(struct status_block_e4));
return rc;
}
@@ -453,7 +453,7 @@ void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev)
if (fp->sb_info) {
OSAL_DMA_FREE_COHERENT(edev, fp->sb_info->sb_virt,
fp->sb_info->sb_phys,
- sizeof(struct status_block));
+ sizeof(struct status_block_e4));
rte_free(fp->sb_info);
fp->sb_info = NULL;
}
@@ -555,7 +555,7 @@ qede_rx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
params.queue_id = rx_queue_id / edev->num_hwfns;
params.vport_id = 0;
params.stats_id = params.vport_id;
- params.sb = fp->sb_info->igu_sb_id;
+ params.p_sb = fp->sb_info;
DP_INFO(edev, "rxq %u igu_sb_id 0x%x\n",
fp->rxq->queue_id, fp->sb_info->igu_sb_id);
params.sb_idx = RX_PI;
@@ -614,7 +614,7 @@ qede_tx_queue_start(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id)
params.queue_id = tx_queue_id / edev->num_hwfns;
params.vport_id = 0;
params.stats_id = params.vport_id;
- params.sb = fp->sb_info->igu_sb_id;
+ params.p_sb = fp->sb_info;
DP_INFO(edev, "txq %u igu_sb_id 0x%x\n",
fp->txq->queue_id, fp->sb_info->igu_sb_id);
params.sb_idx = TX_PI(0); /* tc = 0 */
@@ -780,7 +780,7 @@ int qede_start_queues(struct rte_eth_dev *eth_dev)
{
struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
uint8_t id;
- int rc;
+ int rc = -1;
for_each_rss(id) {
rc = qede_rx_queue_start(eth_dev, id);
@@ -844,6 +844,109 @@ static inline uint8_t qede_check_notunn_csum_l4(uint16_t flag)
return 0;
}
+/*
+ * Returns the outer L3 and L4 packet_type for tunneled packets; the CQE does
+ * not carry the outer L3/L4 protocol types, so parse the frame headers.
+ */
+static inline uint32_t qede_rx_cqe_to_pkt_type_outer(struct rte_mbuf *m)
+{
+ uint32_t packet_type = RTE_PTYPE_UNKNOWN;
+ struct ether_hdr *eth_hdr;
+ struct ipv4_hdr *ipv4_hdr;
+ struct ipv6_hdr *ipv6_hdr;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ if (eth_hdr->ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+ packet_type |= RTE_PTYPE_L3_IPV4;
+ ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ if (ipv4_hdr->next_proto_id == IPPROTO_TCP)
+ packet_type |= RTE_PTYPE_L4_TCP;
+ else if (ipv4_hdr->next_proto_id == IPPROTO_UDP)
+ packet_type |= RTE_PTYPE_L4_UDP;
+ } else if (eth_hdr->ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
+ packet_type |= RTE_PTYPE_L3_IPV6;
+ ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ if (ipv6_hdr->proto == IPPROTO_TCP)
+ packet_type |= RTE_PTYPE_L4_TCP;
+ else if (ipv6_hdr->proto == IPPROTO_UDP)
+ packet_type |= RTE_PTYPE_L4_UDP;
+ }
+
+ return packet_type;
+}
+
+static inline uint32_t qede_rx_cqe_to_pkt_type_inner(uint16_t flags)
+{
+ uint16_t val;
+
+ /* Lookup table */
+ static const uint32_t
+ ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = {
+ [QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_INNER_L3_IPV4 |
+ RTE_PTYPE_INNER_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_INNER_L3_IPV6 |
+ RTE_PTYPE_INNER_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_INNER_L3_IPV4 |
+ RTE_PTYPE_INNER_L4_TCP |
+ RTE_PTYPE_INNER_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_INNER_L3_IPV6 |
+ RTE_PTYPE_INNER_L4_TCP |
+ RTE_PTYPE_INNER_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_INNER_L3_IPV4 |
+ RTE_PTYPE_INNER_L4_UDP |
+ RTE_PTYPE_INNER_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_INNER_L3_IPV6 |
+ RTE_PTYPE_INNER_L4_UDP |
+ RTE_PTYPE_INNER_L2_ETHER,
+ /* Frags with no VLAN */
+ [QEDE_PKT_TYPE_IPV4_FRAG] = RTE_PTYPE_INNER_L3_IPV4 |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV6_FRAG] = RTE_PTYPE_INNER_L3_IPV6 |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L2_ETHER,
+ /* VLANs */
+ [QEDE_PKT_TYPE_IPV4_VLAN] = RTE_PTYPE_INNER_L3_IPV4 |
+ RTE_PTYPE_INNER_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV6_VLAN] = RTE_PTYPE_INNER_L3_IPV6 |
+ RTE_PTYPE_INNER_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV4_TCP_VLAN] = RTE_PTYPE_INNER_L3_IPV4 |
+ RTE_PTYPE_INNER_L4_TCP |
+ RTE_PTYPE_INNER_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV6_TCP_VLAN] = RTE_PTYPE_INNER_L3_IPV6 |
+ RTE_PTYPE_INNER_L4_TCP |
+ RTE_PTYPE_INNER_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV4_UDP_VLAN] = RTE_PTYPE_INNER_L3_IPV4 |
+ RTE_PTYPE_INNER_L4_UDP |
+ RTE_PTYPE_INNER_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV6_UDP_VLAN] = RTE_PTYPE_INNER_L3_IPV6 |
+ RTE_PTYPE_INNER_L4_UDP |
+ RTE_PTYPE_INNER_L2_ETHER_VLAN,
+ /* Frags with VLAN */
+ [QEDE_PKT_TYPE_IPV4_VLAN_FRAG] = RTE_PTYPE_INNER_L3_IPV4 |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV6_VLAN_FRAG] = RTE_PTYPE_INNER_L3_IPV6 |
+ RTE_PTYPE_INNER_L4_FRAG |
+ RTE_PTYPE_INNER_L2_ETHER_VLAN,
+ };
+
+ /* Bits (0..3) provide the L3/L4 protocol type */
+ /* Bits (4,5) provide the frag and VLAN info */
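+ /*
+ * Example: an inner IPv4/TCP frame with a VLAN tag masks down to index
+ * 0x25 (QEDE_PKT_TYPE_IPV4_TCP_VLAN) and so resolves to
+ * RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP |
+ * RTE_PTYPE_INNER_L2_ETHER_VLAN.
+ */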
+ val = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK <<
+ PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) |
+ (PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK <<
+ PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT) |
+ (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK <<
+ PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT) |
+ (PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK <<
+ PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT)) & flags;
+
+ if (val < QEDE_PKT_TYPE_MAX)
+ return ptype_lkup_tbl[val];
+
+ return RTE_PTYPE_UNKNOWN;
+}
+
static inline uint32_t qede_rx_cqe_to_pkt_type(uint16_t flags)
{
uint16_t val;
@@ -851,24 +954,68 @@ static inline uint32_t qede_rx_cqe_to_pkt_type(uint16_t flags)
/* Lookup table */
static const uint32_t
ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = {
- [QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_L3_IPV4,
- [QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_L3_IPV6,
- [QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
- [QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
- [QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
- [QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
+ [QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L2_ETHER,
+ /* Frags with no VLAN */
+ [QEDE_PKT_TYPE_IPV4_FRAG] = RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L4_FRAG |
+ RTE_PTYPE_L2_ETHER,
+ [QEDE_PKT_TYPE_IPV6_FRAG] = RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L4_FRAG |
+ RTE_PTYPE_L2_ETHER,
+ /* VLANs */
+ [QEDE_PKT_TYPE_IPV4_VLAN] = RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV6_VLAN] = RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV4_TCP_VLAN] = RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV6_TCP_VLAN] = RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV4_UDP_VLAN] = RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV6_UDP_VLAN] = RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L2_ETHER_VLAN,
+ /* Frags with VLAN */
+ [QEDE_PKT_TYPE_IPV4_VLAN_FRAG] = RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L4_FRAG |
+ RTE_PTYPE_L2_ETHER_VLAN,
+ [QEDE_PKT_TYPE_IPV6_VLAN_FRAG] = RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L4_FRAG |
+ RTE_PTYPE_L2_ETHER_VLAN,
};
/* Bits (0..3) provide the L3/L4 protocol type */
+ /* Bits (4,5) provide the frag and VLAN info */
val = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK <<
PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) |
(PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK <<
- PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT)) & flags;
+ PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT) |
+ (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK <<
+ PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT) |
+ (PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK <<
+ PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT)) & flags;
if (val < QEDE_PKT_TYPE_MAX)
- return ptype_lkup_tbl[val] | RTE_PTYPE_L2_ETHER;
- else
- return RTE_PTYPE_UNKNOWN;
+ return ptype_lkup_tbl[val];
+
+ return RTE_PTYPE_UNKNOWN;
}
static inline uint8_t
@@ -917,7 +1064,7 @@ qede_reuse_page(__rte_unused struct qede_dev *qdev,
curr_prod = &rxq->sw_rx_ring[idx];
*curr_prod = *curr_cons;
- new_mapping = rte_mbuf_data_dma_addr_default(curr_prod->mbuf) +
+ new_mapping = rte_mbuf_data_iova_default(curr_prod->mbuf) +
curr_prod->page_offset;
rx_bd_prod->addr.hi = rte_cpu_to_le_32(U64_HI(new_mapping));
@@ -1100,6 +1247,27 @@ qede_process_sg_pkts(void *p_rxq, struct rte_mbuf *rx_mb,
return 0;
}
+#ifdef RTE_LIBRTE_QEDE_DEBUG_RX
+static inline void
+print_rx_bd_info(struct rte_mbuf *m, struct qede_rx_queue *rxq,
+ uint8_t bitfield)
+{
+ PMD_RX_LOG(INFO, rxq,
+ "len 0x%x bf 0x%x hash_val 0x%x"
+ " ol_flags 0x%04lx l2=%s l3=%s l4=%s tunn=%s"
+ " inner_l2=%s inner_l3=%s inner_l4=%s\n",
+ m->data_len, bitfield, m->hash.rss,
+ (unsigned long)m->ol_flags,
+ rte_get_ptype_l2_name(m->packet_type),
+ rte_get_ptype_l3_name(m->packet_type),
+ rte_get_ptype_l4_name(m->packet_type),
+ rte_get_ptype_tunnel_name(m->packet_type),
+ rte_get_ptype_inner_l2_name(m->packet_type),
+ rte_get_ptype_inner_l3_name(m->packet_type),
+ rte_get_ptype_inner_l4_name(m->packet_type));
+}
+#endif
+
uint16_t
qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
@@ -1120,7 +1288,6 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
uint16_t parse_flag;
#ifdef RTE_LIBRTE_QEDE_DEBUG_RX
uint8_t bitfield_val;
- enum rss_hash_type htype;
#endif
uint8_t tunn_parse_flag;
uint8_t j;
@@ -1214,8 +1381,6 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
rss_hash = rte_le_to_cpu_32(fp_cqe->rss_hash);
#ifdef RTE_LIBRTE_QEDE_DEBUG_RX
bitfield_val = fp_cqe->bitfields;
- htype = (uint8_t)GET_FIELD(bitfield_val,
- ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE);
#endif
} else {
parse_flag =
@@ -1226,8 +1391,6 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
vlan_tci = rte_le_to_cpu_16(cqe_start_tpa->vlan_tag);
#ifdef RTE_LIBRTE_QEDE_DEBUG_RX
bitfield_val = cqe_start_tpa->bitfields;
- htype = (uint8_t)GET_FIELD(bitfield_val,
- ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE);
#endif
rss_hash = rte_le_to_cpu_32(cqe_start_tpa->rss_hash);
}
@@ -1247,8 +1410,17 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
else
flags = fp_cqe->tunnel_pars_flags.flags;
tunn_parse_flag = flags;
+ /* Tunnel type */
packet_type =
qede_rx_cqe_to_tunn_pkt_type(tunn_parse_flag);
+
+ /* Inner header */
+ packet_type |=
+ qede_rx_cqe_to_pkt_type_inner(parse_flag);
+
+ /* Outer L3/L4 types are not available in the CQE */
+ packet_type |=
+ qede_rx_cqe_to_pkt_type_outer(rx_mb);
}
} else {
PMD_RX_LOG(INFO, rxq, "Rx non-tunneled packet\n");
@@ -1275,21 +1447,16 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
}
}
- if (CQE_HAS_VLAN(parse_flag)) {
- ol_flags |= PKT_RX_VLAN_PKT;
+ if (CQE_HAS_VLAN(parse_flag) ||
+ CQE_HAS_OUTER_VLAN(parse_flag)) {
+ /* Note: FW doesn't indicate Q-in-Q packets */
+ ol_flags |= PKT_RX_VLAN;
if (qdev->vlan_strip_flg) {
ol_flags |= PKT_RX_VLAN_STRIPPED;
rx_mb->vlan_tci = vlan_tci;
}
}
- if (CQE_HAS_OUTER_VLAN(parse_flag)) {
- ol_flags |= PKT_RX_QINQ_PKT;
- if (qdev->vlan_strip_flg) {
- rx_mb->vlan_tci = vlan_tci;
- ol_flags |= PKT_RX_QINQ_STRIPPED;
- }
- rx_mb->vlan_tci_outer = 0;
- }
+
/* RSS Hash */
if (qdev->rss_enable) {
ol_flags |= PKT_RX_RSS_HASH;
@@ -1341,11 +1508,9 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
rx_mb->ol_flags = ol_flags;
rx_mb->data_len = len;
rx_mb->packet_type = packet_type;
- PMD_RX_LOG(INFO, rxq,
- "pkt_type 0x%04x len %u hash_type %d hash_val 0x%x"
- " ol_flags 0x%04lx\n",
- packet_type, len, htype, rx_mb->hash.rss,
- (unsigned long)ol_flags);
+#ifdef RTE_LIBRTE_QEDE_DEBUG_RX
+ print_rx_bd_info(rx_mb, rxq, bitfield_val);
+#endif
if (!tpa_start_flg) {
rx_mb->nb_segs = fp_cqe->bd_num;
rx_mb->pkt_len = pkt_len;
@@ -1400,7 +1565,7 @@ qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg,
memset(*bd2, 0, sizeof(struct eth_tx_2nd_bd));
nb_segs++;
}
- mapping = rte_mbuf_data_dma_addr(m_seg);
+ mapping = rte_mbuf_data_iova(m_seg);
QEDE_BD_SET_ADDR_LEN(*bd2, mapping, m_seg->data_len);
PMD_TX_LOG(DEBUG, txq, "BD2 len %04x", m_seg->data_len);
} else if (nb_segs == 1) {
@@ -1410,7 +1575,7 @@ qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg,
memset(*bd3, 0, sizeof(struct eth_tx_3rd_bd));
nb_segs++;
}
- mapping = rte_mbuf_data_dma_addr(m_seg);
+ mapping = rte_mbuf_data_iova(m_seg);
QEDE_BD_SET_ADDR_LEN(*bd3, mapping, m_seg->data_len);
PMD_TX_LOG(DEBUG, txq, "BD3 len %04x", m_seg->data_len);
} else {
@@ -1418,7 +1583,7 @@ qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg,
ecore_chain_produce(&txq->tx_pbl);
memset(tx_bd, 0, sizeof(*tx_bd));
nb_segs++;
- mapping = rte_mbuf_data_dma_addr(m_seg);
+ mapping = rte_mbuf_data_iova(m_seg);
QEDE_BD_SET_ADDR_LEN(tx_bd, mapping, m_seg->data_len);
PMD_TX_LOG(DEBUG, txq, "BD len %04x", m_seg->data_len);
}
@@ -1801,7 +1966,7 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
nbds++;
/* Map MBUF linear data for DMA and set in the BD1 */
- QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_dma_addr(mbuf),
+ QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf),
mbuf->data_len);
bd1->data.bitfields = rte_cpu_to_le_16(bd1_bf);
bd1->data.bd_flags.bitfields = bd1_bd_flags_bf;
@@ -1814,11 +1979,11 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
nbds++;
/* BD1 */
- QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_dma_addr(mbuf),
+ QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_iova(mbuf),
hdr_size);
/* BD2 */
QEDE_BD_SET_ADDR_LEN(bd2, (hdr_size +
- rte_mbuf_data_dma_addr(mbuf)),
+ rte_mbuf_data_iova(mbuf)),
mbuf->data_len - hdr_size);
bd2->data.bitfields1 = rte_cpu_to_le_16(bd2_bf1);
if (mplsoudp_flg) {
diff --git a/drivers/net/qede/qede_rxtx.h b/drivers/net/qede/qede_rxtx.h
index b551fd6a..acf9e475 100644
--- a/drivers/net/qede/qede_rxtx.h
+++ b/drivers/net/qede/qede_rxtx.h
@@ -84,7 +84,8 @@
/* Macros for non-tunnel packet types lkup table */
#define QEDE_PKT_TYPE_UNKNOWN 0x0
-#define QEDE_PKT_TYPE_MAX 0xf
+#define QEDE_PKT_TYPE_MAX 0x3f
+
#define QEDE_PKT_TYPE_IPV4 0x1
#define QEDE_PKT_TYPE_IPV6 0x2
#define QEDE_PKT_TYPE_IPV4_TCP 0x5
@@ -92,6 +93,20 @@
#define QEDE_PKT_TYPE_IPV4_UDP 0x9
#define QEDE_PKT_TYPE_IPV6_UDP 0xa
+/* For frag pkts, the corresponding IP type bits are also set */
+#define QEDE_PKT_TYPE_IPV4_FRAG 0x11
+#define QEDE_PKT_TYPE_IPV6_FRAG 0x12
+
+#define QEDE_PKT_TYPE_IPV4_VLAN 0x21
+#define QEDE_PKT_TYPE_IPV6_VLAN 0x22
+#define QEDE_PKT_TYPE_IPV4_TCP_VLAN 0x25
+#define QEDE_PKT_TYPE_IPV6_TCP_VLAN 0x26
+#define QEDE_PKT_TYPE_IPV4_UDP_VLAN 0x29
+#define QEDE_PKT_TYPE_IPV6_UDP_VLAN 0x2a
+
+#define QEDE_PKT_TYPE_IPV4_VLAN_FRAG 0x31
+#define QEDE_PKT_TYPE_IPV6_VLAN_FRAG 0x32
+
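+/*
+ * Lookup index layout: bits 0..3 carry the L3/L4 type, bit 4 (0x10) marks an
+ * IP fragment and bit 5 (0x20) marks a VLAN-tagged frame, which is why
+ * QEDE_PKT_TYPE_MAX grows from 0xf to 0x3f above.
+ */
+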
/* Macros for tunneled packets with next protocol lkup table */
#define QEDE_PKT_TYPE_TUNN_GENEVE 0x1
#define QEDE_PKT_TYPE_TUNN_GRE 0x2
@@ -99,12 +114,12 @@
/* Bit 2 is don't care bit */
#define QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GENEVE 0x9
-#define QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GRE 0xa
+#define QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GRE 0xa
#define QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_VXLAN 0xb
#define QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GENEVE 0xd
#define QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GRE 0xe
-#define QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_VXLAN 0xf
+#define QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_VXLAN 0xf
#define QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GENEVE 0x11
@@ -112,7 +127,7 @@
#define QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_VXLAN 0x13
#define QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GENEVE 0x15
-#define QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GRE 0x16
+#define QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GRE 0x16
#define QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_VXLAN 0x17
diff --git a/drivers/net/ring/Makefile b/drivers/net/ring/Makefile
index b7e1a378..085ffa57 100644
--- a/drivers/net/ring/Makefile
+++ b/drivers/net/ring/Makefile
@@ -38,8 +38,11 @@ LIB = librte_pmd_ring.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_vdev
-EXPORT_MAP := rte_eth_ring_version.map
+EXPORT_MAP := rte_pmd_ring_version.map
LIBABIVER := 2
diff --git a/drivers/net/ring/rte_eth_ring.c b/drivers/net/ring/rte_eth_ring.c
index 464d3d38..a73c631f 100644
--- a/drivers/net/ring/rte_eth_ring.c
+++ b/drivers/net/ring/rte_eth_ring.c
@@ -36,9 +36,8 @@
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
-#include <rte_memzone.h>
#include <rte_string_fns.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#include <rte_kvargs.h>
#include <rte_errno.h>
@@ -190,7 +189,7 @@ eth_dev_info(struct rte_eth_dev *dev,
dev_info->min_rx_bufsize = 0;
}
-static void
+static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
unsigned i;
@@ -214,6 +213,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->ipackets = rx_total;
stats->opackets = tx_total;
stats->oerrors = tx_err_total;
+
+ return 0;
}
static void
@@ -356,7 +357,6 @@ do_eth_dev_ring_create(const char *name,
eth_dev->data = data;
eth_dev->dev_ops = &ops;
- data->dev_flags = RTE_ETH_DEV_DETACHABLE;
data->kdrv = RTE_KDRV_NONE;
data->numa_node = numa_node;
@@ -394,7 +394,7 @@ rte_eth_from_rings(const char *name, struct rte_ring *const rx_queues[],
};
char args_str[32] = { 0 };
char ring_name[32] = { 0 };
- uint8_t port_id = RTE_MAX_ETHPORTS;
+ uint16_t port_id = RTE_MAX_ETHPORTS;
int ret;
/* do some parameter checking */
diff --git a/drivers/net/ring/rte_eth_ring_version.map b/drivers/net/ring/rte_pmd_ring_version.map
index 1f785d94..1f785d94 100644
--- a/drivers/net/ring/rte_eth_ring_version.map
+++ b/drivers/net/ring/rte_pmd_ring_version.map
diff --git a/drivers/net/sfc/Makefile b/drivers/net/sfc/Makefile
index 57aa963b..2cfd62a2 100644
--- a/drivers/net/sfc/Makefile
+++ b/drivers/net/sfc/Makefile
@@ -65,13 +65,19 @@ CFLAGS += -Wbad-function-cast
CFLAGS_BASE_DRIVER += -Wno-empty-body
else ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
CFLAGS_BASE_DRIVER += -Wno-unused-but-set-variable
+# Suppress the ICC false-positive warning that 'bulk' may be used before
+# its value is set
+CFLAGS_sfc_ef10_tx.o += -wd3656
endif
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
#
# List of base driver object files for which
# special CFLAGS above should be applied
#
-BASE_DRIVER_OBJS=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))
+BASE_DRIVER_OBJS=$(sort $(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c))))
$(foreach obj, $(BASE_DRIVER_OBJS), \
$(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
diff --git a/drivers/net/sfc/base/ef10_filter.c b/drivers/net/sfc/base/ef10_filter.c
index 695bb847..e1faf1dd 100644
--- a/drivers/net/sfc/base/ef10_filter.c
+++ b/drivers/net/sfc/base/ef10_filter.c
@@ -123,29 +123,33 @@ ef10_filter_init(
#define MATCH_MASK(match) (EFX_MASK32(match) << EFX_LOW_BIT(match))
EFX_STATIC_ASSERT(EFX_FILTER_MATCH_REM_HOST ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_SRC_IP));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_SRC_IP));
EFX_STATIC_ASSERT(EFX_FILTER_MATCH_LOC_HOST ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_DST_IP));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_DST_IP));
EFX_STATIC_ASSERT(EFX_FILTER_MATCH_REM_MAC ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_SRC_MAC));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_SRC_MAC));
EFX_STATIC_ASSERT(EFX_FILTER_MATCH_REM_PORT ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_SRC_PORT));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_SRC_PORT));
EFX_STATIC_ASSERT(EFX_FILTER_MATCH_LOC_MAC ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_DST_MAC));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_DST_MAC));
EFX_STATIC_ASSERT(EFX_FILTER_MATCH_LOC_PORT ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_DST_PORT));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_DST_PORT));
EFX_STATIC_ASSERT(EFX_FILTER_MATCH_ETHER_TYPE ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_ETHER_TYPE));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE));
EFX_STATIC_ASSERT(EFX_FILTER_MATCH_INNER_VID ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_INNER_VLAN));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_INNER_VLAN));
EFX_STATIC_ASSERT(EFX_FILTER_MATCH_OUTER_VID ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_OUTER_VLAN));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_OUTER_VLAN));
EFX_STATIC_ASSERT(EFX_FILTER_MATCH_IP_PROTO ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_IP_PROTO));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO));
+ EFX_STATIC_ASSERT(EFX_FILTER_MATCH_IFRM_UNKNOWN_MCAST_DST ==
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST));
+ EFX_STATIC_ASSERT(EFX_FILTER_MATCH_IFRM_UNKNOWN_UCAST_DST ==
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST));
EFX_STATIC_ASSERT(EFX_FILTER_MATCH_UNKNOWN_MCAST_DST ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_UNKNOWN_MCAST_DST));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST));
EFX_STATIC_ASSERT((uint32_t)EFX_FILTER_MATCH_UNKNOWN_UCAST_DST ==
- MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_UNKNOWN_UCAST_DST));
+ MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST));
#undef MATCH_MASK
EFSYS_KMEM_ALLOC(enp->en_esip, sizeof (ef10_filter_table_t), eftp);
@@ -186,27 +190,27 @@ efx_mcdi_filter_op_add(
__inout ef10_filter_handle_t *handle)
{
efx_mcdi_req_t req;
- uint8_t payload[MAX(MC_CMD_FILTER_OP_IN_LEN,
- MC_CMD_FILTER_OP_OUT_LEN)];
+ uint8_t payload[MAX(MC_CMD_FILTER_OP_EXT_IN_LEN,
+ MC_CMD_FILTER_OP_EXT_OUT_LEN)];
efx_rc_t rc;
memset(payload, 0, sizeof (payload));
req.emr_cmd = MC_CMD_FILTER_OP;
req.emr_in_buf = payload;
- req.emr_in_length = MC_CMD_FILTER_OP_IN_LEN;
+ req.emr_in_length = MC_CMD_FILTER_OP_EXT_IN_LEN;
req.emr_out_buf = payload;
- req.emr_out_length = MC_CMD_FILTER_OP_OUT_LEN;
+ req.emr_out_length = MC_CMD_FILTER_OP_EXT_OUT_LEN;
switch (filter_op) {
case MC_CMD_FILTER_OP_IN_OP_REPLACE:
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_HANDLE_LO,
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_HANDLE_LO,
handle->efh_lo);
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_HANDLE_HI,
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_HANDLE_HI,
handle->efh_hi);
/* Fall through */
case MC_CMD_FILTER_OP_IN_OP_INSERT:
case MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE:
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_OP, filter_op);
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_OP, filter_op);
break;
default:
EFSYS_ASSERT(0);
@@ -214,82 +218,123 @@ efx_mcdi_filter_op_add(
goto fail1;
}
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_PORT_ID,
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_PORT_ID,
EVB_PORT_ID_ASSIGNED);
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_MATCH_FIELDS,
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_MATCH_FIELDS,
spec->efs_match_flags);
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_RX_DEST,
- MC_CMD_FILTER_OP_IN_RX_DEST_HOST);
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_RX_QUEUE,
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_RX_DEST,
+ MC_CMD_FILTER_OP_EXT_IN_RX_DEST_HOST);
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_RX_QUEUE,
spec->efs_dmaq_id);
+
+#if EFSYS_OPT_RX_SCALE
if (spec->efs_flags & EFX_FILTER_FLAG_RX_RSS) {
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_RX_CONTEXT,
- spec->efs_rss_context);
+ uint32_t rss_context;
+
+ if (spec->efs_rss_context == EFX_RSS_CONTEXT_DEFAULT)
+ rss_context = enp->en_rss_context;
+ else
+ rss_context = spec->efs_rss_context;
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_RX_CONTEXT,
+ rss_context);
}
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_RX_MODE,
+#endif
+
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_RX_MODE,
spec->efs_flags & EFX_FILTER_FLAG_RX_RSS ?
- MC_CMD_FILTER_OP_IN_RX_MODE_RSS :
- MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE);
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_TX_DEST,
- MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT);
+ MC_CMD_FILTER_OP_EXT_IN_RX_MODE_RSS :
+ MC_CMD_FILTER_OP_EXT_IN_RX_MODE_SIMPLE);
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_TX_DEST,
+ MC_CMD_FILTER_OP_EXT_IN_TX_DEST_DEFAULT);
if (filter_op != MC_CMD_FILTER_OP_IN_OP_REPLACE) {
/*
* NOTE: Unlike most MCDI requests, the filter fields
* are presented in network (big endian) byte order.
*/
- memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_IN_SRC_MAC),
+ memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_EXT_IN_SRC_MAC),
spec->efs_rem_mac, EFX_MAC_ADDR_LEN);
- memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_IN_DST_MAC),
+ memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_EXT_IN_DST_MAC),
spec->efs_loc_mac, EFX_MAC_ADDR_LEN);
- MCDI_IN_SET_WORD(req, FILTER_OP_IN_SRC_PORT,
+ MCDI_IN_SET_WORD(req, FILTER_OP_EXT_IN_SRC_PORT,
__CPU_TO_BE_16(spec->efs_rem_port));
- MCDI_IN_SET_WORD(req, FILTER_OP_IN_DST_PORT,
+ MCDI_IN_SET_WORD(req, FILTER_OP_EXT_IN_DST_PORT,
__CPU_TO_BE_16(spec->efs_loc_port));
- MCDI_IN_SET_WORD(req, FILTER_OP_IN_ETHER_TYPE,
+ MCDI_IN_SET_WORD(req, FILTER_OP_EXT_IN_ETHER_TYPE,
__CPU_TO_BE_16(spec->efs_ether_type));
- MCDI_IN_SET_WORD(req, FILTER_OP_IN_INNER_VLAN,
+ MCDI_IN_SET_WORD(req, FILTER_OP_EXT_IN_INNER_VLAN,
__CPU_TO_BE_16(spec->efs_inner_vid));
- MCDI_IN_SET_WORD(req, FILTER_OP_IN_OUTER_VLAN,
+ MCDI_IN_SET_WORD(req, FILTER_OP_EXT_IN_OUTER_VLAN,
__CPU_TO_BE_16(spec->efs_outer_vid));
/* IP protocol (in low byte, high byte is zero) */
- MCDI_IN_SET_BYTE(req, FILTER_OP_IN_IP_PROTO,
+ MCDI_IN_SET_BYTE(req, FILTER_OP_EXT_IN_IP_PROTO,
spec->efs_ip_proto);
EFX_STATIC_ASSERT(sizeof (spec->efs_rem_host) ==
- MC_CMD_FILTER_OP_IN_SRC_IP_LEN);
+ MC_CMD_FILTER_OP_EXT_IN_SRC_IP_LEN);
EFX_STATIC_ASSERT(sizeof (spec->efs_loc_host) ==
- MC_CMD_FILTER_OP_IN_DST_IP_LEN);
+ MC_CMD_FILTER_OP_EXT_IN_DST_IP_LEN);
- memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_IN_SRC_IP),
+ memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_EXT_IN_SRC_IP),
&spec->efs_rem_host.eo_byte[0],
- MC_CMD_FILTER_OP_IN_SRC_IP_LEN);
- memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_IN_DST_IP),
+ MC_CMD_FILTER_OP_EXT_IN_SRC_IP_LEN);
+ memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_EXT_IN_DST_IP),
&spec->efs_loc_host.eo_byte[0],
- MC_CMD_FILTER_OP_IN_DST_IP_LEN);
+ MC_CMD_FILTER_OP_EXT_IN_DST_IP_LEN);
+
+ /*
+ * On Medford, filters for encapsulated packets match based on
+ * the ether type and IP protocol in the outer frame. In
+ * addition we need to fill in the VNI or VSID type field.
+ */
+ switch (spec->efs_encap_type) {
+ case EFX_TUNNEL_PROTOCOL_NONE:
+ break;
+ case EFX_TUNNEL_PROTOCOL_VXLAN:
+ case EFX_TUNNEL_PROTOCOL_GENEVE:
+ MCDI_IN_POPULATE_DWORD_1(req,
+ FILTER_OP_EXT_IN_VNI_OR_VSID,
+ FILTER_OP_EXT_IN_VNI_TYPE,
+ spec->efs_encap_type == EFX_TUNNEL_PROTOCOL_VXLAN ?
+ MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN :
+ MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE);
+ break;
+ case EFX_TUNNEL_PROTOCOL_NVGRE:
+ MCDI_IN_POPULATE_DWORD_1(req,
+ FILTER_OP_EXT_IN_VNI_OR_VSID,
+ FILTER_OP_EXT_IN_VSID_TYPE,
+ MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_NVGRE);
+ break;
+ default:
+ EFSYS_ASSERT(0);
+ rc = EINVAL;
+ goto fail2;
+ }
}
efx_mcdi_execute(enp, &req);
if (req.emr_rc != 0) {
rc = req.emr_rc;
- goto fail2;
+ goto fail3;
}
- if (req.emr_out_length_used < MC_CMD_FILTER_OP_OUT_LEN) {
+ if (req.emr_out_length_used < MC_CMD_FILTER_OP_EXT_OUT_LEN) {
rc = EMSGSIZE;
- goto fail3;
+ goto fail4;
}
- handle->efh_lo = MCDI_OUT_DWORD(req, FILTER_OP_OUT_HANDLE_LO);
- handle->efh_hi = MCDI_OUT_DWORD(req, FILTER_OP_OUT_HANDLE_HI);
+ handle->efh_lo = MCDI_OUT_DWORD(req, FILTER_OP_EXT_OUT_HANDLE_LO);
+ handle->efh_hi = MCDI_OUT_DWORD(req, FILTER_OP_EXT_OUT_HANDLE_HI);
return (0);
+fail4:
+ EFSYS_PROBE(fail4);
fail3:
EFSYS_PROBE(fail3);
fail2:
@@ -308,24 +353,24 @@ efx_mcdi_filter_op_delete(
__inout ef10_filter_handle_t *handle)
{
efx_mcdi_req_t req;
- uint8_t payload[MAX(MC_CMD_FILTER_OP_IN_LEN,
- MC_CMD_FILTER_OP_OUT_LEN)];
+ uint8_t payload[MAX(MC_CMD_FILTER_OP_EXT_IN_LEN,
+ MC_CMD_FILTER_OP_EXT_OUT_LEN)];
efx_rc_t rc;
memset(payload, 0, sizeof (payload));
req.emr_cmd = MC_CMD_FILTER_OP;
req.emr_in_buf = payload;
- req.emr_in_length = MC_CMD_FILTER_OP_IN_LEN;
+ req.emr_in_length = MC_CMD_FILTER_OP_EXT_IN_LEN;
req.emr_out_buf = payload;
- req.emr_out_length = MC_CMD_FILTER_OP_OUT_LEN;
+ req.emr_out_length = MC_CMD_FILTER_OP_EXT_OUT_LEN;
switch (filter_op) {
case MC_CMD_FILTER_OP_IN_OP_REMOVE:
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_OP,
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_OP,
MC_CMD_FILTER_OP_IN_OP_REMOVE);
break;
case MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE:
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_OP,
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_OP,
MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
break;
default:
@@ -334,8 +379,8 @@ efx_mcdi_filter_op_delete(
goto fail1;
}
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_HANDLE_LO, handle->efh_lo);
- MCDI_IN_SET_DWORD(req, FILTER_OP_IN_HANDLE_HI, handle->efh_hi);
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_HANDLE_LO, handle->efh_lo);
+ MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_HANDLE_HI, handle->efh_hi);
efx_mcdi_execute_quiet(enp, &req);
@@ -344,7 +389,7 @@ efx_mcdi_filter_op_delete(
goto fail2;
}
- if (req.emr_out_length_used < MC_CMD_FILTER_OP_OUT_LEN) {
+ if (req.emr_out_length_used < MC_CMD_FILTER_OP_EXT_OUT_LEN) {
rc = EMSGSIZE;
goto fail3;
}
@@ -390,6 +435,8 @@ ef10_filter_equal(
return (B_FALSE);
if (left->efs_ip_proto != right->efs_ip_proto)
return (B_FALSE);
+ if (left->efs_encap_type != right->efs_encap_type)
+ return (B_FALSE);
return (B_TRUE);
@@ -549,10 +596,6 @@ ef10_filter_add_internal(
EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON ||
enp->en_family == EFX_FAMILY_MEDFORD);
-#if EFSYS_OPT_RX_SCALE
- spec->efs_rss_context = enp->en_rss_context;
-#endif
-
hash = ef10_filter_hash(spec);
/*
@@ -1194,6 +1237,108 @@ fail1:
return (rc);
}
+typedef struct ef10_filter_encap_entry_s {
+ uint16_t ether_type;
+ efx_tunnel_protocol_t encap_type;
+ uint32_t inner_frame_match;
+} ef10_filter_encap_entry_t;
+
+#define EF10_ENCAP_FILTER_ENTRY(ipv, encap_type, inner_frame_match) \
+ { EFX_ETHER_TYPE_##ipv, EFX_TUNNEL_PROTOCOL_##encap_type, \
+ EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_##inner_frame_match }
+
+static ef10_filter_encap_entry_t ef10_filter_encap_list[] = {
+ EF10_ENCAP_FILTER_ENTRY(IPV4, VXLAN, UCAST_DST),
+ EF10_ENCAP_FILTER_ENTRY(IPV4, VXLAN, MCAST_DST),
+ EF10_ENCAP_FILTER_ENTRY(IPV6, VXLAN, UCAST_DST),
+ EF10_ENCAP_FILTER_ENTRY(IPV6, VXLAN, MCAST_DST),
+
+ EF10_ENCAP_FILTER_ENTRY(IPV4, GENEVE, UCAST_DST),
+ EF10_ENCAP_FILTER_ENTRY(IPV4, GENEVE, MCAST_DST),
+ EF10_ENCAP_FILTER_ENTRY(IPV6, GENEVE, UCAST_DST),
+ EF10_ENCAP_FILTER_ENTRY(IPV6, GENEVE, MCAST_DST),
+
+ EF10_ENCAP_FILTER_ENTRY(IPV4, NVGRE, UCAST_DST),
+ EF10_ENCAP_FILTER_ENTRY(IPV4, NVGRE, MCAST_DST),
+ EF10_ENCAP_FILTER_ENTRY(IPV6, NVGRE, UCAST_DST),
+ EF10_ENCAP_FILTER_ENTRY(IPV6, NVGRE, MCAST_DST),
+};
+
+#undef EF10_ENCAP_FILTER_ENTRY
+
+static __checkReturn efx_rc_t
+ef10_filter_insert_encap_filters(
+ __in efx_nic_t *enp,
+ __in boolean_t mulcst,
+ __in efx_filter_flags_t filter_flags)
+{
+ ef10_filter_table_t *table = enp->en_filter.ef_ef10_filter_table;
+ uint32_t i;
+ efx_rc_t rc;
+
+ EFX_STATIC_ASSERT(EFX_ARRAY_SIZE(ef10_filter_encap_list) <=
+ EFX_ARRAY_SIZE(table->eft_encap_filter_indexes));
+
+ /*
+ * On Medford, full-featured firmware can identify packets as being
+ * tunnel encapsulated, even if no encapsulated packet offloads are in
+ * use. When packets are identified as such, ordinary filters are not
+ * applied, only ones specific to encapsulated packets. Hence we need to
+ * insert filters for encapsulated packets in order to receive them.
+ *
+ * Separate filters need to be inserted for each ether type,
+ * encapsulation type, and inner frame type (unicast or multicast). To
+ * keep things simple and reduce the number of filters needed, catch-all
+ * filters for all combinations of types are inserted, even if
+ * all_unicst or all_mulcst have not been set. (These catch-all filters
+ * may well, however, fail to insert on unprivileged functions.)
+ */
+ table->eft_encap_filter_count = 0;
+ for (i = 0; i < EFX_ARRAY_SIZE(ef10_filter_encap_list); i++) {
+ efx_filter_spec_t spec;
+ ef10_filter_encap_entry_t *encap_filter =
+ &ef10_filter_encap_list[i];
+
+ /*
+ * Skip multicast filters if we've not been asked for
+ * any multicast traffic.
+ */
+ if ((mulcst == B_FALSE) &&
+ (encap_filter->inner_frame_match ==
+ EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_MCAST_DST))
+ continue;
+
+ efx_filter_spec_init_rx(&spec, EFX_FILTER_PRI_AUTO,
+ filter_flags,
+ table->eft_default_rxq);
+ efx_filter_spec_set_ether_type(&spec, encap_filter->ether_type);
+ rc = efx_filter_spec_set_encap_type(&spec,
+ encap_filter->encap_type,
+ encap_filter->inner_frame_match);
+ if (rc != 0)
+ goto fail1;
+
+ rc = ef10_filter_add_internal(enp, &spec, B_TRUE,
+ &table->eft_encap_filter_indexes[
+ table->eft_encap_filter_count]);
+ if (rc != 0) {
+ if (rc != EACCES)
+ goto fail2;
+ } else {
+ table->eft_encap_filter_count++;
+ }
+ }
+
+ return (0);
+
+fail2:
+ EFSYS_PROBE(fail2);
+fail1:
+ EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+ return (rc);
+}
+
static void
ef10_filter_remove_old(
__in efx_nic_t *enp)
@@ -1289,6 +1434,12 @@ ef10_filter_reconfigure(
}
table->eft_mulcst_filter_count = 0;
+ for (i = 0; i < table->eft_encap_filter_count; i++) {
+ (void) ef10_filter_delete_internal(enp,
+ table->eft_encap_filter_indexes[i]);
+ }
+ table->eft_encap_filter_count = 0;
+
return (0);
}
@@ -1306,6 +1457,10 @@ ef10_filter_reconfigure(
ef10_filter_set_entry_auto_old(table,
table->eft_mulcst_filter_indexes[i]);
}
+ for (i = 0; i < table->eft_encap_filter_count; i++) {
+ ef10_filter_set_entry_auto_old(table,
+ table->eft_encap_filter_indexes[i]);
+ }
/*
* Insert or renew unicast filters.
@@ -1423,6 +1578,13 @@ ef10_filter_reconfigure(
}
}
+ if (encp->enc_tunnel_encapsulations_supported != 0) {
+ /* Try to insert filters for encapsulated packets. */
+ (void) ef10_filter_insert_encap_filters(enp,
+ mulcst || all_mulcst || brdcst,
+ filter_flags);
+ }
+
/* Remove old filters which were not renewed */
ef10_filter_remove_old(enp);
diff --git a/drivers/net/sfc/base/ef10_impl.h b/drivers/net/sfc/base/ef10_impl.h
index 8c3dffee..8f9eb7a3 100644
--- a/drivers/net/sfc/base/ef10_impl.h
+++ b/drivers/net/sfc/base/ef10_impl.h
@@ -898,8 +898,21 @@ ef10_rx_scatter_enable(
#if EFSYS_OPT_RX_SCALE
extern __checkReturn efx_rc_t
+ef10_rx_scale_context_alloc(
+ __in efx_nic_t *enp,
+ __in efx_rx_scale_context_type_t type,
+ __in uint32_t num_queues,
+ __out uint32_t *rss_contextp);
+
+extern __checkReturn efx_rc_t
+ef10_rx_scale_context_free(
+ __in efx_nic_t *enp,
+ __in uint32_t rss_context);
+
+extern __checkReturn efx_rc_t
ef10_rx_scale_mode_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in efx_rx_hash_alg_t alg,
__in efx_rx_hash_type_t type,
__in boolean_t insert);
@@ -907,12 +920,14 @@ ef10_rx_scale_mode_set(
extern __checkReturn efx_rc_t
ef10_rx_scale_key_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) uint8_t *key,
__in size_t n);
extern __checkReturn efx_rc_t
ef10_rx_scale_tbl_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) unsigned int *table,
__in size_t n);
@@ -1005,6 +1020,13 @@ typedef struct ef10_filter_entry_s {
/* Allow for the broadcast address to be added to the multicast list */
#define EFX_EF10_FILTER_MULTICAST_FILTERS_MAX (EFX_MAC_MULTICAST_LIST_MAX + 1)
+/*
+ * For encapsulated packets, there is one filter for each combination of
+ * outer frame (IPv4 or IPv6), encapsulation type (VXLAN, GENEVE or NVGRE),
+ * and inner frame destination (unicast or multicast).
+ */
+#define EFX_EF10_FILTER_ENCAP_FILTERS_MAX 12
+
typedef struct ef10_filter_table_s {
ef10_filter_entry_t eft_entry[EFX_EF10_FILTER_TBL_ROWS];
efx_rxq_t *eft_default_rxq;
@@ -1016,6 +1038,9 @@ typedef struct ef10_filter_table_s {
EFX_EF10_FILTER_MULTICAST_FILTERS_MAX];
uint32_t eft_mulcst_filter_count;
boolean_t eft_using_all_mulcst;
+ uint32_t eft_encap_filter_indexes[
+ EFX_EF10_FILTER_ENCAP_FILTERS_MAX];
+ uint32_t eft_encap_filter_count;
} ef10_filter_table_t;
__checkReturn efx_rc_t
diff --git a/drivers/net/sfc/base/ef10_nic.c b/drivers/net/sfc/base/ef10_nic.c
index aac2679c..58d1b0af 100644
--- a/drivers/net/sfc/base/ef10_nic.c
+++ b/drivers/net/sfc/base/ef10_nic.c
@@ -1072,6 +1072,16 @@ ef10_get_datapath_caps(
encp->enc_mac_stats_40g_tx_size_bins =
CAP_FLAG2(flags2, MAC_STATS_40G_TX_SIZE_BINS) ? B_TRUE : B_FALSE;
+ /*
+ * Check if firmware supports VXLAN and NVGRE tunnels.
+ * The capability indicates Geneve protocol support as well.
+ */
+ if (CAP_FLAG(flags, VXLAN_NVGRE))
+ encp->enc_tunnel_encapsulations_supported =
+ (1u << EFX_TUNNEL_PROTOCOL_VXLAN) |
+ (1u << EFX_TUNNEL_PROTOCOL_GENEVE) |
+ (1u << EFX_TUNNEL_PROTOCOL_NVGRE);
+
#undef CAP_FLAG
#undef CAP_FLAG2
diff --git a/drivers/net/sfc/base/ef10_rx.c b/drivers/net/sfc/base/ef10_rx.c
index 661caa88..849f674c 100644
--- a/drivers/net/sfc/base/ef10_rx.c
+++ b/drivers/net/sfc/base/ef10_rx.c
@@ -159,7 +159,7 @@ fail1:
static __checkReturn efx_rc_t
efx_mcdi_rss_context_alloc(
__in efx_nic_t *enp,
- __in efx_rx_scale_support_t scale_support,
+ __in efx_rx_scale_context_type_t type,
__in uint32_t num_queues,
__out uint32_t *rss_contextp)
{
@@ -175,7 +175,7 @@ efx_mcdi_rss_context_alloc(
goto fail1;
}
- switch (scale_support) {
+ switch (type) {
case EFX_RX_SCALE_EXCLUSIVE:
context_type = MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE;
break;
@@ -461,7 +461,7 @@ ef10_rx_init(
* Allocated an exclusive RSS context, which allows both the
* indirection table and key to be modified.
*/
- enp->en_rss_support = EFX_RX_SCALE_EXCLUSIVE;
+ enp->en_rss_context_type = EFX_RX_SCALE_EXCLUSIVE;
enp->en_hash_support = EFX_RX_HASH_AVAILABLE;
} else {
/*
@@ -469,7 +469,7 @@ ef10_rx_init(
* operation without support for RSS. The pseudo-header in
* received packets will not contain a Toeplitz hash value.
*/
- enp->en_rss_support = EFX_RX_SCALE_UNAVAILABLE;
+ enp->en_rss_context_type = EFX_RX_SCALE_UNAVAILABLE;
enp->en_hash_support = EFX_RX_HASH_UNAVAILABLE;
}
@@ -491,8 +491,51 @@ ef10_rx_scatter_enable(
#if EFSYS_OPT_RX_SCALE
__checkReturn efx_rc_t
+ef10_rx_scale_context_alloc(
+ __in efx_nic_t *enp,
+ __in efx_rx_scale_context_type_t type,
+ __in uint32_t num_queues,
+ __out uint32_t *rss_contextp)
+{
+ efx_rc_t rc;
+
+ rc = efx_mcdi_rss_context_alloc(enp, type, num_queues, rss_contextp);
+ if (rc != 0)
+ goto fail1;
+
+ return (0);
+
+fail1:
+ EFSYS_PROBE1(fail1, efx_rc_t, rc);
+ return (rc);
+}
+#endif /* EFSYS_OPT_RX_SCALE */
+
+#if EFSYS_OPT_RX_SCALE
+ __checkReturn efx_rc_t
+ef10_rx_scale_context_free(
+ __in efx_nic_t *enp,
+ __in uint32_t rss_context)
+{
+ efx_rc_t rc;
+
+ rc = efx_mcdi_rss_context_free(enp, rss_context);
+ if (rc != 0)
+ goto fail1;
+
+ return (0);
+
+fail1:
+ EFSYS_PROBE1(fail1, efx_rc_t, rc);
+ return (rc);
+}
+#endif /* EFSYS_OPT_RX_SCALE */
+
+#if EFSYS_OPT_RX_SCALE
+ __checkReturn efx_rc_t
ef10_rx_scale_mode_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in efx_rx_hash_alg_t alg,
__in efx_rx_hash_type_t type,
__in boolean_t insert)
@@ -507,13 +550,16 @@ ef10_rx_scale_mode_set(
goto fail1;
}
- if (enp->en_rss_support == EFX_RX_SCALE_UNAVAILABLE) {
- rc = ENOTSUP;
- goto fail2;
+ if (rss_context == EFX_RSS_CONTEXT_DEFAULT) {
+ if (enp->en_rss_context_type == EFX_RX_SCALE_UNAVAILABLE) {
+ rc = ENOTSUP;
+ goto fail2;
+ }
+ rss_context = enp->en_rss_context;
}
if ((rc = efx_mcdi_rss_context_set_flags(enp,
- enp->en_rss_context, type)) != 0)
+ rss_context, type)) != 0)
goto fail3;
return (0);
@@ -533,18 +579,24 @@ fail1:
__checkReturn efx_rc_t
ef10_rx_scale_key_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) uint8_t *key,
__in size_t n)
{
efx_rc_t rc;
- if (enp->en_rss_support == EFX_RX_SCALE_UNAVAILABLE) {
- rc = ENOTSUP;
- goto fail1;
+ EFX_STATIC_ASSERT(EFX_RSS_KEY_SIZE ==
+ MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
+
+ if (rss_context == EFX_RSS_CONTEXT_DEFAULT) {
+ if (enp->en_rss_context_type == EFX_RX_SCALE_UNAVAILABLE) {
+ rc = ENOTSUP;
+ goto fail1;
+ }
+ rss_context = enp->en_rss_context;
}
- if ((rc = efx_mcdi_rss_context_set_key(enp,
- enp->en_rss_context, key, n)) != 0)
+ if ((rc = efx_mcdi_rss_context_set_key(enp, rss_context, key, n)) != 0)
goto fail2;
return (0);
@@ -562,18 +614,23 @@ fail1:
__checkReturn efx_rc_t
ef10_rx_scale_tbl_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) unsigned int *table,
__in size_t n)
{
efx_rc_t rc;
- if (enp->en_rss_support == EFX_RX_SCALE_UNAVAILABLE) {
- rc = ENOTSUP;
- goto fail1;
+
+ if (rss_context == EFX_RSS_CONTEXT_DEFAULT) {
+ if (enp->en_rss_context_type == EFX_RX_SCALE_UNAVAILABLE) {
+ rc = ENOTSUP;
+ goto fail1;
+ }
+ rss_context = enp->en_rss_context;
}
if ((rc = efx_mcdi_rss_context_set_table(enp,
- enp->en_rss_context, table, n)) != 0)
+ rss_context, table, n)) != 0)
goto fail2;
return (0);
@@ -964,11 +1021,10 @@ ef10_rx_fini(
__in efx_nic_t *enp)
{
#if EFSYS_OPT_RX_SCALE
- if (enp->en_rss_support != EFX_RX_SCALE_UNAVAILABLE) {
+ if (enp->en_rss_context_type != EFX_RX_SCALE_UNAVAILABLE)
(void) efx_mcdi_rss_context_free(enp, enp->en_rss_context);
- }
enp->en_rss_context = 0;
- enp->en_rss_support = EFX_RX_SCALE_UNAVAILABLE;
+ enp->en_rss_context_type = EFX_RX_SCALE_UNAVAILABLE;
#else
_NOTE(ARGUNUSED(enp))
#endif /* EFSYS_OPT_RX_SCALE */
diff --git a/drivers/net/sfc/base/efx.h b/drivers/net/sfc/base/efx.h
index 7eabc370..57fba052 100644
--- a/drivers/net/sfc/base/efx.h
+++ b/drivers/net/sfc/base/efx.h
@@ -1088,6 +1088,14 @@ efx_bist_stop(
#define EFX_FEATURE_FW_ASSISTED_TSO_V2 0x00002000
#define EFX_FEATURE_PACKED_STREAM 0x00004000
+typedef enum efx_tunnel_protocol_e {
+ EFX_TUNNEL_PROTOCOL_NONE = 0,
+ EFX_TUNNEL_PROTOCOL_VXLAN,
+ EFX_TUNNEL_PROTOCOL_GENEVE,
+ EFX_TUNNEL_PROTOCOL_NVGRE,
+ EFX_TUNNEL_NPROTOS
+} efx_tunnel_protocol_t;
+
typedef struct efx_nic_cfg_s {
uint32_t enc_board_type;
uint32_t enc_phy_type;
@@ -1119,6 +1127,7 @@ typedef struct efx_nic_cfg_s {
uint32_t enc_rx_prefix_size;
uint32_t enc_rx_buf_align_start;
uint32_t enc_rx_buf_align_end;
+ uint32_t enc_rx_scale_max_exclusive_contexts;
#if EFSYS_OPT_LOOPBACK
efx_qword_t enc_loopback_types[EFX_LINK_NMODES];
#endif /* EFSYS_OPT_LOOPBACK */
@@ -1187,6 +1196,7 @@ typedef struct efx_nic_cfg_s {
boolean_t enc_rx_var_packed_stream_supported;
boolean_t enc_pm_and_rxdp_counters;
boolean_t enc_mac_stats_40g_tx_size_bins;
+ uint32_t enc_tunnel_encapsulations_supported;
/* External port identifier */
uint8_t enc_external_port;
uint32_t enc_mcdi_max_payload_length;
@@ -1873,6 +1883,9 @@ efx_rx_scatter_enable(
__in unsigned int buf_size);
#endif /* EFSYS_OPT_RX_SCATTER */
+/* Handle to represent use of the default RSS context. */
+#define EFX_RSS_CONTEXT_DEFAULT 0xffffffff
+
#if EFSYS_OPT_RX_SCALE
typedef enum efx_rx_hash_alg_e {
@@ -1892,30 +1905,44 @@ typedef enum efx_rx_hash_support_e {
EFX_RX_HASH_AVAILABLE /* Insert hash with/without RSS */
} efx_rx_hash_support_t;
+#define EFX_RSS_KEY_SIZE 40 /* RSS key size (bytes) */
#define EFX_RSS_TBL_SIZE 128 /* Rows in RX indirection table */
#define EFX_MAXRSS 64 /* RX indirection entry range */
#define EFX_MAXRSS_LEGACY 16 /* See bug16611 and bug17213 */
-typedef enum efx_rx_scale_support_e {
- EFX_RX_SCALE_UNAVAILABLE = 0, /* Not supported */
+typedef enum efx_rx_scale_context_type_e {
+ EFX_RX_SCALE_UNAVAILABLE = 0, /* No RX scale context */
EFX_RX_SCALE_EXCLUSIVE, /* Writable key/indirection table */
EFX_RX_SCALE_SHARED /* Read-only key/indirection table */
-} efx_rx_scale_support_t;
+} efx_rx_scale_context_type_t;
extern __checkReturn efx_rc_t
-efx_rx_hash_support_get(
+efx_rx_hash_default_support_get(
__in efx_nic_t *enp,
__out efx_rx_hash_support_t *supportp);
extern __checkReturn efx_rc_t
-efx_rx_scale_support_get(
+efx_rx_scale_default_support_get(
+ __in efx_nic_t *enp,
+ __out efx_rx_scale_context_type_t *typep);
+
+extern __checkReturn efx_rc_t
+efx_rx_scale_context_alloc(
__in efx_nic_t *enp,
- __out efx_rx_scale_support_t *supportp);
+ __in efx_rx_scale_context_type_t type,
+ __in uint32_t num_queues,
+ __out uint32_t *rss_contextp);
+
+extern __checkReturn efx_rc_t
+efx_rx_scale_context_free(
+ __in efx_nic_t *enp,
+ __in uint32_t rss_context);
extern __checkReturn efx_rc_t
efx_rx_scale_mode_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in efx_rx_hash_alg_t alg,
__in efx_rx_hash_type_t type,
__in boolean_t insert);
@@ -1923,12 +1950,14 @@ efx_rx_scale_mode_set(
extern __checkReturn efx_rc_t
efx_rx_scale_tbl_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) unsigned int *table,
__in size_t n);
extern __checkReturn efx_rc_t
efx_rx_scale_key_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) uint8_t *key,
__in size_t n);
@@ -2214,6 +2243,7 @@ efx_tx_qdestroy(
#define EFX_IPPROTO_TCP 6
#define EFX_IPPROTO_UDP 17
+#define EFX_IPPROTO_GRE 47
/* Use RSS to spread across multiple queues */
#define EFX_FILTER_FLAG_RX_RSS 0x01
@@ -2232,6 +2262,10 @@ efx_tx_qdestroy(
typedef unsigned int efx_filter_flags_t;
+/*
+ * Flags which specify the fields to match on. The values are the same as in the
+ * MC_CMD_FILTER_OP/MC_CMD_FILTER_OP_EXT commands.
+ */
typedef enum efx_filter_match_flags_e {
EFX_FILTER_MATCH_REM_HOST = 0x0001, /* Match by remote IP host
* address */
@@ -2246,6 +2280,10 @@ typedef enum efx_filter_match_flags_e {
EFX_FILTER_MATCH_OUTER_VID = 0x0100, /* Match by outer VLAN ID */
EFX_FILTER_MATCH_IP_PROTO = 0x0200, /* Match by IP transport
* protocol */
+ /* For encapsulated packets, match all multicast inner frames */
+ EFX_FILTER_MATCH_IFRM_UNKNOWN_MCAST_DST = 0x01000000,
+ /* For encapsulated packets, match all unicast inner frames */
+ EFX_FILTER_MATCH_IFRM_UNKNOWN_UCAST_DST = 0x02000000,
/* Match otherwise-unmatched multicast and broadcast packets */
EFX_FILTER_MATCH_UNKNOWN_MCAST_DST = 0x40000000,
/* Match otherwise-unmatched unicast packets */
@@ -2271,26 +2309,26 @@ typedef enum efx_filter_priority_s {
*/
typedef struct efx_filter_spec_s {
- uint32_t efs_match_flags;
- uint32_t efs_priority:2;
- uint32_t efs_flags:6;
- uint32_t efs_dmaq_id:12;
- uint32_t efs_rss_context;
- uint16_t efs_outer_vid;
- uint16_t efs_inner_vid;
- uint8_t efs_loc_mac[EFX_MAC_ADDR_LEN];
- uint8_t efs_rem_mac[EFX_MAC_ADDR_LEN];
- uint16_t efs_ether_type;
- uint8_t efs_ip_proto;
- uint16_t efs_loc_port;
- uint16_t efs_rem_port;
- efx_oword_t efs_rem_host;
- efx_oword_t efs_loc_host;
+ uint32_t efs_match_flags;
+ uint32_t efs_priority:2;
+ uint32_t efs_flags:6;
+ uint32_t efs_dmaq_id:12;
+ uint32_t efs_rss_context;
+ uint16_t efs_outer_vid;
+ uint16_t efs_inner_vid;
+ uint8_t efs_loc_mac[EFX_MAC_ADDR_LEN];
+ uint8_t efs_rem_mac[EFX_MAC_ADDR_LEN];
+ uint16_t efs_ether_type;
+ uint8_t efs_ip_proto;
+ efx_tunnel_protocol_t efs_encap_type;
+ uint16_t efs_loc_port;
+ uint16_t efs_rem_port;
+ efx_oword_t efs_rem_host;
+ efx_oword_t efs_loc_host;
} efx_filter_spec_t;
/* Default values for use in filter specifications */
-#define EFX_FILTER_SPEC_RSS_CONTEXT_DEFAULT 0xffffffff
#define EFX_FILTER_SPEC_RX_DMAQ_ID_DROP 0xfff
#define EFX_FILTER_SPEC_VID_UNSPEC 0xffff
@@ -2357,6 +2395,11 @@ efx_filter_spec_set_eth_local(
__in uint16_t vid,
__in const uint8_t *addr);
+extern void
+efx_filter_spec_set_ether_type(
+ __inout efx_filter_spec_t *spec,
+ __in uint16_t ether_type);
+
extern __checkReturn efx_rc_t
efx_filter_spec_set_uc_def(
__inout efx_filter_spec_t *spec);
@@ -2365,6 +2408,24 @@ extern __checkReturn efx_rc_t
efx_filter_spec_set_mc_def(
__inout efx_filter_spec_t *spec);
+typedef enum efx_filter_inner_frame_match_e {
+ EFX_FILTER_INNER_FRAME_MATCH_OTHER = 0,
+ EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_MCAST_DST,
+ EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_UCAST_DST
+} efx_filter_inner_frame_match_t;
+
+extern __checkReturn efx_rc_t
+efx_filter_spec_set_encap_type(
+ __inout efx_filter_spec_t *spec,
+ __in efx_tunnel_protocol_t encap_type,
+ __in efx_filter_inner_frame_match_t inner_frame_match);
+
+#if EFSYS_OPT_RX_SCALE
+extern __checkReturn efx_rc_t
+efx_filter_spec_set_rss_context(
+ __inout efx_filter_spec_t *spec,
+ __in uint32_t rss_context);
+#endif
#endif /* EFSYS_OPT_FILTER */
/* HASH */
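A minimal usage sketch of the per-context RSS API declared above (hypothetical caller; the Toeplitz algorithm/hash-type flags and EFX_FILTER_PRI_MANUAL priority are assumed from the wider base API, and error handling is trimmed):

static efx_rc_t
example_rss_context_setup(efx_nic_t *enp, efx_rxq_t *erp, uint32_t num_queues)
{
	uint8_t key[EFX_RSS_KEY_SIZE] = { 0 };		/* use a random key in practice */
	unsigned int tbl[EFX_RSS_TBL_SIZE] = { 0 };	/* spread queues as required */
	efx_filter_spec_t spec;
	uint32_t ctx;
	efx_rc_t rc;

	rc = efx_rx_scale_context_alloc(enp, EFX_RX_SCALE_EXCLUSIVE,
	    num_queues, &ctx);
	if (rc != 0)
		return (rc);

	(void) efx_rx_scale_mode_set(enp, ctx, EFX_RX_HASHALG_TOEPLITZ,
	    EFX_RX_HASH_IPV4 | EFX_RX_HASH_TCPIPV4, B_TRUE);
	(void) efx_rx_scale_key_set(enp, ctx, key, sizeof (key));
	(void) efx_rx_scale_tbl_set(enp, ctx, tbl, EFX_RSS_TBL_SIZE);

	/* Steer a filter at this context instead of the default one. */
	efx_filter_spec_init_rx(&spec, EFX_FILTER_PRI_MANUAL,
	    EFX_FILTER_FLAG_RX_RSS, erp);
	(void) efx_filter_spec_set_rss_context(&spec, ctx);
	/* ... fill in match fields, then efx_filter_insert(enp, &spec) ... */

	return (0);
}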
diff --git a/drivers/net/sfc/base/efx_filter.c b/drivers/net/sfc/base/efx_filter.c
index ba310260..5cab7d87 100644
--- a/drivers/net/sfc/base/efx_filter.c
+++ b/drivers/net/sfc/base/efx_filter.c
@@ -117,10 +117,6 @@ efx_filter_remove(
EFSYS_ASSERT3P(spec, !=, NULL);
EFSYS_ASSERT3U(spec->efs_flags, &, EFX_FILTER_FLAG_RX);
-#if EFSYS_OPT_RX_SCALE
- spec->efs_rss_context = enp->en_rss_context;
-#endif
-
return (efop->efo_delete(enp, spec));
}
@@ -302,7 +298,7 @@ efx_filter_spec_init_rx(
memset(spec, 0, sizeof (*spec));
spec->efs_priority = priority;
spec->efs_flags = EFX_FILTER_FLAG_RX | flags;
- spec->efs_rss_context = EFX_FILTER_SPEC_RSS_CONTEXT_DEFAULT;
+ spec->efs_rss_context = EFX_RSS_CONTEXT_DEFAULT;
spec->efs_dmaq_id = (uint16_t)erp->er_index;
}
@@ -396,6 +392,17 @@ efx_filter_spec_set_eth_local(
return (0);
}
+ void
+efx_filter_spec_set_ether_type(
+ __inout efx_filter_spec_t *spec,
+ __in uint16_t ether_type)
+{
+ EFSYS_ASSERT3P(spec, !=, NULL);
+
+ spec->efs_ether_type = ether_type;
+ spec->efs_match_flags |= EFX_FILTER_MATCH_ETHER_TYPE;
+}
+
/*
* Specify matching otherwise-unmatched unicast in a filter specification
*/
@@ -423,6 +430,88 @@ efx_filter_spec_set_mc_def(
}
+__checkReturn efx_rc_t
+efx_filter_spec_set_encap_type(
+ __inout efx_filter_spec_t *spec,
+ __in efx_tunnel_protocol_t encap_type,
+ __in efx_filter_inner_frame_match_t inner_frame_match)
+{
+ uint32_t match_flags = 0;
+ uint8_t ip_proto;
+ efx_rc_t rc;
+
+ EFSYS_ASSERT3P(spec, !=, NULL);
+
+ switch (encap_type) {
+ case EFX_TUNNEL_PROTOCOL_VXLAN:
+ case EFX_TUNNEL_PROTOCOL_GENEVE:
+ ip_proto = EFX_IPPROTO_UDP;
+ break;
+ case EFX_TUNNEL_PROTOCOL_NVGRE:
+ ip_proto = EFX_IPPROTO_GRE;
+ break;
+ default:
+ EFSYS_ASSERT(0);
+ rc = EINVAL;
+ goto fail1;
+ }
+
+ switch (inner_frame_match) {
+ case EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_MCAST_DST:
+ match_flags |= EFX_FILTER_MATCH_IFRM_UNKNOWN_MCAST_DST;
+ break;
+ case EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_UCAST_DST:
+ match_flags |= EFX_FILTER_MATCH_IFRM_UNKNOWN_UCAST_DST;
+ break;
+ case EFX_FILTER_INNER_FRAME_MATCH_OTHER:
+ /* Used when specific inner frames are to be matched. */
+ break;
+ default:
+ EFSYS_ASSERT(0);
+ rc = EINVAL;
+ goto fail2;
+ }
+
+ spec->efs_encap_type = encap_type;
+ spec->efs_ip_proto = ip_proto;
+ spec->efs_match_flags |= (match_flags | EFX_FILTER_MATCH_IP_PROTO);
+
+ return (0);
+
+fail2:
+ EFSYS_PROBE(fail2);
+fail1:
+ EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+ return (rc);
+}
+
+#if EFSYS_OPT_RX_SCALE
+ __checkReturn efx_rc_t
+efx_filter_spec_set_rss_context(
+ __inout efx_filter_spec_t *spec,
+ __in uint32_t rss_context)
+{
+ efx_rc_t rc;
+
+ EFSYS_ASSERT3P(spec, !=, NULL);
+
+ /* The filter must have been created with EFX_FILTER_FLAG_RX_RSS. */
+ if ((spec->efs_flags & EFX_FILTER_FLAG_RX_RSS) == 0) {
+ rc = EINVAL;
+ goto fail1;
+ }
+
+ spec->efs_rss_context = rss_context;
+
+ return (0);
+
+fail1:
+ EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+ return (rc);
+}
+#endif
#if EFSYS_OPT_SIENA
@@ -454,9 +543,9 @@ siena_filter_spec_from_gen_spec(
else
EFSYS_ASSERT3U(gen_spec->efs_flags, &, EFX_FILTER_FLAG_RX);
- /* Falconsiena only has one RSS context */
+ /* Siena only has one RSS context */
if ((gen_spec->efs_flags & EFX_FILTER_FLAG_RX_RSS) &&
- gen_spec->efs_rss_context != 0) {
+ gen_spec->efs_rss_context != EFX_RSS_CONTEXT_DEFAULT) {
rc = EINVAL;
goto fail1;
}
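A hypothetical sketch combining the new encap-type helper with an ordinary match field, e.g. catching VXLAN traffic on a specific outer UDP destination port (whether firmware accepts this exact match combination depends on its reported match capabilities; only the efx_* identifiers come from the base API):

static efx_rc_t
example_vxlan_port_filter(efx_nic_t *enp, efx_rxq_t *erp, uint16_t udp_dport)
{
	efx_filter_spec_t spec;
	efx_rc_t rc;

	efx_filter_spec_init_rx(&spec, EFX_FILTER_PRI_MANUAL, 0, erp);

	/* Marks the spec as VXLAN and sets the IP protocol to UDP. */
	rc = efx_filter_spec_set_encap_type(&spec, EFX_TUNNEL_PROTOCOL_VXLAN,
	    EFX_FILTER_INNER_FRAME_MATCH_OTHER);
	if (rc != 0)
		return (rc);

	/* Outer UDP destination port; the spec holds it in host byte order. */
	spec.efs_match_flags |= EFX_FILTER_MATCH_LOC_PORT;
	spec.efs_loc_port = udp_dport;

	return (efx_filter_insert(enp, &spec));
}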
diff --git a/drivers/net/sfc/base/efx_impl.h b/drivers/net/sfc/base/efx_impl.h
index 43add6d9..53fa37ac 100644
--- a/drivers/net/sfc/base/efx_impl.h
+++ b/drivers/net/sfc/base/efx_impl.h
@@ -152,11 +152,17 @@ typedef struct efx_rx_ops_s {
efx_rc_t (*erxo_scatter_enable)(efx_nic_t *, unsigned int);
#endif
#if EFSYS_OPT_RX_SCALE
- efx_rc_t (*erxo_scale_mode_set)(efx_nic_t *, efx_rx_hash_alg_t,
+ efx_rc_t (*erxo_scale_context_alloc)(efx_nic_t *,
+ efx_rx_scale_context_type_t,
+ uint32_t, uint32_t *);
+ efx_rc_t (*erxo_scale_context_free)(efx_nic_t *, uint32_t);
+ efx_rc_t (*erxo_scale_mode_set)(efx_nic_t *, uint32_t,
+ efx_rx_hash_alg_t,
efx_rx_hash_type_t, boolean_t);
- efx_rc_t (*erxo_scale_key_set)(efx_nic_t *, uint8_t *, size_t);
- efx_rc_t (*erxo_scale_tbl_set)(efx_nic_t *, unsigned int *,
- size_t);
+ efx_rc_t (*erxo_scale_key_set)(efx_nic_t *, uint32_t,
+ uint8_t *, size_t);
+ efx_rc_t (*erxo_scale_tbl_set)(efx_nic_t *, uint32_t,
+ unsigned int *, size_t);
uint32_t (*erxo_prefix_hash)(efx_nic_t *, efx_rx_hash_alg_t,
uint8_t *);
#endif /* EFSYS_OPT_RX_SCALE */
@@ -648,9 +654,9 @@ struct efx_nic_s {
const efx_vpd_ops_t *en_evpdop;
#endif /* EFSYS_OPT_VPD */
#if EFSYS_OPT_RX_SCALE
- efx_rx_hash_support_t en_hash_support;
- efx_rx_scale_support_t en_rss_support;
- uint32_t en_rss_context;
+ efx_rx_hash_support_t en_hash_support;
+ efx_rx_scale_context_type_t en_rss_context_type;
+ uint32_t en_rss_context;
#endif /* EFSYS_OPT_RX_SCALE */
uint32_t en_vport_id;
#if EFSYS_OPT_LICENSING
diff --git a/drivers/net/sfc/base/efx_rx.c b/drivers/net/sfc/base/efx_rx.c
index c8156341..785365d3 100644
--- a/drivers/net/sfc/base/efx_rx.c
+++ b/drivers/net/sfc/base/efx_rx.c
@@ -53,6 +53,7 @@ siena_rx_scatter_enable(
static __checkReturn efx_rc_t
siena_rx_scale_mode_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in efx_rx_hash_alg_t alg,
__in efx_rx_hash_type_t type,
__in boolean_t insert);
@@ -60,12 +61,14 @@ siena_rx_scale_mode_set(
static __checkReturn efx_rc_t
siena_rx_scale_key_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) uint8_t *key,
__in size_t n);
static __checkReturn efx_rc_t
siena_rx_scale_tbl_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) unsigned int *table,
__in size_t n);
@@ -149,6 +152,8 @@ static const efx_rx_ops_t __efx_rx_siena_ops = {
siena_rx_scatter_enable, /* erxo_scatter_enable */
#endif
#if EFSYS_OPT_RX_SCALE
+ NULL, /* erxo_scale_context_alloc */
+ NULL, /* erxo_scale_context_free */
siena_rx_scale_mode_set, /* erxo_scale_mode_set */
siena_rx_scale_key_set, /* erxo_scale_key_set */
siena_rx_scale_tbl_set, /* erxo_scale_tbl_set */
@@ -176,6 +181,8 @@ static const efx_rx_ops_t __efx_rx_ef10_ops = {
ef10_rx_scatter_enable, /* erxo_scatter_enable */
#endif
#if EFSYS_OPT_RX_SCALE
+ ef10_rx_scale_context_alloc, /* erxo_scale_context_alloc */
+ ef10_rx_scale_context_free, /* erxo_scale_context_free */
ef10_rx_scale_mode_set, /* erxo_scale_mode_set */
ef10_rx_scale_key_set, /* erxo_scale_key_set */
ef10_rx_scale_tbl_set, /* erxo_scale_tbl_set */
@@ -304,7 +311,7 @@ fail1:
#if EFSYS_OPT_RX_SCALE
__checkReturn efx_rc_t
-efx_rx_hash_support_get(
+efx_rx_hash_default_support_get(
__in efx_nic_t *enp,
__out efx_rx_hash_support_t *supportp)
{
@@ -318,7 +325,10 @@ efx_rx_hash_support_get(
goto fail1;
}
- /* Report if resources are available to insert RX hash value */
+ /*
+ * Report the hashing support the client gets by default if it
+ * does not allocate an RSS context itself.
+ */
*supportp = enp->en_hash_support;
return (0);
@@ -330,22 +340,25 @@ fail1:
}
__checkReturn efx_rc_t
-efx_rx_scale_support_get(
+efx_rx_scale_default_support_get(
__in efx_nic_t *enp,
- __out efx_rx_scale_support_t *supportp)
+ __out efx_rx_scale_context_type_t *typep)
{
efx_rc_t rc;
EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX);
- if (supportp == NULL) {
+ if (typep == NULL) {
rc = EINVAL;
goto fail1;
}
- /* Report if resources are available to support RSS */
- *supportp = enp->en_rss_support;
+ /*
+ * Report the RSS support the client gets by default if it
+ * does not allocate an RSS context itself.
+ */
+ *typep = enp->en_rss_context_type;
return (0);
@@ -354,10 +367,75 @@ fail1:
return (rc);
}
+#endif /* EFSYS_OPT_RX_SCALE */
+#if EFSYS_OPT_RX_SCALE
+ __checkReturn efx_rc_t
+efx_rx_scale_context_alloc(
+ __in efx_nic_t *enp,
+ __in efx_rx_scale_context_type_t type,
+ __in uint32_t num_queues,
+ __out uint32_t *rss_contextp)
+{
+ const efx_rx_ops_t *erxop = enp->en_erxop;
+ efx_rc_t rc;
+
+ EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
+ EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX);
+
+ if (erxop->erxo_scale_context_alloc == NULL) {
+ rc = ENOTSUP;
+ goto fail1;
+ }
+ if ((rc = erxop->erxo_scale_context_alloc(enp, type,
+ num_queues, rss_contextp)) != 0) {
+ goto fail2;
+ }
+
+ return (0);
+
+fail2:
+ EFSYS_PROBE(fail2);
+fail1:
+ EFSYS_PROBE1(fail1, efx_rc_t, rc);
+ return (rc);
+}
+#endif /* EFSYS_OPT_RX_SCALE */
+
+#if EFSYS_OPT_RX_SCALE
+ __checkReturn efx_rc_t
+efx_rx_scale_context_free(
+ __in efx_nic_t *enp,
+ __in uint32_t rss_context)
+{
+ const efx_rx_ops_t *erxop = enp->en_erxop;
+ efx_rc_t rc;
+
+ EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
+ EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX);
+
+ if (erxop->erxo_scale_context_free == NULL) {
+ rc = ENOTSUP;
+ goto fail1;
+ }
+ if ((rc = erxop->erxo_scale_context_free(enp, rss_context)) != 0)
+ goto fail2;
+
+ return (0);
+
+fail2:
+ EFSYS_PROBE(fail2);
+fail1:
+ EFSYS_PROBE1(fail1, efx_rc_t, rc);
+ return (rc);
+}
+#endif /* EFSYS_OPT_RX_SCALE */
+
+#if EFSYS_OPT_RX_SCALE
__checkReturn efx_rc_t
efx_rx_scale_mode_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in efx_rx_hash_alg_t alg,
__in efx_rx_hash_type_t type,
__in boolean_t insert)
@@ -369,7 +447,7 @@ efx_rx_scale_mode_set(
EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX);
if (erxop->erxo_scale_mode_set != NULL) {
- if ((rc = erxop->erxo_scale_mode_set(enp, alg,
+ if ((rc = erxop->erxo_scale_mode_set(enp, rss_context, alg,
type, insert)) != 0)
goto fail1;
}
@@ -386,6 +464,7 @@ fail1:
__checkReturn efx_rc_t
efx_rx_scale_key_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) uint8_t *key,
__in size_t n)
{
@@ -395,7 +474,7 @@ efx_rx_scale_key_set(
EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX);
- if ((rc = erxop->erxo_scale_key_set(enp, key, n)) != 0)
+ if ((rc = erxop->erxo_scale_key_set(enp, rss_context, key, n)) != 0)
goto fail1;
return (0);
@@ -411,6 +490,7 @@ fail1:
__checkReturn efx_rc_t
efx_rx_scale_tbl_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) unsigned int *table,
__in size_t n)
{
@@ -420,7 +500,7 @@ efx_rx_scale_tbl_set(
EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX);
- if ((rc = erxop->erxo_scale_tbl_set(enp, table, n)) != 0)
+ if ((rc = erxop->erxo_scale_tbl_set(enp, rss_context, table, n)) != 0)
goto fail1;
return (0);
@@ -654,7 +734,7 @@ siena_rx_init(
#if EFSYS_OPT_RX_SCALE
/* The RSS key and indirection table are writable. */
- enp->en_rss_support = EFX_RX_SCALE_EXCLUSIVE;
+ enp->en_rss_context_type = EFX_RX_SCALE_EXCLUSIVE;
/* Hardware can insert RX hash with/without RSS */
enp->en_hash_support = EFX_RX_HASH_AVAILABLE;
@@ -773,12 +853,18 @@ fail1:
static __checkReturn efx_rc_t
siena_rx_scale_mode_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in efx_rx_hash_alg_t alg,
__in efx_rx_hash_type_t type,
__in boolean_t insert)
{
efx_rc_t rc;
+ if (rss_context != EFX_RSS_CONTEXT_DEFAULT) {
+ rc = EINVAL;
+ goto fail1;
+ }
+
switch (alg) {
case EFX_RX_HASHALG_LFSR:
EFX_RX_LFSR_HASH(enp, insert);
@@ -794,17 +880,19 @@ siena_rx_scale_mode_set(
type & EFX_RX_HASH_TCPIPV6,
rc);
if (rc != 0)
- goto fail1;
+ goto fail2;
break;
default:
rc = EINVAL;
- goto fail2;
+ goto fail3;
}
return (0);
+fail3:
+ EFSYS_PROBE(fail3);
fail2:
EFSYS_PROBE(fail2);
fail1:
@@ -820,6 +908,7 @@ fail1:
static __checkReturn efx_rc_t
siena_rx_scale_key_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) uint8_t *key,
__in size_t n)
{
@@ -828,6 +917,11 @@ siena_rx_scale_key_set(
unsigned int offset;
efx_rc_t rc;
+ if (rss_context != EFX_RSS_CONTEXT_DEFAULT) {
+ rc = EINVAL;
+ goto fail1;
+ }
+
byte = 0;
/* Write Toeplitz IPv4 hash key */
@@ -848,7 +942,7 @@ siena_rx_scale_key_set(
--offset) {
if (oword.eo_u8[offset - 1] != key[byte++]) {
rc = EFAULT;
- goto fail1;
+ goto fail2;
}
}
@@ -897,7 +991,7 @@ siena_rx_scale_key_set(
--offset) {
if (oword.eo_u8[offset - 1] != key[byte++]) {
rc = EFAULT;
- goto fail2;
+ goto fail3;
}
}
@@ -909,7 +1003,7 @@ siena_rx_scale_key_set(
--offset) {
if (oword.eo_u8[offset - 1] != key[byte++]) {
rc = EFAULT;
- goto fail3;
+ goto fail4;
}
}
@@ -921,13 +1015,15 @@ siena_rx_scale_key_set(
--offset) {
if (oword.eo_u8[offset - 1] != key[byte++]) {
rc = EFAULT;
- goto fail4;
+ goto fail5;
}
}
done:
return (0);
+fail5:
+ EFSYS_PROBE(fail5);
fail4:
EFSYS_PROBE(fail4);
fail3:
@@ -945,6 +1041,7 @@ fail1:
static __checkReturn efx_rc_t
siena_rx_scale_tbl_set(
__in efx_nic_t *enp,
+ __in uint32_t rss_context,
__in_ecount(n) unsigned int *table,
__in size_t n)
{
@@ -955,11 +1052,16 @@ siena_rx_scale_tbl_set(
EFX_STATIC_ASSERT(EFX_RSS_TBL_SIZE == FR_BZ_RX_INDIRECTION_TBL_ROWS);
EFX_STATIC_ASSERT(EFX_MAXRSS == (1 << FRF_BZ_IT_QUEUE_WIDTH));
- if (n > FR_BZ_RX_INDIRECTION_TBL_ROWS) {
+ if (rss_context != EFX_RSS_CONTEXT_DEFAULT) {
rc = EINVAL;
goto fail1;
}
+ if (n > FR_BZ_RX_INDIRECTION_TBL_ROWS) {
+ rc = EINVAL;
+ goto fail2;
+ }
+
for (index = 0; index < FR_BZ_RX_INDIRECTION_TBL_ROWS; index++) {
uint32_t byte;
@@ -988,12 +1090,14 @@ siena_rx_scale_tbl_set(
/* Verify the entry */
if (EFX_OWORD_FIELD(oword, FRF_BZ_IT_QUEUE) != byte) {
rc = EFAULT;
- goto fail2;
+ goto fail3;
}
}
return (0);
+fail3:
+ EFSYS_PROBE(fail3);
fail2:
EFSYS_PROBE(fail2);
fail1:
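
The hunks above extend the common libefx RX scaling API with an explicit RSS context handle: contexts are allocated and freed through the new erxo_scale_context_alloc/erxo_scale_context_free methods, and the mode/key/table setters now take an rss_context argument (EFX_RSS_CONTEXT_DEFAULT selects the per-function default). A minimal sketch of how a client might string these calls together follows; it assumes only the signatures shown in this patch, and the hash types, key and table passed in are illustrative. Note that sfc_flow.c later in this patch defers the table programming until after filter insertion (see the comment there).

/* Minimal sketch, assuming only the efx API introduced above. */
static efx_rc_t
example_exclusive_rss_setup(efx_nic_t *enp, unsigned int nqueues,
			    efx_rx_hash_type_t hash_types,
			    uint8_t *key, unsigned int *tbl,
			    uint32_t *rss_contextp)
{
	efx_rc_t rc;

	rc = efx_rx_scale_context_alloc(enp, EFX_RX_SCALE_EXCLUSIVE,
					nqueues, rss_contextp);
	if (rc != 0)
		return rc;

	rc = efx_rx_scale_mode_set(enp, *rss_contextp,
				   EFX_RX_HASHALG_TOEPLITZ,
				   hash_types, B_TRUE);
	if (rc == 0)
		rc = efx_rx_scale_key_set(enp, *rss_contextp, key,
					  EFX_RSS_KEY_SIZE);
	if (rc == 0)
		rc = efx_rx_scale_tbl_set(enp, *rss_contextp, tbl,
					  EFX_RSS_TBL_SIZE);

	/* On any failure, release the freshly allocated context */
	if (rc != 0)
		(void)efx_rx_scale_context_free(enp, *rss_contextp);

	return rc;
}
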
diff --git a/drivers/net/sfc/base/hunt_nic.c b/drivers/net/sfc/base/hunt_nic.c
index addbf1c5..19fb7cfb 100644
--- a/drivers/net/sfc/base/hunt_nic.c
+++ b/drivers/net/sfc/base/hunt_nic.c
@@ -301,6 +301,13 @@ hunt_board_cfg(
/* Alignment for WPTR updates */
encp->enc_rx_push_align = EF10_RX_WPTR_ALIGN;
+ /*
+ * Maximum number of exclusive RSS contexts which can be allocated. The
+ * hardware supports 64, but 6 are reserved for shared contexts. They
+ * are a global resource so not all may be available.
+ */
+ encp->enc_rx_scale_max_exclusive_contexts = 58;
+
encp->enc_tx_dma_desc_size_max = EFX_MASK32(ESF_DZ_RX_KER_BYTE_CNT);
/* No boundary crossing limits */
encp->enc_tx_dma_desc_boundary = 0;
diff --git a/drivers/net/sfc/base/medford_nic.c b/drivers/net/sfc/base/medford_nic.c
index 07afac1e..d361d654 100644
--- a/drivers/net/sfc/base/medford_nic.c
+++ b/drivers/net/sfc/base/medford_nic.c
@@ -298,6 +298,13 @@ medford_board_cfg(
/* Alignment for WPTR updates */
encp->enc_rx_push_align = EF10_RX_WPTR_ALIGN;
+ /*
+ * Maximum number of exclusive RSS contexts which can be allocated. The
+ * hardware supports 64, but 6 are reserved for shared contexts. They
+ * are a global resource so not all may be available.
+ */
+ encp->enc_rx_scale_max_exclusive_contexts = 58;
+
encp->enc_tx_dma_desc_size_max = EFX_MASK32(ESF_DZ_RX_KER_BYTE_CNT);
/* No boundary crossing limits */
encp->enc_tx_dma_desc_boundary = 0;
diff --git a/drivers/net/sfc/base/siena_nic.c b/drivers/net/sfc/base/siena_nic.c
index 129b854b..fcc8f151 100644
--- a/drivers/net/sfc/base/siena_nic.c
+++ b/drivers/net/sfc/base/siena_nic.c
@@ -135,6 +135,9 @@ siena_board_cfg(
/* Alignment for WPTR updates */
encp->enc_rx_push_align = 1;
+ /* There is one RSS context per function */
+ encp->enc_rx_scale_max_exclusive_contexts = 1;
+
encp->enc_tx_dma_desc_size_max = EFX_MASK32(FSF_AZ_TX_KER_BYTE_COUNT);
/* Fragments must not span 4k boundaries. */
encp->enc_tx_dma_desc_boundary = 4096;
diff --git a/drivers/net/sfc/efsys.h b/drivers/net/sfc/efsys.h
index 0405d02b..f428b624 100644
--- a/drivers/net/sfc/efsys.h
+++ b/drivers/net/sfc/efsys.h
@@ -253,7 +253,7 @@ typedef struct __efsys_identifier_s efsys_identifier_t;
/* DMA */
-typedef phys_addr_t efsys_dma_addr_t;
+typedef rte_iova_t efsys_dma_addr_t;
typedef struct efsys_mem_s {
const struct rte_memzone *esm_mz;
diff --git a/drivers/net/sfc/sfc.c b/drivers/net/sfc/sfc.c
index 6cecfc00..49d7e937 100644
--- a/drivers/net/sfc/sfc.c
+++ b/drivers/net/sfc/sfc.c
@@ -61,8 +61,8 @@ sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
return ENOMEM;
}
- esmp->esm_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr);
- if (esmp->esm_addr == RTE_BAD_PHYS_ADDR) {
+ esmp->esm_addr = mz->iova;
+ if (esmp->esm_addr == RTE_BAD_IOVA) {
(void)rte_memzone_free(mz);
return EFAULT;
}
@@ -501,7 +501,7 @@ sfc_mem_bar_fini(struct sfc_adapter *sa)
* and also known to give a uniform distribution
* (a good distribution of traffic between different CPUs)
*/
-static const uint8_t default_rss_key[SFC_RSS_KEY_SIZE] = {
+static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = {
0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
@@ -510,10 +510,10 @@ static const uint8_t default_rss_key[SFC_RSS_KEY_SIZE] = {
};
#endif
+#if EFSYS_OPT_RX_SCALE
static int
sfc_set_rss_defaults(struct sfc_adapter *sa)
{
-#if EFSYS_OPT_RX_SCALE
int rc;
rc = efx_intr_init(sa->nic, sa->intr.type, NULL);
@@ -528,11 +528,11 @@ sfc_set_rss_defaults(struct sfc_adapter *sa)
if (rc != 0)
goto fail_rx_init;
- rc = efx_rx_scale_support_get(sa->nic, &sa->rss_support);
+ rc = efx_rx_scale_default_support_get(sa->nic, &sa->rss_support);
if (rc != 0)
goto fail_scale_support_get;
- rc = efx_rx_hash_support_get(sa->nic, &sa->hash_support);
+ rc = efx_rx_hash_default_support_get(sa->nic, &sa->hash_support);
if (rc != 0)
goto fail_hash_support_get;
@@ -556,10 +556,14 @@ fail_ev_init:
fail_intr_init:
return rc;
+}
#else
+static int
+sfc_set_rss_defaults(__rte_unused struct sfc_adapter *sa)
+{
return 0;
-#endif
}
+#endif
int
sfc_attach(struct sfc_adapter *sa)
diff --git a/drivers/net/sfc/sfc.h b/drivers/net/sfc/sfc.h
index 286d1ac1..7f11bf22 100644
--- a/drivers/net/sfc/sfc.h
+++ b/drivers/net/sfc/sfc.h
@@ -35,6 +35,7 @@
#include <stdbool.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_ethdev.h>
#include <rte_kvargs.h>
#include <rte_spinlock.h>
@@ -48,8 +49,6 @@ extern "C" {
#endif
#if EFSYS_OPT_RX_SCALE
-/** RSS key length (bytes) */
-#define SFC_RSS_KEY_SIZE 40
/** RSS hash offloads mask */
#define SFC_RSS_OFFLOADS (ETH_RSS_IP | ETH_RSS_TCP)
#endif
@@ -225,11 +224,11 @@ struct sfc_adapter {
unsigned int rss_channels;
#if EFSYS_OPT_RX_SCALE
- efx_rx_scale_support_t rss_support;
+ efx_rx_scale_context_type_t rss_support;
efx_rx_hash_support_t hash_support;
efx_rx_hash_type_t rss_hash_types;
unsigned int rss_tbl[EFX_RSS_TBL_SIZE];
- uint8_t rss_key[SFC_RSS_KEY_SIZE];
+ uint8_t rss_key[EFX_RSS_KEY_SIZE];
#endif
/*
diff --git a/drivers/net/sfc/sfc_dp_rx.h b/drivers/net/sfc/sfc_dp_rx.h
index a7b82784..3f6a604b 100644
--- a/drivers/net/sfc/sfc_dp_rx.h
+++ b/drivers/net/sfc/sfc_dp_rx.h
@@ -155,6 +155,10 @@ typedef const uint32_t * (sfc_dp_rx_supported_ptypes_get_t)(void);
/** Get number of pending Rx descriptors */
typedef unsigned int (sfc_dp_rx_qdesc_npending_t)(struct sfc_dp_rxq *dp_rxq);
+/** Check Rx descriptor status */
+typedef int (sfc_dp_rx_qdesc_status_t)(struct sfc_dp_rxq *dp_rxq,
+ uint16_t offset);
+
/** Receive datapath definition */
struct sfc_dp_rx {
struct sfc_dp dp;
@@ -170,6 +174,7 @@ struct sfc_dp_rx {
sfc_dp_rx_qpurge_t *qpurge;
sfc_dp_rx_supported_ptypes_get_t *supported_ptypes_get;
sfc_dp_rx_qdesc_npending_t *qdesc_npending;
+ sfc_dp_rx_qdesc_status_t *qdesc_status;
eth_rx_burst_t pkt_burst;
};
diff --git a/drivers/net/sfc/sfc_dp_tx.h b/drivers/net/sfc/sfc_dp_tx.h
index c1c34191..94d1b108 100644
--- a/drivers/net/sfc/sfc_dp_tx.h
+++ b/drivers/net/sfc/sfc_dp_tx.h
@@ -127,6 +127,12 @@ typedef bool (sfc_dp_tx_qtx_ev_t)(struct sfc_dp_txq *dp_txq, unsigned int id);
*/
typedef void (sfc_dp_tx_qreap_t)(struct sfc_dp_txq *dp_txq);
+/**
+ * Check Tx descriptor status
+ */
+typedef int (sfc_dp_tx_qdesc_status_t)(struct sfc_dp_txq *dp_txq,
+ uint16_t offset);
+
/** Transmit datapath definition */
struct sfc_dp_tx {
struct sfc_dp dp;
@@ -136,12 +142,15 @@ struct sfc_dp_tx {
#define SFC_DP_TX_FEAT_TSO 0x2
#define SFC_DP_TX_FEAT_MULTI_SEG 0x4
#define SFC_DP_TX_FEAT_MULTI_PROCESS 0x8
+#define SFC_DP_TX_FEAT_MULTI_POOL 0x10
+#define SFC_DP_TX_FEAT_REFCNT 0x20
sfc_dp_tx_qcreate_t *qcreate;
sfc_dp_tx_qdestroy_t *qdestroy;
sfc_dp_tx_qstart_t *qstart;
sfc_dp_tx_qstop_t *qstop;
sfc_dp_tx_qtx_ev_t *qtx_ev;
sfc_dp_tx_qreap_t *qreap;
+ sfc_dp_tx_qdesc_status_t *qdesc_status;
eth_tx_burst_t pkt_burst;
};
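
The qdesc_status callbacks introduced in these datapath definitions back the generic ethdev descriptor status helpers (they are wired into eth_dev_ops further down in sfc_ethdev.c). A hedged application-side sketch follows, assuming the standard rte_eth_rx_descriptor_status()/rte_eth_tx_descriptor_status() wrappers of this DPDK release; the port, queue and offset values are illustrative.

#include <stdio.h>
#include <rte_ethdev.h>

/* Query the status of one Rx and one Tx descriptor (offset 0 in each ring) */
static void
example_desc_status(uint16_t port_id, uint16_t rx_queue, uint16_t tx_queue)
{
	int rx_st = rte_eth_rx_descriptor_status(port_id, rx_queue, 0);
	int tx_st = rte_eth_tx_descriptor_status(port_id, tx_queue, 0);

	if (rx_st == RTE_ETH_RX_DESC_DONE)
		printf("Rx descriptor holds a received packet\n");
	else if (rx_st == RTE_ETH_RX_DESC_AVAIL)
		printf("Rx descriptor is free for the HW to fill\n");

	if (tx_st == RTE_ETH_TX_DESC_FULL)
		printf("Tx descriptor is still in use\n");
	else if (tx_st == RTE_ETH_TX_DESC_DONE)
		printf("Tx descriptor can be reused\n");
}
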
diff --git a/drivers/net/sfc/sfc_ef10_rx.c b/drivers/net/sfc/sfc_ef10_rx.c
index 60812cbe..18d60c69 100644
--- a/drivers/net/sfc/sfc_ef10_rx.c
+++ b/drivers/net/sfc/sfc_ef10_rx.c
@@ -177,7 +177,7 @@ sfc_ef10_rx_qrefill(struct sfc_ef10_rxq *rxq)
++i, ++id) {
struct rte_mbuf *m = objs[i];
struct sfc_ef10_rx_sw_desc *rxd;
- phys_addr_t phys_addr;
+ rte_iova_t phys_addr;
SFC_ASSERT((id & ~ptr_mask) == 0);
rxd = &rxq->sw_ring[id];
@@ -189,7 +189,7 @@ sfc_ef10_rx_qrefill(struct sfc_ef10_rxq *rxq)
* structure members.
*/
- phys_addr = rte_mbuf_data_dma_addr_default(m);
+ phys_addr = rte_mbuf_data_iova_default(m);
EFX_POPULATE_QWORD_2(rxq->rxq_hw_ring[id],
ESF_DZ_RX_KER_BYTE_CNT, buf_size,
ESF_DZ_RX_KER_BUF_ADDR, phys_addr);
@@ -544,6 +544,14 @@ sfc_ef10_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
return -ENOTSUP;
}
+static sfc_dp_rx_qdesc_status_t sfc_ef10_rx_qdesc_status;
+static int
+sfc_ef10_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
+ __rte_unused uint16_t offset)
+{
+ return -ENOTSUP;
+}
+
static uint64_t
sfc_ef10_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
@@ -708,5 +716,6 @@ struct sfc_dp_rx sfc_ef10_rx = {
.qpurge = sfc_ef10_rx_qpurge,
.supported_ptypes_get = sfc_ef10_supported_ptypes_get,
.qdesc_npending = sfc_ef10_rx_qdesc_npending,
+ .qdesc_status = sfc_ef10_rx_qdesc_status,
.pkt_burst = sfc_ef10_recv_pkts,
};
diff --git a/drivers/net/sfc/sfc_ef10_tx.c b/drivers/net/sfc/sfc_ef10_tx.c
index da2b5edb..0454e79a 100644
--- a/drivers/net/sfc/sfc_ef10_tx.c
+++ b/drivers/net/sfc/sfc_ef10_tx.c
@@ -158,17 +158,35 @@ sfc_ef10_tx_reap(struct sfc_ef10_txq *txq)
pending += sfc_ef10_tx_process_events(txq);
if (pending != completed) {
+ struct rte_mbuf *bulk[SFC_TX_REAP_BULK_SIZE];
+ unsigned int nb = 0;
+
do {
struct sfc_ef10_tx_sw_desc *txd;
+ struct rte_mbuf *m;
txd = &txq->sw_ring[completed & ptr_mask];
+ if (txd->mbuf == NULL)
+ continue;
- if (txd->mbuf != NULL) {
- rte_pktmbuf_free(txd->mbuf);
- txd->mbuf = NULL;
+ m = rte_pktmbuf_prefree_seg(txd->mbuf);
+ txd->mbuf = NULL;
+ if (m == NULL)
+ continue;
+
+ if ((nb == RTE_DIM(bulk)) ||
+ ((nb != 0) && (m->pool != bulk[0]->pool))) {
+ rte_mempool_put_bulk(bulk[0]->pool,
+ (void *)bulk, nb);
+ nb = 0;
}
+
+ bulk[nb++] = m;
} while (++completed != pending);
+ if (nb != 0)
+ rte_mempool_put_bulk(bulk[0]->pool, (void *)bulk, nb);
+
txq->completed = completed;
}
@@ -177,7 +195,7 @@ sfc_ef10_tx_reap(struct sfc_ef10_txq *txq)
}
static void
-sfc_ef10_tx_qdesc_dma_create(phys_addr_t addr, uint16_t size, bool eop,
+sfc_ef10_tx_qdesc_dma_create(rte_iova_t addr, uint16_t size, bool eop,
efx_qword_t *edp)
{
EFX_POPULATE_QWORD_4(*edp,
@@ -323,8 +341,9 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
pkt_len = m_seg->pkt_len;
do {
- phys_addr_t seg_addr = rte_mbuf_data_dma_addr(m_seg);
+ rte_iova_t seg_addr = rte_mbuf_data_iova(m_seg);
unsigned int seg_len = rte_pktmbuf_data_len(m_seg);
+ unsigned int id = added & ptr_mask;
SFC_ASSERT(seg_len <= SFC_EF10_TX_DMA_DESC_LEN_MAX);
@@ -332,15 +351,30 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
sfc_ef10_tx_qdesc_dma_create(seg_addr,
seg_len, (pkt_len == 0),
- &txq->txq_hw_ring[added & ptr_mask]);
+ &txq->txq_hw_ring[id]);
+
+			/*
+			 * rte_pktmbuf_free() is the usual DPDK way to recycle
+			 * packets: it checks every segment's reference counter
+			 * and returns the buffer to its pool whenever possible.
+			 * However, freeing mbuf segments one by one may cost
+			 * performance, so the reap done by sfc_ef10_tx_reap()
+			 * frees buffers in bulks instead (all mbufs within a
+			 * bulk belong to the same pool). To make that work,
+			 * each segment pointer must be attached to its own SW
+			 * descriptor, so that a single pass over the ring on
+			 * reap is enough to inspect all the buffers
+			 */
+ txq->sw_ring[id].mbuf = m_seg;
+
++added;
} while ((m_seg = m_seg->next) != 0);
dma_desc_space -= (added - pkt_start);
-
- /* Assign mbuf to the last used desc */
- txq->sw_ring[(added - 1) & ptr_mask].mbuf = *pktp;
}
if (likely(added != txq->added)) {
@@ -367,14 +401,25 @@ sfc_ef10_simple_tx_reap(struct sfc_ef10_txq *txq)
pending += sfc_ef10_tx_process_events(txq);
if (pending != completed) {
+ struct rte_mbuf *bulk[SFC_TX_REAP_BULK_SIZE];
+ unsigned int nb = 0;
+
do {
struct sfc_ef10_tx_sw_desc *txd;
txd = &txq->sw_ring[completed & ptr_mask];
- rte_pktmbuf_free_seg(txd->mbuf);
+ if (nb == RTE_DIM(bulk)) {
+ rte_mempool_put_bulk(bulk[0]->pool,
+ (void *)bulk, nb);
+ nb = 0;
+ }
+
+ bulk[nb++] = txd->mbuf;
} while (++completed != pending);
+ rte_mempool_put_bulk(bulk[0]->pool, (void *)bulk, nb);
+
txq->completed = completed;
}
@@ -419,7 +464,7 @@ sfc_ef10_simple_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
SFC_ASSERT(rte_pktmbuf_data_len(pkt) <=
SFC_EF10_TX_DMA_DESC_LEN_MAX);
- sfc_ef10_tx_qdesc_dma_create(rte_mbuf_data_dma_addr(pkt),
+ sfc_ef10_tx_qdesc_dma_create(rte_mbuf_data_iova(pkt),
rte_pktmbuf_data_len(pkt),
true, &txq->txq_hw_ring[id]);
@@ -557,7 +602,7 @@ sfc_ef10_tx_qreap(struct sfc_dp_txq *dp_txq)
txd = &txq->sw_ring[completed & txq->ptr_mask];
if (txd->mbuf != NULL) {
- rte_pktmbuf_free(txd->mbuf);
+ rte_pktmbuf_free_seg(txd->mbuf);
txd->mbuf = NULL;
}
}
@@ -565,6 +610,14 @@ sfc_ef10_tx_qreap(struct sfc_dp_txq *dp_txq)
txq->flags &= ~SFC_EF10_TXQ_STARTED;
}
+static sfc_dp_tx_qdesc_status_t sfc_ef10_tx_qdesc_status;
+static int
+sfc_ef10_tx_qdesc_status(__rte_unused struct sfc_dp_txq *dp_txq,
+ __rte_unused uint16_t offset)
+{
+ return -ENOTSUP;
+}
+
struct sfc_dp_tx sfc_ef10_tx = {
.dp = {
.name = SFC_KVARG_DATAPATH_EF10,
@@ -572,6 +625,8 @@ struct sfc_dp_tx sfc_ef10_tx = {
.hw_fw_caps = SFC_DP_HW_FW_CAP_EF10,
},
.features = SFC_DP_TX_FEAT_MULTI_SEG |
+ SFC_DP_TX_FEAT_MULTI_POOL |
+ SFC_DP_TX_FEAT_REFCNT |
SFC_DP_TX_FEAT_MULTI_PROCESS,
.qcreate = sfc_ef10_tx_qcreate,
.qdestroy = sfc_ef10_tx_qdestroy,
@@ -579,6 +634,7 @@ struct sfc_dp_tx sfc_ef10_tx = {
.qtx_ev = sfc_ef10_tx_qtx_ev,
.qstop = sfc_ef10_tx_qstop,
.qreap = sfc_ef10_tx_qreap,
+ .qdesc_status = sfc_ef10_tx_qdesc_status,
.pkt_burst = sfc_ef10_xmit_pkts,
};
@@ -594,5 +650,6 @@ struct sfc_dp_tx sfc_ef10_simple_tx = {
.qtx_ev = sfc_ef10_tx_qtx_ev,
.qstop = sfc_ef10_tx_qstop,
.qreap = sfc_ef10_tx_qreap,
+ .qdesc_status = sfc_ef10_tx_qdesc_status,
.pkt_burst = sfc_ef10_simple_xmit_pkts,
};
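
For reference, the bulk-free pattern used by the reworked reap above can be sketched on its own. This is a simplified illustration, assuming segments have already been stashed per SW descriptor as explained in the comment in sfc_ef10_xmit_pkts(); the helper name and parameters are invented for the example.

#include <rte_mbuf.h>
#include <rte_mempool.h>

#define EXAMPLE_REAP_BULK_SIZE 32	/* mirrors SFC_TX_REAP_BULK_SIZE */

/* Free transmitted mbuf segments, batching returns per mempool */
static void
example_bulk_free(struct rte_mbuf **segs, unsigned int nb_segs)
{
	struct rte_mbuf *bulk[EXAMPLE_REAP_BULK_SIZE];
	unsigned int nb = 0;
	unsigned int i;

	for (i = 0; i < nb_segs; i++) {
		struct rte_mbuf *m = rte_pktmbuf_prefree_seg(segs[i]);

		if (m == NULL)	/* segment is still referenced elsewhere */
			continue;

		/* Flush when the buffer is full or the pool changes */
		if (nb == EXAMPLE_REAP_BULK_SIZE ||
		    (nb != 0 && m->pool != bulk[0]->pool)) {
			rte_mempool_put_bulk(bulk[0]->pool, (void **)bulk, nb);
			nb = 0;
		}

		bulk[nb++] = m;
	}

	if (nb != 0)
		rte_mempool_put_bulk(bulk[0]->pool, (void **)bulk, nb);
}
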
diff --git a/drivers/net/sfc/sfc_ethdev.c b/drivers/net/sfc/sfc_ethdev.c
index 12bcd6fa..2f5f86f8 100644
--- a/drivers/net/sfc/sfc_ethdev.c
+++ b/drivers/net/sfc/sfc_ethdev.c
@@ -33,6 +33,7 @@
#include <rte_ethdev.h>
#include <rte_ethdev_pci.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_errno.h>
#include "efx.h"
@@ -145,10 +146,16 @@ sfc_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
if (~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG)
dev_info->default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
+ if (~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL)
+ dev_info->default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOMULTMEMP;
+
+ if (~sa->dp_tx->features & SFC_DP_TX_FEAT_REFCNT)
+ dev_info->default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOREFCOUNT;
+
#if EFSYS_OPT_RX_SCALE
if (sa->rss_support != EFX_RX_SCALE_UNAVAILABLE) {
dev_info->reta_size = EFX_RSS_TBL_SIZE;
- dev_info->hash_key_size = SFC_RSS_KEY_SIZE;
+ dev_info->hash_key_size = EFX_RSS_KEY_SIZE;
dev_info->flow_type_rss_offloads = SFC_RSS_OFFLOADS;
}
#endif
@@ -515,16 +522,18 @@ sfc_tx_queue_release(void *queue)
sfc_adapter_unlock(sa);
}
-static void
+static int
sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct sfc_adapter *sa = dev->data->dev_private;
struct sfc_port *port = &sa->port;
uint64_t *mac_stats;
+ int ret;
rte_spinlock_lock(&port->mac_stats_lock);
- if (sfc_port_update_mac_stats(sa) != 0)
+ ret = sfc_port_update_mac_stats(sa);
+ if (ret != 0)
goto unlock;
mac_stats = port->mac_stats_buf;
@@ -581,6 +590,8 @@ sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
unlock:
rte_spinlock_unlock(&port->mac_stats_lock);
+ SFC_ASSERT(ret >= 0);
+ return -ret;
}
static void
@@ -991,7 +1002,7 @@ sfc_set_mc_addr_list(struct rte_eth_dev *dev, struct ether_addr *mc_addr_set,
}
for (i = 0; i < nb_mc_addr; ++i) {
- (void)rte_memcpy(mc_addrs, mc_addr_set[i].addr_bytes,
+ rte_memcpy(mc_addrs, mc_addr_set[i].addr_bytes,
EFX_MAC_ADDR_LEN);
mc_addrs += EFX_MAC_ADDR_LEN;
}
@@ -1087,6 +1098,24 @@ sfc_rx_descriptor_done(void *queue, uint16_t offset)
}
static int
+sfc_rx_descriptor_status(void *queue, uint16_t offset)
+{
+ struct sfc_dp_rxq *dp_rxq = queue;
+ struct sfc_rxq *rxq = sfc_rxq_by_dp_rxq(dp_rxq);
+
+ return rxq->evq->sa->dp_rx->qdesc_status(dp_rxq, offset);
+}
+
+static int
+sfc_tx_descriptor_status(void *queue, uint16_t offset)
+{
+ struct sfc_dp_txq *dp_txq = queue;
+ struct sfc_txq *txq = sfc_txq_by_dp_txq(dp_txq);
+
+ return txq->evq->sa->dp_tx->qdesc_status(dp_txq, offset);
+}
+
+static int
sfc_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
struct sfc_adapter *sa = dev->data->dev_private;
@@ -1205,9 +1234,9 @@ sfc_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
* locally in 'sfc_adapter' and kept up-to-date
*/
rss_conf->rss_hf = sfc_efx_to_rte_hash_type(sa->rss_hash_types);
- rss_conf->rss_key_len = SFC_RSS_KEY_SIZE;
+ rss_conf->rss_key_len = EFX_RSS_KEY_SIZE;
if (rss_conf->rss_key != NULL)
- rte_memcpy(rss_conf->rss_key, sa->rss_key, SFC_RSS_KEY_SIZE);
+ rte_memcpy(rss_conf->rss_key, sa->rss_key, EFX_RSS_KEY_SIZE);
sfc_adapter_unlock(sa);
@@ -1252,14 +1281,17 @@ sfc_dev_rss_hash_update(struct rte_eth_dev *dev,
efx_hash_types = sfc_rte_to_efx_hash_type(rss_conf->rss_hf);
- rc = efx_rx_scale_mode_set(sa->nic, EFX_RX_HASHALG_TOEPLITZ,
+ rc = efx_rx_scale_mode_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
+ EFX_RX_HASHALG_TOEPLITZ,
efx_hash_types, B_TRUE);
if (rc != 0)
goto fail_scale_mode_set;
if (rss_conf->rss_key != NULL) {
if (sa->state == SFC_ADAPTER_STARTED) {
- rc = efx_rx_scale_key_set(sa->nic, rss_conf->rss_key,
+ rc = efx_rx_scale_key_set(sa->nic,
+ EFX_RSS_CONTEXT_DEFAULT,
+ rss_conf->rss_key,
sizeof(sa->rss_key));
if (rc != 0)
goto fail_scale_key_set;
@@ -1275,7 +1307,8 @@ sfc_dev_rss_hash_update(struct rte_eth_dev *dev,
return 0;
fail_scale_key_set:
- if (efx_rx_scale_mode_set(sa->nic, EFX_RX_HASHALG_TOEPLITZ,
+ if (efx_rx_scale_mode_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
+ EFX_RX_HASHALG_TOEPLITZ,
sa->rss_hash_types, B_TRUE) != 0)
sfc_err(sa, "failed to restore RSS mode");
@@ -1326,7 +1359,7 @@ sfc_dev_rss_reta_update(struct rte_eth_dev *dev,
struct sfc_port *port = &sa->port;
unsigned int *rss_tbl_new;
uint16_t entry;
- int rc;
+ int rc = 0;
if (port->isolated)
@@ -1371,10 +1404,16 @@ sfc_dev_rss_reta_update(struct rte_eth_dev *dev,
}
}
- rc = efx_rx_scale_tbl_set(sa->nic, rss_tbl_new, EFX_RSS_TBL_SIZE);
- if (rc == 0)
- rte_memcpy(sa->rss_tbl, rss_tbl_new, sizeof(sa->rss_tbl));
+ if (sa->state == SFC_ADAPTER_STARTED) {
+ rc = efx_rx_scale_tbl_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
+ rss_tbl_new, EFX_RSS_TBL_SIZE);
+ if (rc != 0)
+ goto fail_scale_tbl_set;
+ }
+
+ rte_memcpy(sa->rss_tbl, rss_tbl_new, sizeof(sa->rss_tbl));
+fail_scale_tbl_set:
bad_reta_entry:
sfc_adapter_unlock(sa);
@@ -1469,6 +1508,8 @@ static const struct eth_dev_ops sfc_eth_dev_ops = {
.rx_queue_release = sfc_rx_queue_release,
.rx_queue_count = sfc_rx_queue_count,
.rx_descriptor_done = sfc_rx_descriptor_done,
+ .rx_descriptor_status = sfc_rx_descriptor_status,
+ .tx_descriptor_status = sfc_tx_descriptor_status,
.tx_queue_setup = sfc_tx_queue_setup,
.tx_queue_release = sfc_tx_queue_release,
.flow_ctrl_get = sfc_flow_ctrl_get,
@@ -1751,8 +1792,6 @@ sfc_eth_dev_init(struct rte_eth_dev *dev)
/* Copy PCI device info to the dev->data */
rte_eth_copy_pci_info(dev, pci_dev);
- dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
-
rc = sfc_kvargs_parse(sa);
if (rc != 0)
goto fail_kvargs_parse;
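
With the driver-side changes above, RSS reconfiguration still goes through the standard ethdev call; the hedged sketch below shows the application side, assuming the rte_eth_dev_rss_hash_update() prototype of this release. The key contents are illustrative, and the adapter expects a 40-byte key (EFX_RSS_KEY_SIZE).

#include <rte_ethdev.h>

/* Reprogram the default RSS context of 'port_id' with an illustrative key */
static int
example_rss_hash_update(uint16_t port_id)
{
	static uint8_t key[40] = { 0x6d, 0x5a, /* remaining bytes are zero */ };
	struct rte_eth_rss_conf conf = {
		.rss_key = key,
		.rss_key_len = sizeof(key),
		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
	};

	return rte_eth_dev_rss_hash_update(port_id, &conf);
}
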
diff --git a/drivers/net/sfc/sfc_flow.c b/drivers/net/sfc/sfc_flow.c
index 110dfb89..f2050f65 100644
--- a/drivers/net/sfc/sfc_flow.c
+++ b/drivers/net/sfc/sfc_flow.c
@@ -803,7 +803,7 @@ sfc_flow_parse_attr(const struct rte_flow_attr *attr,
}
flow->spec.efs_flags |= EFX_FILTER_FLAG_RX;
- flow->spec.efs_rss_context = EFX_FILTER_SPEC_RSS_CONTEXT_DEFAULT;
+ flow->spec.efs_rss_context = EFX_RSS_CONTEXT_DEFAULT;
return 0;
}
@@ -886,6 +886,170 @@ sfc_flow_parse_queue(struct sfc_adapter *sa,
return 0;
}
+#if EFSYS_OPT_RX_SCALE
+static int
+sfc_flow_parse_rss(struct sfc_adapter *sa,
+ const struct rte_flow_action_rss *rss,
+ struct rte_flow *flow)
+{
+ unsigned int rxq_sw_index;
+ struct sfc_rxq *rxq;
+ unsigned int rxq_hw_index_min;
+ unsigned int rxq_hw_index_max;
+ const struct rte_eth_rss_conf *rss_conf = rss->rss_conf;
+ uint64_t rss_hf;
+ uint8_t *rss_key = NULL;
+ struct sfc_flow_rss *sfc_rss_conf = &flow->rss_conf;
+ unsigned int i;
+
+ if (rss->num == 0)
+ return -EINVAL;
+
+ rxq_sw_index = sa->rxq_count - 1;
+ rxq = sa->rxq_info[rxq_sw_index].rxq;
+ rxq_hw_index_min = rxq->hw_index;
+ rxq_hw_index_max = 0;
+
+ for (i = 0; i < rss->num; ++i) {
+ rxq_sw_index = rss->queue[i];
+
+ if (rxq_sw_index >= sa->rxq_count)
+ return -EINVAL;
+
+ rxq = sa->rxq_info[rxq_sw_index].rxq;
+
+ if (rxq->hw_index < rxq_hw_index_min)
+ rxq_hw_index_min = rxq->hw_index;
+
+ if (rxq->hw_index > rxq_hw_index_max)
+ rxq_hw_index_max = rxq->hw_index;
+ }
+
+ rss_hf = (rss_conf != NULL) ? rss_conf->rss_hf : SFC_RSS_OFFLOADS;
+ if ((rss_hf & ~SFC_RSS_OFFLOADS) != 0)
+ return -EINVAL;
+
+ if (rss_conf != NULL) {
+ if (rss_conf->rss_key_len != sizeof(sa->rss_key))
+ return -EINVAL;
+
+ rss_key = rss_conf->rss_key;
+ } else {
+ rss_key = sa->rss_key;
+ }
+
+ flow->rss = B_TRUE;
+
+ sfc_rss_conf->rxq_hw_index_min = rxq_hw_index_min;
+ sfc_rss_conf->rxq_hw_index_max = rxq_hw_index_max;
+ sfc_rss_conf->rss_hash_types = sfc_rte_to_efx_hash_type(rss_hf);
+ rte_memcpy(sfc_rss_conf->rss_key, rss_key, sizeof(sa->rss_key));
+
+ for (i = 0; i < RTE_DIM(sfc_rss_conf->rss_tbl); ++i) {
+ unsigned int rxq_sw_index = rss->queue[i % rss->num];
+ struct sfc_rxq *rxq = sa->rxq_info[rxq_sw_index].rxq;
+
+ sfc_rss_conf->rss_tbl[i] = rxq->hw_index - rxq_hw_index_min;
+ }
+
+ return 0;
+}
+#endif /* EFSYS_OPT_RX_SCALE */
+
+static int
+sfc_flow_filter_insert(struct sfc_adapter *sa,
+ struct rte_flow *flow)
+{
+ efx_filter_spec_t *spec = &flow->spec;
+
+#if EFSYS_OPT_RX_SCALE
+ struct sfc_flow_rss *rss = &flow->rss_conf;
+ int rc = 0;
+
+ if (flow->rss) {
+ unsigned int rss_spread = MIN(rss->rxq_hw_index_max -
+ rss->rxq_hw_index_min + 1,
+ EFX_MAXRSS);
+
+ rc = efx_rx_scale_context_alloc(sa->nic,
+ EFX_RX_SCALE_EXCLUSIVE,
+ rss_spread,
+ &spec->efs_rss_context);
+ if (rc != 0)
+ goto fail_scale_context_alloc;
+
+ rc = efx_rx_scale_mode_set(sa->nic, spec->efs_rss_context,
+ EFX_RX_HASHALG_TOEPLITZ,
+ rss->rss_hash_types, B_TRUE);
+ if (rc != 0)
+ goto fail_scale_mode_set;
+
+ rc = efx_rx_scale_key_set(sa->nic, spec->efs_rss_context,
+ rss->rss_key,
+ sizeof(sa->rss_key));
+ if (rc != 0)
+ goto fail_scale_key_set;
+
+ spec->efs_dmaq_id = rss->rxq_hw_index_min;
+ spec->efs_flags |= EFX_FILTER_FLAG_RX_RSS;
+ }
+
+ rc = efx_filter_insert(sa->nic, spec);
+ if (rc != 0)
+ goto fail_filter_insert;
+
+ if (flow->rss) {
+ /*
+ * Scale table is set after filter insertion because
+ * the table entries are relative to the base RxQ ID
+ * and the latter is submitted to the HW by means of
+ * inserting a filter, so by the time of the request
+ * the HW knows all the information needed to verify
+ * the table entries, and the operation will succeed
+ */
+ rc = efx_rx_scale_tbl_set(sa->nic, spec->efs_rss_context,
+ rss->rss_tbl, RTE_DIM(rss->rss_tbl));
+ if (rc != 0)
+ goto fail_scale_tbl_set;
+ }
+
+ return 0;
+
+fail_scale_tbl_set:
+ efx_filter_remove(sa->nic, spec);
+
+fail_filter_insert:
+fail_scale_key_set:
+fail_scale_mode_set:
+	if (flow->rss)
+ efx_rx_scale_context_free(sa->nic, spec->efs_rss_context);
+
+fail_scale_context_alloc:
+ return rc;
+#else /* !EFSYS_OPT_RX_SCALE */
+ return efx_filter_insert(sa->nic, spec);
+#endif /* EFSYS_OPT_RX_SCALE */
+}
+
+static int
+sfc_flow_filter_remove(struct sfc_adapter *sa,
+ struct rte_flow *flow)
+{
+ efx_filter_spec_t *spec = &flow->spec;
+ int rc = 0;
+
+ rc = efx_filter_remove(sa->nic, spec);
+ if (rc != 0)
+ return rc;
+
+#if EFSYS_OPT_RX_SCALE
+ if (flow->rss)
+ rc = efx_rx_scale_context_free(sa->nic, spec->efs_rss_context);
+#endif /* EFSYS_OPT_RX_SCALE */
+
+ return rc;
+}
+
static int
sfc_flow_parse_actions(struct sfc_adapter *sa,
const struct rte_flow_action actions[],
@@ -919,6 +1083,20 @@ sfc_flow_parse_actions(struct sfc_adapter *sa,
is_specified = B_TRUE;
break;
+#if EFSYS_OPT_RX_SCALE
+ case RTE_FLOW_ACTION_TYPE_RSS:
+ rc = sfc_flow_parse_rss(sa, actions->conf, flow);
+ if (rc != 0) {
+ rte_flow_error_set(error, rc,
+ RTE_FLOW_ERROR_TYPE_ACTION, actions,
+ "Bad RSS action");
+ return -rte_errno;
+ }
+
+ is_specified = B_TRUE;
+ break;
+#endif /* EFSYS_OPT_RX_SCALE */
+
default:
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION, actions,
@@ -1013,7 +1191,7 @@ sfc_flow_create(struct rte_eth_dev *dev,
sfc_adapter_lock(sa);
if (sa->state == SFC_ADAPTER_STARTED) {
- rc = efx_filter_insert(sa->nic, &flow->spec);
+ rc = sfc_flow_filter_insert(sa, flow);
if (rc != 0) {
rte_flow_error_set(error, rc,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -1047,7 +1225,7 @@ sfc_flow_remove(struct sfc_adapter *sa,
SFC_ASSERT(sfc_adapter_is_locked(sa));
if (sa->state == SFC_ADAPTER_STARTED) {
- rc = efx_filter_remove(sa->nic, &flow->spec);
+ rc = sfc_flow_filter_remove(sa, flow);
if (rc != 0)
rte_flow_error_set(error, rc,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -1172,7 +1350,7 @@ sfc_flow_stop(struct sfc_adapter *sa)
SFC_ASSERT(sfc_adapter_is_locked(sa));
TAILQ_FOREACH(flow, &sa->filter.flow_list, entries)
- efx_filter_remove(sa->nic, &flow->spec);
+ sfc_flow_filter_remove(sa, flow);
}
int
@@ -1186,7 +1364,7 @@ sfc_flow_start(struct sfc_adapter *sa)
SFC_ASSERT(sfc_adapter_is_locked(sa));
TAILQ_FOREACH(flow, &sa->filter.flow_list, entries) {
- rc = efx_filter_insert(sa->nic, &flow->spec);
+ rc = sfc_flow_filter_insert(sa, flow);
if (rc != 0)
goto fail_bad_flow;
}
diff --git a/drivers/net/sfc/sfc_flow.h b/drivers/net/sfc/sfc_flow.h
index bfc34364..aa740d7d 100644
--- a/drivers/net/sfc/sfc_flow.h
+++ b/drivers/net/sfc/sfc_flow.h
@@ -41,9 +41,24 @@
extern "C" {
#endif
+#if EFSYS_OPT_RX_SCALE
+/* RSS configuration storage */
+struct sfc_flow_rss {
+ unsigned int rxq_hw_index_min;
+ unsigned int rxq_hw_index_max;
+ unsigned int rss_hash_types;
+ uint8_t rss_key[EFX_RSS_KEY_SIZE];
+ unsigned int rss_tbl[EFX_RSS_TBL_SIZE];
+};
+#endif /* EFSYS_OPT_RX_SCALE */
+
/* PMD-specific definition of the opaque type from rte_flow.h */
struct rte_flow {
efx_filter_spec_t spec; /* filter specification */
+#if EFSYS_OPT_RX_SCALE
+ boolean_t rss; /* RSS toggle */
+ struct sfc_flow_rss rss_conf; /* RSS configuration */
+#endif /* EFSYS_OPT_RX_SCALE */
TAILQ_ENTRY(rte_flow) entries; /* flow list entries */
};
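
sfc_flow_parse_rss() above consumes the rss_conf/num/queue[] fields of struct rte_flow_action_rss. A hedged application-side sketch of creating such a flow follows; the queue numbers are illustrative, and passing a NULL rss_conf makes the PMD fall back to its default key and SFC_RSS_OFFLOADS hash types, per the parse code above.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <rte_flow.h>

/* Spread all ingress traffic over queues 0 and 1 through an RSS action */
static struct rte_flow *
example_rss_flow_create(uint16_t port_id, struct rte_flow_error *error)
{
	const uint16_t queues[] = { 0, 1 };
	const struct rte_flow_attr attr = { .ingress = 1 };
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action actions[2];
	struct rte_flow_action_rss *rss;
	struct rte_flow *flow;

	/* struct rte_flow_action_rss ends in a flexible queue[] array */
	rss = calloc(1, sizeof(*rss) + sizeof(queues));
	if (rss == NULL)
		return NULL;

	rss->rss_conf = NULL;	/* use the PMD's default key and hash types */
	rss->num = sizeof(queues) / sizeof(queues[0]);
	memcpy(rss->queue, queues, sizeof(queues));

	actions[0].type = RTE_FLOW_ACTION_TYPE_RSS;
	actions[0].conf = rss;
	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
	actions[1].conf = NULL;

	flow = rte_flow_create(port_id, &attr, pattern, actions, error);
	free(rss);	/* the PMD copies the configuration during parsing */
	return flow;
}
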
diff --git a/drivers/net/sfc/sfc_rx.c b/drivers/net/sfc/sfc_rx.c
index 1bf86445..2ae095b2 100644
--- a/drivers/net/sfc/sfc_rx.c
+++ b/drivers/net/sfc/sfc_rx.c
@@ -128,7 +128,7 @@ sfc_efx_rx_qrefill(struct sfc_efx_rxq *rxq)
SFC_ASSERT(m->nb_segs == 1);
m->port = port_id;
- addr[i] = rte_pktmbuf_mtophys(m);
+ addr[i] = rte_pktmbuf_iova(m);
}
efx_rx_qpost(rxq->common, addr, rxq->buf_size,
@@ -207,11 +207,11 @@ sfc_efx_supported_ptypes_get(void)
return ptypes;
}
+#if EFSYS_OPT_RX_SCALE
static void
sfc_efx_rx_set_rss_hash(struct sfc_efx_rxq *rxq, unsigned int flags,
struct rte_mbuf *m)
{
-#if EFSYS_OPT_RX_SCALE
uint8_t *mbuf_data;
@@ -227,8 +227,15 @@ sfc_efx_rx_set_rss_hash(struct sfc_efx_rxq *rxq, unsigned int flags,
m->ol_flags |= PKT_RX_RSS_HASH;
}
-#endif
}
+#else
+static void
+sfc_efx_rx_set_rss_hash(__rte_unused struct sfc_efx_rxq *rxq,
+ __rte_unused unsigned int flags,
+ __rte_unused struct rte_mbuf *m)
+{
+}
+#endif
static uint16_t
sfc_efx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
@@ -349,6 +356,43 @@ sfc_efx_rx_qdesc_npending(struct sfc_dp_rxq *dp_rxq)
return rxq->pending - rxq->completed;
}
+static sfc_dp_rx_qdesc_status_t sfc_efx_rx_qdesc_status;
+static int
+sfc_efx_rx_qdesc_status(struct sfc_dp_rxq *dp_rxq, uint16_t offset)
+{
+ struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
+
+ if (unlikely(offset > rxq->ptr_mask))
+ return -EINVAL;
+
+	/*
+	 * Poll the EvQ to derive an up-to-date 'rxq->pending' figure;
+	 * the queue has to be running for this to work, but the check
+	 * is omitted because the API design leaves it to the caller
+	 * to satisfy all preconditions
+	 */
+ SFC_ASSERT((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) ==
+ SFC_EFX_RXQ_FLAG_RUNNING);
+ sfc_ev_qpoll(rxq->evq);
+
+	/*
+	 * The ring contains a handful of reserved entries, but an
+	 * explicit check that the offset does not point to one of
+	 * them is omitted: the two checks below rely on figures
+	 * which already take the HW limits into account, so a
+	 * reserved entry simply fails both checks and UNAVAIL is
+	 * returned
+	 */
+
+ if (offset < (rxq->pending - rxq->completed))
+ return RTE_ETH_RX_DESC_DONE;
+
+ if (offset < (rxq->added - rxq->completed))
+ return RTE_ETH_RX_DESC_AVAIL;
+
+ return RTE_ETH_RX_DESC_UNAVAIL;
+}
+
struct sfc_rxq *
sfc_rxq_by_dp_rxq(const struct sfc_dp_rxq *dp_rxq)
{
@@ -498,6 +542,7 @@ struct sfc_dp_rx sfc_efx_rx = {
.qpurge = sfc_efx_rx_qpurge,
.supported_ptypes_get = sfc_efx_supported_ptypes_get,
.qdesc_npending = sfc_efx_rx_qdesc_npending,
+ .qdesc_status = sfc_efx_rx_qdesc_status,
.pkt_burst = sfc_efx_recv_pkts,
};
@@ -1050,31 +1095,39 @@ sfc_efx_to_rte_hash_type(efx_rx_hash_type_t efx_hash_types)
}
#endif
+#if EFSYS_OPT_RX_SCALE
static int
sfc_rx_rss_config(struct sfc_adapter *sa)
{
int rc = 0;
-#if EFSYS_OPT_RX_SCALE
if (sa->rss_channels > 0) {
- rc = efx_rx_scale_mode_set(sa->nic, EFX_RX_HASHALG_TOEPLITZ,
+ rc = efx_rx_scale_mode_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
+ EFX_RX_HASHALG_TOEPLITZ,
sa->rss_hash_types, B_TRUE);
if (rc != 0)
goto finish;
- rc = efx_rx_scale_key_set(sa->nic, sa->rss_key,
+ rc = efx_rx_scale_key_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
+ sa->rss_key,
sizeof(sa->rss_key));
if (rc != 0)
goto finish;
- rc = efx_rx_scale_tbl_set(sa->nic, sa->rss_tbl,
- sizeof(sa->rss_tbl));
+ rc = efx_rx_scale_tbl_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT,
+ sa->rss_tbl, RTE_DIM(sa->rss_tbl));
}
finish:
-#endif
return rc;
}
+#else
+static int
+sfc_rx_rss_config(__rte_unused struct sfc_adapter *sa)
+{
+ return 0;
+}
+#endif
int
sfc_rx_start(struct sfc_adapter *sa)
@@ -1243,7 +1296,6 @@ sfc_rx_configure(struct sfc_adapter *sa)
{
struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
const unsigned int nb_rx_queues = sa->eth_dev->data->nb_rx_queues;
- unsigned int sw_index;
int rc;
sfc_log_init(sa, "nb_rx_queues=%u (old %u)",
@@ -1296,6 +1348,8 @@ sfc_rx_configure(struct sfc_adapter *sa)
MIN(sa->rxq_count, EFX_MAXRSS) : 0;
if (sa->rss_channels > 0) {
+ unsigned int sw_index;
+
for (sw_index = 0; sw_index < EFX_RSS_TBL_SIZE; ++sw_index)
sa->rss_tbl[sw_index] = sw_index % sa->rss_channels;
}
diff --git a/drivers/net/sfc/sfc_tso.c b/drivers/net/sfc/sfc_tso.c
index fb79d749..2e7b595b 100644
--- a/drivers/net/sfc/sfc_tso.c
+++ b/drivers/net/sfc/sfc_tso.c
@@ -141,7 +141,7 @@ sfc_efx_tso_do(struct sfc_efx_txq *txq, unsigned int idx,
if (unlikely(tcph_off > encp->enc_tx_tso_tcp_header_offset_limit))
return EMSGSIZE;
- header_paddr = rte_pktmbuf_mtophys(m);
+ header_paddr = rte_pktmbuf_iova(m);
/*
* Sometimes headers may be split across multiple mbufs. In such cases
@@ -155,7 +155,7 @@ sfc_efx_tso_do(struct sfc_efx_txq *txq, unsigned int idx,
header_len);
tsoh = txq->sw_ring[idx & txq->ptr_mask].tsoh;
- header_paddr = rte_malloc_virt2phy((void *)tsoh);
+ header_paddr = rte_malloc_virt2iova((void *)tsoh);
} else {
if (m->data_len == header_len) {
*in_off = 0;
diff --git a/drivers/net/sfc/sfc_tweak.h b/drivers/net/sfc/sfc_tweak.h
index 4ef7fc8b..fd2f75c3 100644
--- a/drivers/net/sfc/sfc_tweak.h
+++ b/drivers/net/sfc/sfc_tweak.h
@@ -53,4 +53,7 @@
/** Default free threshold follows recommendations from DPDK documentation */
#define SFC_TX_DEFAULT_FREE_THRESH 32
+/** Number of mbufs to be freed in bulk in a single call */
+#define SFC_TX_REAP_BULK_SIZE 32
+
#endif /* _SFC_TWEAK_H_ */
diff --git a/drivers/net/sfc/sfc_tx.c b/drivers/net/sfc/sfc_tx.c
index fc439cb6..127d59e6 100644
--- a/drivers/net/sfc/sfc_tx.c
+++ b/drivers/net/sfc/sfc_tx.c
@@ -91,6 +91,21 @@ sfc_tx_qcheck_conf(struct sfc_adapter *sa, uint16_t nb_tx_desc,
rc = EINVAL;
}
+ if (((flags & ETH_TXQ_FLAGS_NOMULTMEMP) == 0) &&
+ (~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL)) {
+ sfc_err(sa, "multi-mempool is not supported by %s datapath",
+ sa->dp_tx->dp.name);
+ rc = EINVAL;
+ }
+
+ if (((flags & ETH_TXQ_FLAGS_NOREFCOUNT) == 0) &&
+ (~sa->dp_tx->features & SFC_DP_TX_FEAT_REFCNT)) {
+ sfc_err(sa,
+ "mbuf reference counters are neglected by %s datapath",
+ sa->dp_tx->dp.name);
+ rc = EINVAL;
+ }
+
if ((flags & ETH_TXQ_FLAGS_NOVLANOFFL) == 0) {
if (!encp->enc_hw_tx_insert_vlan_enabled) {
sfc_err(sa, "VLAN offload is not supported");
@@ -750,7 +765,7 @@ sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
size_t seg_len;
seg_len = m_seg->data_len;
- next_frag = rte_mbuf_data_dma_addr(m_seg);
+ next_frag = rte_mbuf_data_iova(m_seg);
/*
* If we've started TSO transaction few steps earlier,
@@ -977,6 +992,44 @@ sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq)
txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED;
}
+static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status;
+static int
+sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset)
+{
+ struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
+
+ if (unlikely(offset > txq->ptr_mask))
+ return -EINVAL;
+
+ if (unlikely(offset >= EFX_TXQ_LIMIT(txq->ptr_mask + 1)))
+ return RTE_ETH_TX_DESC_UNAVAIL;
+
+	/*
+	 * Poll the EvQ to derive an up-to-date 'txq->pending' figure;
+	 * the queue has to be running for this to work, but the check
+	 * is omitted because the API design leaves it to the caller
+	 * to satisfy all preconditions
+	 */
+ SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) ==
+ SFC_EFX_TXQ_FLAG_RUNNING);
+ sfc_ev_qpoll(txq->evq);
+
+ /*
+ * Ring tail is 'txq->pending', and although descriptors
+ * between 'txq->completed' and 'txq->pending' are still
+ * in use by the driver, they should be reported as DONE
+ */
+ if (unlikely(offset < (txq->added - txq->pending)))
+ return RTE_ETH_TX_DESC_FULL;
+
+	/*
+	 * Unused descriptors have no dedicated return value; they are
+	 * reported as DONE, since genuine DONE descriptors are freed
+	 * in SW on the next burst anyway
+	 */
+ return RTE_ETH_TX_DESC_DONE;
+}
+
struct sfc_dp_tx sfc_efx_tx = {
.dp = {
.name = SFC_KVARG_DATAPATH_EFX,
@@ -985,11 +1038,14 @@ struct sfc_dp_tx sfc_efx_tx = {
},
.features = SFC_DP_TX_FEAT_VLAN_INSERT |
SFC_DP_TX_FEAT_TSO |
+ SFC_DP_TX_FEAT_MULTI_POOL |
+ SFC_DP_TX_FEAT_REFCNT |
SFC_DP_TX_FEAT_MULTI_SEG,
.qcreate = sfc_efx_tx_qcreate,
.qdestroy = sfc_efx_tx_qdestroy,
.qstart = sfc_efx_tx_qstart,
.qstop = sfc_efx_tx_qstop,
.qreap = sfc_efx_tx_qreap,
+ .qdesc_status = sfc_efx_tx_qdesc_status,
.pkt_burst = sfc_efx_xmit_pkts,
};
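
The new checks in sfc_tx_qcheck_conf() mean that an application which cannot guarantee single-mempool, non-shared mbufs must leave ETH_TXQ_FLAGS_NOMULTMEMP/NOREFCOUNT cleared and use a datapath advertising the matching features (both sfc datapaths now do). A hedged sketch of declaring those guarantees at Tx queue setup time, with illustrative port, queue and descriptor numbers:

#include <rte_ethdev.h>

/* Set up a Tx queue that promises a single mempool and refcnt-1 mbufs */
static int
example_tx_queue_setup(uint16_t port_id, uint16_t queue_id,
		       unsigned int socket_id)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf txconf;

	rte_eth_dev_info_get(port_id, &dev_info);
	txconf = dev_info.default_txconf;
	txconf.txq_flags |= ETH_TXQ_FLAGS_NOMULTMEMP | ETH_TXQ_FLAGS_NOREFCOUNT;

	return rte_eth_tx_queue_setup(port_id, queue_id, 512, socket_id,
				      &txconf);
}
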
diff --git a/drivers/net/xenvirt/Makefile b/drivers/net/softnic/Makefile
index 8b4b8f03..09ed62ea 100644
--- a/drivers/net/xenvirt/Makefile
+++ b/drivers/net/softnic/Makefile
@@ -1,6 +1,6 @@
# BSD LICENSE
#
-# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# Copyright(c) 2017 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -34,24 +34,27 @@ include $(RTE_SDK)/mk/rte.vars.mk
#
# library name
#
-LIB = librte_pmd_xenvirt.a
+LIB = librte_pmd_softnic.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
-LDLIBS += -lxenstore
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_sched
+LDLIBS += -lrte_bus_vdev
-EXPORT_MAP := rte_eth_xenvirt_version.map
+EXPORT_MAP := rte_pmd_eth_softnic_version.map
LIBABIVER := 1
#
# all source are stored in SRCS-y
#
-SRCS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += rte_eth_xenvirt.c rte_mempool_gntalloc.c rte_xen_lib.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += rte_eth_softnic.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += rte_eth_softnic_tm.c
#
# Export include files
#
-SYMLINK-y-include += rte_eth_xenvirt.h
+SYMLINK-y-include += rte_eth_softnic.h
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/softnic/rte_eth_softnic.c b/drivers/net/softnic/rte_eth_softnic.c
new file mode 100644
index 00000000..3e47c2f9
--- /dev/null
+++ b/drivers/net/softnic/rte_eth_softnic.c
@@ -0,0 +1,851 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_ethdev.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_malloc.h>
+#include <rte_bus_vdev.h>
+#include <rte_kvargs.h>
+#include <rte_errno.h>
+#include <rte_ring.h>
+#include <rte_sched.h>
+#include <rte_tm_driver.h>
+
+#include "rte_eth_softnic.h"
+#include "rte_eth_softnic_internals.h"
+
+#define DEV_HARD(p) \
+ (&rte_eth_devices[p->hard.port_id])
+
+#define PMD_PARAM_SOFT_TM "soft_tm"
+#define PMD_PARAM_SOFT_TM_RATE "soft_tm_rate"
+#define PMD_PARAM_SOFT_TM_NB_QUEUES "soft_tm_nb_queues"
+#define PMD_PARAM_SOFT_TM_QSIZE0 "soft_tm_qsize0"
+#define PMD_PARAM_SOFT_TM_QSIZE1 "soft_tm_qsize1"
+#define PMD_PARAM_SOFT_TM_QSIZE2 "soft_tm_qsize2"
+#define PMD_PARAM_SOFT_TM_QSIZE3 "soft_tm_qsize3"
+#define PMD_PARAM_SOFT_TM_ENQ_BSZ "soft_tm_enq_bsz"
+#define PMD_PARAM_SOFT_TM_DEQ_BSZ "soft_tm_deq_bsz"
+
+#define PMD_PARAM_HARD_NAME "hard_name"
+#define PMD_PARAM_HARD_TX_QUEUE_ID "hard_tx_queue_id"
+
+static const char *pmd_valid_args[] = {
+ PMD_PARAM_SOFT_TM,
+ PMD_PARAM_SOFT_TM_RATE,
+ PMD_PARAM_SOFT_TM_NB_QUEUES,
+ PMD_PARAM_SOFT_TM_QSIZE0,
+ PMD_PARAM_SOFT_TM_QSIZE1,
+ PMD_PARAM_SOFT_TM_QSIZE2,
+ PMD_PARAM_SOFT_TM_QSIZE3,
+ PMD_PARAM_SOFT_TM_ENQ_BSZ,
+ PMD_PARAM_SOFT_TM_DEQ_BSZ,
+ PMD_PARAM_HARD_NAME,
+ PMD_PARAM_HARD_TX_QUEUE_ID,
+ NULL
+};
+
+static const struct rte_eth_dev_info pmd_dev_info = {
+ .min_rx_bufsize = 0,
+ .max_rx_pktlen = UINT32_MAX,
+ .max_rx_queues = UINT16_MAX,
+ .max_tx_queues = UINT16_MAX,
+ .rx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ },
+ .tx_desc_lim = {
+ .nb_max = UINT16_MAX,
+ .nb_min = 0,
+ .nb_align = 1,
+ },
+};
+
+static void
+pmd_dev_infos_get(struct rte_eth_dev *dev __rte_unused,
+ struct rte_eth_dev_info *dev_info)
+{
+ memcpy(dev_info, &pmd_dev_info, sizeof(*dev_info));
+}
+
+static int
+pmd_dev_configure(struct rte_eth_dev *dev)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct rte_eth_dev *hard_dev = DEV_HARD(p);
+
+ if (dev->data->nb_rx_queues > hard_dev->data->nb_rx_queues)
+ return -1;
+
+ if (p->params.hard.tx_queue_id >= hard_dev->data->nb_tx_queues)
+ return -1;
+
+ return 0;
+}
+
+static int
+pmd_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id,
+ uint16_t nb_rx_desc __rte_unused,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf __rte_unused,
+ struct rte_mempool *mb_pool __rte_unused)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ if (p->params.soft.intrusive == 0) {
+ struct pmd_rx_queue *rxq;
+
+ rxq = rte_zmalloc_socket(p->params.soft.name,
+ sizeof(struct pmd_rx_queue), 0, socket_id);
+ if (rxq == NULL)
+ return -ENOMEM;
+
+ rxq->hard.port_id = p->hard.port_id;
+ rxq->hard.rx_queue_id = rx_queue_id;
+ dev->data->rx_queues[rx_queue_id] = rxq;
+ } else {
+ struct rte_eth_dev *hard_dev = DEV_HARD(p);
+ void *rxq = hard_dev->data->rx_queues[rx_queue_id];
+
+ if (rxq == NULL)
+ return -1;
+
+ dev->data->rx_queues[rx_queue_id] = rxq;
+ }
+ return 0;
+}
+
+static int
+pmd_tx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t tx_queue_id,
+ uint16_t nb_tx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+ uint32_t size = RTE_ETH_NAME_MAX_LEN + strlen("_txq") + 4;
+ char name[size];
+ struct rte_ring *r;
+
+ snprintf(name, sizeof(name), "%s_txq%04x",
+ dev->data->name, tx_queue_id);
+ r = rte_ring_create(name, nb_tx_desc, socket_id,
+ RING_F_SP_ENQ | RING_F_SC_DEQ);
+ if (r == NULL)
+ return -1;
+
+ dev->data->tx_queues[tx_queue_id] = r;
+ return 0;
+}
+
+static int
+pmd_dev_start(struct rte_eth_dev *dev)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ if (tm_used(dev)) {
+ int status = tm_start(p);
+
+ if (status)
+ return status;
+ }
+
+ dev->data->dev_link.link_status = ETH_LINK_UP;
+
+ if (p->params.soft.intrusive) {
+ struct rte_eth_dev *hard_dev = DEV_HARD(p);
+
+ /* The hard_dev->rx_pkt_burst should be stable by now */
+ dev->rx_pkt_burst = hard_dev->rx_pkt_burst;
+ }
+
+ return 0;
+}
+
+static void
+pmd_dev_stop(struct rte_eth_dev *dev)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ dev->data->dev_link.link_status = ETH_LINK_DOWN;
+
+ if (tm_used(dev))
+ tm_stop(p);
+}
+
+static void
+pmd_dev_close(struct rte_eth_dev *dev)
+{
+ uint32_t i;
+
+ /* TX queues */
+ for (i = 0; i < dev->data->nb_tx_queues; i++)
+ rte_ring_free((struct rte_ring *)dev->data->tx_queues[i]);
+}
+
+static int
+pmd_link_update(struct rte_eth_dev *dev __rte_unused,
+ int wait_to_complete __rte_unused)
+{
+ return 0;
+}
+
+static int
+pmd_tm_ops_get(struct rte_eth_dev *dev, void *arg)
+{
+ *(const struct rte_tm_ops **)arg =
+ (tm_enabled(dev)) ? &pmd_tm_ops : NULL;
+
+ return 0;
+}
+
+static const struct eth_dev_ops pmd_ops = {
+ .dev_configure = pmd_dev_configure,
+ .dev_start = pmd_dev_start,
+ .dev_stop = pmd_dev_stop,
+ .dev_close = pmd_dev_close,
+ .link_update = pmd_link_update,
+ .dev_infos_get = pmd_dev_infos_get,
+ .rx_queue_setup = pmd_rx_queue_setup,
+ .tx_queue_setup = pmd_tx_queue_setup,
+ .tm_ops_get = pmd_tm_ops_get,
+};
+
+static uint16_t
+pmd_rx_pkt_burst(void *rxq,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct pmd_rx_queue *rx_queue = rxq;
+
+ return rte_eth_rx_burst(rx_queue->hard.port_id,
+ rx_queue->hard.rx_queue_id,
+ rx_pkts,
+ nb_pkts);
+}
+
+static uint16_t
+pmd_tx_pkt_burst(void *txq,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ return (uint16_t)rte_ring_enqueue_burst(txq,
+ (void **)tx_pkts,
+ nb_pkts,
+ NULL);
+}
+
+static __rte_always_inline int
+run_default(struct rte_eth_dev *dev)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ /* Persistent context: Read Only (update not required) */
+ struct rte_mbuf **pkts = p->soft.def.pkts;
+ uint16_t nb_tx_queues = dev->data->nb_tx_queues;
+
+ /* Persistent context: Read - Write (update required) */
+ uint32_t txq_pos = p->soft.def.txq_pos;
+ uint32_t pkts_len = p->soft.def.pkts_len;
+ uint32_t flush_count = p->soft.def.flush_count;
+
+ /* Not part of the persistent context */
+ uint32_t pos;
+ uint16_t i;
+
+ /* Soft device TXQ read, Hard device TXQ write */
+ for (i = 0; i < nb_tx_queues; i++) {
+ struct rte_ring *txq = dev->data->tx_queues[txq_pos];
+
+ /* Read soft device TXQ burst to packet enqueue buffer */
+ pkts_len += rte_ring_sc_dequeue_burst(txq,
+ (void **)&pkts[pkts_len],
+ DEFAULT_BURST_SIZE,
+ NULL);
+
+ /* Increment soft device TXQ */
+ txq_pos++;
+ if (txq_pos >= nb_tx_queues)
+ txq_pos = 0;
+
+ /* Hard device TXQ write when complete burst is available */
+ if (pkts_len >= DEFAULT_BURST_SIZE) {
+ for (pos = 0; pos < pkts_len; )
+ pos += rte_eth_tx_burst(p->hard.port_id,
+ p->params.hard.tx_queue_id,
+ &pkts[pos],
+ (uint16_t)(pkts_len - pos));
+
+ pkts_len = 0;
+ flush_count = 0;
+ break;
+ }
+ }
+
+ if (flush_count >= FLUSH_COUNT_THRESHOLD) {
+ for (pos = 0; pos < pkts_len; )
+ pos += rte_eth_tx_burst(p->hard.port_id,
+ p->params.hard.tx_queue_id,
+ &pkts[pos],
+ (uint16_t)(pkts_len - pos));
+
+ pkts_len = 0;
+ flush_count = 0;
+ }
+
+ p->soft.def.txq_pos = txq_pos;
+ p->soft.def.pkts_len = pkts_len;
+ p->soft.def.flush_count = flush_count + 1;
+
+ return 0;
+}
+
+static __rte_always_inline int
+run_tm(struct rte_eth_dev *dev)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ /* Persistent context: Read Only (update not required) */
+ struct rte_sched_port *sched = p->soft.tm.sched;
+ struct rte_mbuf **pkts_enq = p->soft.tm.pkts_enq;
+ struct rte_mbuf **pkts_deq = p->soft.tm.pkts_deq;
+ uint32_t enq_bsz = p->params.soft.tm.enq_bsz;
+ uint32_t deq_bsz = p->params.soft.tm.deq_bsz;
+ uint16_t nb_tx_queues = dev->data->nb_tx_queues;
+
+ /* Persistent context: Read - Write (update required) */
+ uint32_t txq_pos = p->soft.tm.txq_pos;
+ uint32_t pkts_enq_len = p->soft.tm.pkts_enq_len;
+ uint32_t flush_count = p->soft.tm.flush_count;
+
+ /* Not part of the persistent context */
+ uint32_t pkts_deq_len, pos;
+ uint16_t i;
+
+ /* Soft device TXQ read, TM enqueue */
+ for (i = 0; i < nb_tx_queues; i++) {
+ struct rte_ring *txq = dev->data->tx_queues[txq_pos];
+
+ /* Read TXQ burst to packet enqueue buffer */
+ pkts_enq_len += rte_ring_sc_dequeue_burst(txq,
+ (void **)&pkts_enq[pkts_enq_len],
+ enq_bsz,
+ NULL);
+
+ /* Increment TXQ */
+ txq_pos++;
+ if (txq_pos >= nb_tx_queues)
+ txq_pos = 0;
+
+ /* TM enqueue when complete burst is available */
+ if (pkts_enq_len >= enq_bsz) {
+ rte_sched_port_enqueue(sched, pkts_enq, pkts_enq_len);
+
+ pkts_enq_len = 0;
+ flush_count = 0;
+ break;
+ }
+ }
+
+ if (flush_count >= FLUSH_COUNT_THRESHOLD) {
+ if (pkts_enq_len)
+ rte_sched_port_enqueue(sched, pkts_enq, pkts_enq_len);
+
+ pkts_enq_len = 0;
+ flush_count = 0;
+ }
+
+ p->soft.tm.txq_pos = txq_pos;
+ p->soft.tm.pkts_enq_len = pkts_enq_len;
+ p->soft.tm.flush_count = flush_count + 1;
+
+ /* TM dequeue, Hard device TXQ write */
+ pkts_deq_len = rte_sched_port_dequeue(sched, pkts_deq, deq_bsz);
+
+ for (pos = 0; pos < pkts_deq_len; )
+ pos += rte_eth_tx_burst(p->hard.port_id,
+ p->params.hard.tx_queue_id,
+ &pkts_deq[pos],
+ (uint16_t)(pkts_deq_len - pos));
+
+ return 0;
+}
+
+int
+rte_pmd_softnic_run(uint16_t port_id)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
+#endif
+
+ return (tm_used(dev)) ? run_tm(dev) : run_default(dev);
+}
+
+static struct ether_addr eth_addr = { .addr_bytes = {0} };
+
+static uint32_t
+eth_dev_speed_max_mbps(uint32_t speed_capa)
+{
+ uint32_t rate_mbps[32] = {
+ ETH_SPEED_NUM_NONE,
+ ETH_SPEED_NUM_10M,
+ ETH_SPEED_NUM_10M,
+ ETH_SPEED_NUM_100M,
+ ETH_SPEED_NUM_100M,
+ ETH_SPEED_NUM_1G,
+ ETH_SPEED_NUM_2_5G,
+ ETH_SPEED_NUM_5G,
+ ETH_SPEED_NUM_10G,
+ ETH_SPEED_NUM_20G,
+ ETH_SPEED_NUM_25G,
+ ETH_SPEED_NUM_40G,
+ ETH_SPEED_NUM_50G,
+ ETH_SPEED_NUM_56G,
+ ETH_SPEED_NUM_100G,
+ };
+
+ uint32_t pos = (speed_capa) ? (31 - __builtin_clz(speed_capa)) : 0;
+ return rate_mbps[pos];
+}
+
+static int
+default_init(struct pmd_internals *p,
+ struct pmd_params *params,
+ int numa_node)
+{
+ p->soft.def.pkts = rte_zmalloc_socket(params->soft.name,
+ 2 * DEFAULT_BURST_SIZE * sizeof(struct rte_mbuf *),
+ 0,
+ numa_node);
+
+ if (p->soft.def.pkts == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void
+default_free(struct pmd_internals *p)
+{
+ rte_free(p->soft.def.pkts);
+}
+
+static void *
+pmd_init(struct pmd_params *params, int numa_node)
+{
+ struct pmd_internals *p;
+ int status;
+
+ p = rte_zmalloc_socket(params->soft.name,
+ sizeof(struct pmd_internals),
+ 0,
+ numa_node);
+ if (p == NULL)
+ return NULL;
+
+ memcpy(&p->params, params, sizeof(p->params));
+ rte_eth_dev_get_port_by_name(params->hard.name, &p->hard.port_id);
+
+ /* Default */
+ status = default_init(p, params, numa_node);
+ if (status) {
+ free(p->params.hard.name);
+ rte_free(p);
+ return NULL;
+ }
+
+	/* Traffic Management (TM) */
+ if (params->soft.flags & PMD_FEATURE_TM) {
+ status = tm_init(p, params, numa_node);
+ if (status) {
+ default_free(p);
+ free(p->params.hard.name);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ return p;
+}
+
+static void
+pmd_free(struct pmd_internals *p)
+{
+ if (p->params.soft.flags & PMD_FEATURE_TM)
+ tm_free(p);
+
+ default_free(p);
+
+ free(p->params.hard.name);
+ rte_free(p);
+}
+
+static int
+pmd_ethdev_register(struct rte_vdev_device *vdev,
+ struct pmd_params *params,
+ void *dev_private)
+{
+ struct rte_eth_dev_info hard_info;
+ struct rte_eth_dev *soft_dev;
+ uint32_t hard_speed;
+ int numa_node;
+ uint16_t hard_port_id;
+
+ rte_eth_dev_get_port_by_name(params->hard.name, &hard_port_id);
+ rte_eth_dev_info_get(hard_port_id, &hard_info);
+ hard_speed = eth_dev_speed_max_mbps(hard_info.speed_capa);
+ numa_node = rte_eth_dev_socket_id(hard_port_id);
+
+ /* Ethdev entry allocation */
+ soft_dev = rte_eth_dev_allocate(params->soft.name);
+ if (!soft_dev)
+ return -ENOMEM;
+
+ /* dev */
+ soft_dev->rx_pkt_burst = (params->soft.intrusive) ?
+ NULL : /* set up later */
+ pmd_rx_pkt_burst;
+ soft_dev->tx_pkt_burst = pmd_tx_pkt_burst;
+ soft_dev->tx_pkt_prepare = NULL;
+ soft_dev->dev_ops = &pmd_ops;
+ soft_dev->device = &vdev->device;
+
+ /* dev->data */
+ soft_dev->data->dev_private = dev_private;
+ soft_dev->data->dev_link.link_speed = hard_speed;
+ soft_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
+ soft_dev->data->dev_link.link_autoneg = ETH_LINK_SPEED_FIXED;
+ soft_dev->data->dev_link.link_status = ETH_LINK_DOWN;
+ soft_dev->data->mac_addrs = &eth_addr;
+ soft_dev->data->promiscuous = 1;
+ soft_dev->data->kdrv = RTE_KDRV_NONE;
+ soft_dev->data->numa_node = numa_node;
+
+ return 0;
+}
+
+static int
+get_string(const char *key __rte_unused, const char *value, void *extra_args)
+{
+ if (!value || !extra_args)
+ return -EINVAL;
+
+ *(char **)extra_args = strdup(value);
+
+ if (!*(char **)extra_args)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int
+get_uint32(const char *key __rte_unused, const char *value, void *extra_args)
+{
+ if (!value || !extra_args)
+ return -EINVAL;
+
+ *(uint32_t *)extra_args = strtoull(value, NULL, 0);
+
+ return 0;
+}
+
+static int
+pmd_parse_args(struct pmd_params *p, const char *name, const char *params)
+{
+ struct rte_kvargs *kvlist;
+ int i, ret;
+
+ kvlist = rte_kvargs_parse(params, pmd_valid_args);
+ if (kvlist == NULL)
+ return -EINVAL;
+
+ /* Set default values */
+ memset(p, 0, sizeof(*p));
+ p->soft.name = name;
+ p->soft.intrusive = INTRUSIVE;
+ p->soft.tm.rate = 0;
+ p->soft.tm.nb_queues = SOFTNIC_SOFT_TM_NB_QUEUES;
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
+ p->soft.tm.qsize[i] = SOFTNIC_SOFT_TM_QUEUE_SIZE;
+ p->soft.tm.enq_bsz = SOFTNIC_SOFT_TM_ENQ_BSZ;
+ p->soft.tm.deq_bsz = SOFTNIC_SOFT_TM_DEQ_BSZ;
+ p->hard.tx_queue_id = SOFTNIC_HARD_TX_QUEUE_ID;
+
+ /* SOFT: TM (optional) */
+ if (rte_kvargs_count(kvlist, PMD_PARAM_SOFT_TM) == 1) {
+ char *s;
+
+ ret = rte_kvargs_process(kvlist, PMD_PARAM_SOFT_TM,
+ &get_string, &s);
+ if (ret < 0)
+ goto out_free;
+
+ if (strcmp(s, "on") == 0)
+ p->soft.flags |= PMD_FEATURE_TM;
+ else if (strcmp(s, "off") == 0)
+ p->soft.flags &= ~PMD_FEATURE_TM;
+ else
+ ret = -EINVAL;
+
+ free(s);
+ if (ret)
+ goto out_free;
+ }
+
+ /* SOFT: TM rate (measured in bytes/second) (optional) */
+ if (rte_kvargs_count(kvlist, PMD_PARAM_SOFT_TM_RATE) == 1) {
+ ret = rte_kvargs_process(kvlist, PMD_PARAM_SOFT_TM_RATE,
+ &get_uint32, &p->soft.tm.rate);
+ if (ret < 0)
+ goto out_free;
+
+ p->soft.flags |= PMD_FEATURE_TM;
+ }
+
+ /* SOFT: TM number of queues (optional) */
+ if (rte_kvargs_count(kvlist, PMD_PARAM_SOFT_TM_NB_QUEUES) == 1) {
+ ret = rte_kvargs_process(kvlist, PMD_PARAM_SOFT_TM_NB_QUEUES,
+ &get_uint32, &p->soft.tm.nb_queues);
+ if (ret < 0)
+ goto out_free;
+
+ p->soft.flags |= PMD_FEATURE_TM;
+ }
+
+ /* SOFT: TM queue size 0 .. 3 (optional) */
+ if (rte_kvargs_count(kvlist, PMD_PARAM_SOFT_TM_QSIZE0) == 1) {
+ uint32_t qsize;
+
+ ret = rte_kvargs_process(kvlist, PMD_PARAM_SOFT_TM_QSIZE0,
+ &get_uint32, &qsize);
+ if (ret < 0)
+ goto out_free;
+
+ p->soft.tm.qsize[0] = (uint16_t)qsize;
+ p->soft.flags |= PMD_FEATURE_TM;
+ }
+
+ if (rte_kvargs_count(kvlist, PMD_PARAM_SOFT_TM_QSIZE1) == 1) {
+ uint32_t qsize;
+
+ ret = rte_kvargs_process(kvlist, PMD_PARAM_SOFT_TM_QSIZE1,
+ &get_uint32, &qsize);
+ if (ret < 0)
+ goto out_free;
+
+ p->soft.tm.qsize[1] = (uint16_t)qsize;
+ p->soft.flags |= PMD_FEATURE_TM;
+ }
+
+ if (rte_kvargs_count(kvlist, PMD_PARAM_SOFT_TM_QSIZE2) == 1) {
+ uint32_t qsize;
+
+ ret = rte_kvargs_process(kvlist, PMD_PARAM_SOFT_TM_QSIZE2,
+ &get_uint32, &qsize);
+ if (ret < 0)
+ goto out_free;
+
+ p->soft.tm.qsize[2] = (uint16_t)qsize;
+ p->soft.flags |= PMD_FEATURE_TM;
+ }
+
+ if (rte_kvargs_count(kvlist, PMD_PARAM_SOFT_TM_QSIZE3) == 1) {
+ uint32_t qsize;
+
+ ret = rte_kvargs_process(kvlist, PMD_PARAM_SOFT_TM_QSIZE3,
+ &get_uint32, &qsize);
+ if (ret < 0)
+ goto out_free;
+
+ p->soft.tm.qsize[3] = (uint16_t)qsize;
+ p->soft.flags |= PMD_FEATURE_TM;
+ }
+
+ /* SOFT: TM enqueue burst size (optional) */
+ if (rte_kvargs_count(kvlist, PMD_PARAM_SOFT_TM_ENQ_BSZ) == 1) {
+ ret = rte_kvargs_process(kvlist, PMD_PARAM_SOFT_TM_ENQ_BSZ,
+ &get_uint32, &p->soft.tm.enq_bsz);
+ if (ret < 0)
+ goto out_free;
+
+ p->soft.flags |= PMD_FEATURE_TM;
+ }
+
+ /* SOFT: TM dequeue burst size (optional) */
+ if (rte_kvargs_count(kvlist, PMD_PARAM_SOFT_TM_DEQ_BSZ) == 1) {
+ ret = rte_kvargs_process(kvlist, PMD_PARAM_SOFT_TM_DEQ_BSZ,
+ &get_uint32, &p->soft.tm.deq_bsz);
+ if (ret < 0)
+ goto out_free;
+
+ p->soft.flags |= PMD_FEATURE_TM;
+ }
+
+ /* HARD: name (mandatory) */
+ if (rte_kvargs_count(kvlist, PMD_PARAM_HARD_NAME) == 1) {
+ ret = rte_kvargs_process(kvlist, PMD_PARAM_HARD_NAME,
+ &get_string, &p->hard.name);
+ if (ret < 0)
+ goto out_free;
+ } else {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ /* HARD: tx_queue_id (optional) */
+ if (rte_kvargs_count(kvlist, PMD_PARAM_HARD_TX_QUEUE_ID) == 1) {
+ ret = rte_kvargs_process(kvlist, PMD_PARAM_HARD_TX_QUEUE_ID,
+ &get_uint32, &p->hard.tx_queue_id);
+ if (ret < 0)
+ goto out_free;
+ }
+
+out_free:
+ rte_kvargs_free(kvlist);
+ return ret;
+}
+
+static int
+pmd_probe(struct rte_vdev_device *vdev)
+{
+ struct pmd_params p;
+ const char *params;
+ int status;
+
+ struct rte_eth_dev_info hard_info;
+ uint32_t hard_speed;
+ uint16_t hard_port_id;
+ int numa_node;
+ void *dev_private;
+
+ RTE_LOG(INFO, PMD,
+ "Probing device \"%s\"\n",
+ rte_vdev_device_name(vdev));
+
+ /* Parse input arguments */
+ params = rte_vdev_device_args(vdev);
+ if (!params)
+ return -EINVAL;
+
+ status = pmd_parse_args(&p, rte_vdev_device_name(vdev), params);
+ if (status)
+ return status;
+
+ /* Check input arguments */
+ if (rte_eth_dev_get_port_by_name(p.hard.name, &hard_port_id))
+ return -EINVAL;
+
+ rte_eth_dev_info_get(hard_port_id, &hard_info);
+ hard_speed = eth_dev_speed_max_mbps(hard_info.speed_capa);
+ numa_node = rte_eth_dev_socket_id(hard_port_id);
+
+ if (p.hard.tx_queue_id >= hard_info.max_tx_queues)
+ return -EINVAL;
+
+ if (p.soft.flags & PMD_FEATURE_TM) {
+ status = tm_params_check(&p, hard_speed);
+
+ if (status)
+ return status;
+ }
+
+ /* Allocate and initialize soft ethdev private data */
+ dev_private = pmd_init(&p, numa_node);
+ if (dev_private == NULL)
+ return -ENOMEM;
+
+ /* Register soft ethdev */
+ RTE_LOG(INFO, PMD,
+ "Creating soft ethdev \"%s\" for hard ethdev \"%s\"\n",
+ p.soft.name, p.hard.name);
+
+ status = pmd_ethdev_register(vdev, &p, dev_private);
+ if (status) {
+ pmd_free(dev_private);
+ return status;
+ }
+
+ return 0;
+}
+
+static int
+pmd_remove(struct rte_vdev_device *vdev)
+{
+ struct rte_eth_dev *dev = NULL;
+ struct pmd_internals *p;
+
+ if (!vdev)
+ return -EINVAL;
+
+ RTE_LOG(INFO, PMD, "Removing device \"%s\"\n",
+ rte_vdev_device_name(vdev));
+
+ /* Find the ethdev entry */
+ dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
+ if (dev == NULL)
+ return -ENODEV;
+ p = dev->data->dev_private;
+
+ /* Free device data structures */
+ pmd_free(p);
+ rte_free(dev->data);
+ rte_eth_dev_release_port(dev);
+
+ return 0;
+}
+
+static struct rte_vdev_driver pmd_softnic_drv = {
+ .probe = pmd_probe,
+ .remove = pmd_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_softnic, pmd_softnic_drv);
+RTE_PMD_REGISTER_PARAM_STRING(net_softnic,
+ PMD_PARAM_SOFT_TM "=on|off "
+ PMD_PARAM_SOFT_TM_RATE "=<int> "
+ PMD_PARAM_SOFT_TM_NB_QUEUES "=<int> "
+ PMD_PARAM_SOFT_TM_QSIZE0 "=<int> "
+ PMD_PARAM_SOFT_TM_QSIZE1 "=<int> "
+ PMD_PARAM_SOFT_TM_QSIZE2 "=<int> "
+ PMD_PARAM_SOFT_TM_QSIZE3 "=<int> "
+ PMD_PARAM_SOFT_TM_ENQ_BSZ "=<int> "
+ PMD_PARAM_SOFT_TM_DEQ_BSZ "=<int> "
+ PMD_PARAM_HARD_NAME "=<string> "
+ PMD_PARAM_HARD_TX_QUEUE_ID "=<int>");
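For illustration only (not part of this patch), a minimal sketch of creating the soft device at run-time with the devargs registered above. It assumes the PMD_PARAM_* macros (defined earlier in this file, not shown in this hunk) expand to the keys "soft_tm", "soft_tm_rate", "hard_name" and "hard_tx_queue_id", that a hard ethdev named "0000:81:00.0" already exists, and that rte_vdev_init() is available under the header name used by recent DPDK releases:

#include <rte_bus_vdev.h>	/* rte_vdev.h on older releases */

static int
softnic_create_example(void)
{
        /* soft_tm=on enables the TM feature; hard_name is mandatory. */
        return rte_vdev_init("net_softnic0",
                "soft_tm=on,soft_tm_rate=1250000000,"
                "hard_name=0000:81:00.0,hard_tx_queue_id=0");
}

The same key=value string could equivalently be passed through the EAL --vdev option at start-up.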
diff --git a/drivers/net/xenvirt/rte_eth_xenvirt.h b/drivers/net/softnic/rte_eth_softnic.h
index 598adc6f..b49e5829 100644
--- a/drivers/net/xenvirt/rte_eth_xenvirt.h
+++ b/drivers/net/softnic/rte_eth_softnic.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,31 +31,53 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef _RTE_ETH_XENVIRT_H_
-#define _RTE_ETH_XENVIRT_H_
+#ifndef __INCLUDE_RTE_ETH_SOFTNIC_H__
+#define __INCLUDE_RTE_ETH_SOFTNIC_H__
+
+#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
-#include <rte_mempool.h>
+#ifndef SOFTNIC_SOFT_TM_NB_QUEUES
+#define SOFTNIC_SOFT_TM_NB_QUEUES 65536
+#endif
+
+#ifndef SOFTNIC_SOFT_TM_QUEUE_SIZE
+#define SOFTNIC_SOFT_TM_QUEUE_SIZE 64
+#endif
+
+#ifndef SOFTNIC_SOFT_TM_ENQ_BSZ
+#define SOFTNIC_SOFT_TM_ENQ_BSZ 32
+#endif
+
+#ifndef SOFTNIC_SOFT_TM_DEQ_BSZ
+#define SOFTNIC_SOFT_TM_DEQ_BSZ 24
+#endif
+
+#ifndef SOFTNIC_HARD_TX_QUEUE_ID
+#define SOFTNIC_HARD_TX_QUEUE_ID 0
+#endif
/**
- * Creates mempool for xen virtio PMD.
- * This function uses memzone_reserve to allocate memory for meta data,
- * and uses grant alloc driver to allocate memory for data area.
- * The input parameters are exactly the same as rte_mempool_create.
+ * Run the traffic management function on the softnic device
+ *
+ * This function reads packets from the softnic input queues, inserts them
+ * into the QoS scheduler queues based on the mbuf sched field value and
+ * transmits the scheduled packets out through the hard device interface.
+ *
+ * @param port_id
+ * Port ID of the soft device.
+ * @return
+ * Zero.
*/
-struct rte_mempool *
-rte_mempool_gntalloc_create(const char *name, unsigned elt_num, unsigned elt_size,
- unsigned cache_size, unsigned private_data_size,
- rte_mempool_ctor_t *mp_init, void *mp_init_arg,
- rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
- int socket_id, unsigned flags);
+int
+rte_pmd_softnic_run(uint16_t port_id);
#ifdef __cplusplus
}
#endif
-#endif
+#endif /* __INCLUDE_RTE_ETH_SOFTNIC_H__ */
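For illustration only (not part of this patch), a minimal polling-loop sketch built around the rte_pmd_softnic_run() API declared above; the two port IDs, queue 0 and the burst size of 32 are assumptions:

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include "rte_eth_softnic.h"

static void
softnic_poll_example(uint16_t soft_port_id, uint16_t rx_port_id)
{
        struct rte_mbuf *pkts[32];

        for ( ; ; ) {
                /* Packets are assumed to carry a valid mbuf sched field. */
                uint16_t n = rte_eth_rx_burst(rx_port_id, 0, pkts, 32);
                uint16_t sent = rte_eth_tx_burst(soft_port_id, 0, pkts, n);

                /* Free whatever the soft device did not accept. */
                while (sent < n)
                        rte_pktmbuf_free(pkts[sent++]);

                /* Dequeue from the QoS scheduler and transmit on the
                 * hard device.
                 */
                rte_pmd_softnic_run(soft_port_id);
        }
}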
diff --git a/drivers/net/softnic/rte_eth_softnic_internals.h b/drivers/net/softnic/rte_eth_softnic_internals.h
new file mode 100644
index 00000000..1f758069
--- /dev/null
+++ b/drivers/net/softnic/rte_eth_softnic_internals.h
@@ -0,0 +1,291 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_ETH_SOFTNIC_INTERNALS_H__
+#define __INCLUDE_RTE_ETH_SOFTNIC_INTERNALS_H__
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_sched.h>
+#include <rte_ethdev.h>
+#include <rte_tm_driver.h>
+
+#include "rte_eth_softnic.h"
+
+/**
+ * PMD Parameters
+ */
+
+enum pmd_feature {
+ PMD_FEATURE_TM = 1, /**< Traffic Management (TM) */
+};
+
+#ifndef INTRUSIVE
+#define INTRUSIVE 0
+#endif
+
+struct pmd_params {
+ /** Parameters for the soft device (to be created) */
+ struct {
+ const char *name; /**< Name */
+ uint32_t flags; /**< Flags */
+
+ /** 0 = Access the hard device through its API only (potentially
+ * slower, but safer);
+ * 1 = Direct access to the hard device private data structures is
+ * allowed (potentially faster).
+ */
+ int intrusive;
+
+ /** Traffic Management (TM) */
+ struct {
+ uint32_t rate; /**< Rate (bytes/second) */
+ uint32_t nb_queues; /**< Number of queues */
+ uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ /**< Queue size per traffic class */
+ uint32_t enq_bsz; /**< Enqueue burst size */
+ uint32_t deq_bsz; /**< Dequeue burst size */
+ } tm;
+ } soft;
+
+ /** Parameters for the hard device (existing) */
+ struct {
+ char *name; /**< Name */
+ uint16_t tx_queue_id; /**< TX queue ID */
+ } hard;
+};
+
+/**
+ * Default Internals
+ */
+
+#ifndef DEFAULT_BURST_SIZE
+#define DEFAULT_BURST_SIZE 32
+#endif
+
+#ifndef FLUSH_COUNT_THRESHOLD
+#define FLUSH_COUNT_THRESHOLD (1 << 17)
+#endif
+
+struct default_internals {
+ struct rte_mbuf **pkts;
+ uint32_t pkts_len;
+ uint32_t txq_pos;
+ uint32_t flush_count;
+};
+
+/**
+ * Traffic Management (TM) Internals
+ */
+
+#ifndef TM_MAX_SUBPORTS
+#define TM_MAX_SUBPORTS 8
+#endif
+
+#ifndef TM_MAX_PIPES_PER_SUBPORT
+#define TM_MAX_PIPES_PER_SUBPORT 4096
+#endif
+
+struct tm_params {
+ struct rte_sched_port_params port_params;
+
+ struct rte_sched_subport_params subport_params[TM_MAX_SUBPORTS];
+
+ struct rte_sched_pipe_params
+ pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PORT];
+ uint32_t n_pipe_profiles;
+ uint32_t pipe_to_profile[TM_MAX_SUBPORTS * TM_MAX_PIPES_PER_SUBPORT];
+};
+
+/* TM Levels */
+enum tm_node_level {
+ TM_NODE_LEVEL_PORT = 0,
+ TM_NODE_LEVEL_SUBPORT,
+ TM_NODE_LEVEL_PIPE,
+ TM_NODE_LEVEL_TC,
+ TM_NODE_LEVEL_QUEUE,
+ TM_NODE_LEVEL_MAX,
+};
+
+/* TM Shaper Profile */
+struct tm_shaper_profile {
+ TAILQ_ENTRY(tm_shaper_profile) node;
+ uint32_t shaper_profile_id;
+ uint32_t n_users;
+ struct rte_tm_shaper_params params;
+};
+
+TAILQ_HEAD(tm_shaper_profile_list, tm_shaper_profile);
+
+/* TM Shared Shaper */
+struct tm_shared_shaper {
+ TAILQ_ENTRY(tm_shared_shaper) node;
+ uint32_t shared_shaper_id;
+ uint32_t n_users;
+ uint32_t shaper_profile_id;
+};
+
+TAILQ_HEAD(tm_shared_shaper_list, tm_shared_shaper);
+
+/* TM WRED Profile */
+struct tm_wred_profile {
+ TAILQ_ENTRY(tm_wred_profile) node;
+ uint32_t wred_profile_id;
+ uint32_t n_users;
+ struct rte_tm_wred_params params;
+};
+
+TAILQ_HEAD(tm_wred_profile_list, tm_wred_profile);
+
+/* TM Node */
+struct tm_node {
+ TAILQ_ENTRY(tm_node) node;
+ uint32_t node_id;
+ uint32_t parent_node_id;
+ uint32_t priority;
+ uint32_t weight;
+ uint32_t level;
+ struct tm_node *parent_node;
+ struct tm_shaper_profile *shaper_profile;
+ struct tm_wred_profile *wred_profile;
+ struct rte_tm_node_params params;
+ struct rte_tm_node_stats stats;
+ uint32_t n_children;
+};
+
+TAILQ_HEAD(tm_node_list, tm_node);
+
+/* TM Hierarchy Specification */
+struct tm_hierarchy {
+ struct tm_shaper_profile_list shaper_profiles;
+ struct tm_shared_shaper_list shared_shapers;
+ struct tm_wred_profile_list wred_profiles;
+ struct tm_node_list nodes;
+
+ uint32_t n_shaper_profiles;
+ uint32_t n_shared_shapers;
+ uint32_t n_wred_profiles;
+ uint32_t n_nodes;
+
+ uint32_t n_tm_nodes[TM_NODE_LEVEL_MAX];
+};
+
+struct tm_internals {
+ /** Hierarchy specification
+ *
+ * -Hierarchy is unfrozen at init and when port is stopped.
+ * -Hierarchy is frozen on successful hierarchy commit.
+ * -Run-time hierarchy changes are not allowed, therefore it makes
+ * sense to keep the hierarchy frozen after the port is started.
+ */
+ struct tm_hierarchy h;
+ int hierarchy_frozen;
+
+ /** Blueprints */
+ struct tm_params params;
+
+ /** Run-time */
+ struct rte_sched_port *sched;
+ struct rte_mbuf **pkts_enq;
+ struct rte_mbuf **pkts_deq;
+ uint32_t pkts_enq_len;
+ uint32_t txq_pos;
+ uint32_t flush_count;
+};
+
+/**
+ * PMD Internals
+ */
+struct pmd_internals {
+ /** Params */
+ struct pmd_params params;
+
+ /** Soft device */
+ struct {
+ struct default_internals def; /**< Default */
+ struct tm_internals tm; /**< Traffic Management */
+ } soft;
+
+ /** Hard device */
+ struct {
+ uint16_t port_id;
+ } hard;
+};
+
+struct pmd_rx_queue {
+ /** Hard device */
+ struct {
+ uint16_t port_id;
+ uint16_t rx_queue_id;
+ } hard;
+};
+
+/**
+ * Traffic Management (TM) Operation
+ */
+extern const struct rte_tm_ops pmd_tm_ops;
+
+int
+tm_params_check(struct pmd_params *params, uint32_t hard_rate);
+
+int
+tm_init(struct pmd_internals *p, struct pmd_params *params, int numa_node);
+
+void
+tm_free(struct pmd_internals *p);
+
+int
+tm_start(struct pmd_internals *p);
+
+void
+tm_stop(struct pmd_internals *p);
+
+static inline int
+tm_enabled(struct rte_eth_dev *dev)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ return (p->params.soft.flags & PMD_FEATURE_TM);
+}
+
+static inline int
+tm_used(struct rte_eth_dev *dev)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ return (p->params.soft.flags & PMD_FEATURE_TM) &&
+ p->soft.tm.h.n_tm_nodes[TM_NODE_LEVEL_PORT];
+}
+
+#endif /* __INCLUDE_RTE_ETH_SOFTNIC_INTERNALS_H__ */
diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
new file mode 100644
index 00000000..dbb25143
--- /dev/null
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -0,0 +1,3452 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_malloc.h>
+
+#include "rte_eth_softnic_internals.h"
+#include "rte_eth_softnic.h"
+
+#define BYTES_IN_MBPS (1000 * 1000 / 8)
+#define SUBPORT_TC_PERIOD 10
+#define PIPE_TC_PERIOD 40
+
+int
+tm_params_check(struct pmd_params *params, uint32_t hard_rate)
+{
+ uint64_t hard_rate_bytes_per_sec = (uint64_t)hard_rate * BYTES_IN_MBPS;
+ uint32_t i;
+
+ /* rate */
+ if (params->soft.tm.rate) {
+ if (params->soft.tm.rate > hard_rate_bytes_per_sec)
+ return -EINVAL;
+ } else {
+ params->soft.tm.rate =
+ (hard_rate_bytes_per_sec > UINT32_MAX) ?
+ UINT32_MAX : hard_rate_bytes_per_sec;
+ }
+
+ /* nb_queues */
+ if (params->soft.tm.nb_queues == 0)
+ return -EINVAL;
+
+ if (params->soft.tm.nb_queues < RTE_SCHED_QUEUES_PER_PIPE)
+ params->soft.tm.nb_queues = RTE_SCHED_QUEUES_PER_PIPE;
+
+ params->soft.tm.nb_queues =
+ rte_align32pow2(params->soft.tm.nb_queues);
+
+ /* qsize */
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+ if (params->soft.tm.qsize[i] == 0)
+ return -EINVAL;
+
+ params->soft.tm.qsize[i] =
+ rte_align32pow2(params->soft.tm.qsize[i]);
+ }
+
+ /* enq_bsz, deq_bsz */
+ if (params->soft.tm.enq_bsz == 0 ||
+ params->soft.tm.deq_bsz == 0 ||
+ params->soft.tm.deq_bsz >= params->soft.tm.enq_bsz)
+ return -EINVAL;
+
+ return 0;
+}
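For example (values assumed, not taken from this patch): a 10 Gbps hard port gives hard_rate = 10000 Mbps, so hard_rate_bytes_per_sec is 1,250,000,000 and becomes the default TM rate when none was supplied; a requested nb_queues of 60000 is rounded up to 65536; and the default enq_bsz/deq_bsz of 32/24 satisfies the deq_bsz < enq_bsz constraint enforced above.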
+
+static void
+tm_hierarchy_init(struct pmd_internals *p)
+{
+ memset(&p->soft.tm.h, 0, sizeof(p->soft.tm.h));
+
+ /* Initialize shaper profile list */
+ TAILQ_INIT(&p->soft.tm.h.shaper_profiles);
+
+ /* Initialize shared shaper list */
+ TAILQ_INIT(&p->soft.tm.h.shared_shapers);
+
+ /* Initialize wred profile list */
+ TAILQ_INIT(&p->soft.tm.h.wred_profiles);
+
+ /* Initialize TM node list */
+ TAILQ_INIT(&p->soft.tm.h.nodes);
+}
+
+static void
+tm_hierarchy_uninit(struct pmd_internals *p)
+{
+ /* Remove all nodes */
+ for ( ; ; ) {
+ struct tm_node *tm_node;
+
+ tm_node = TAILQ_FIRST(&p->soft.tm.h.nodes);
+ if (tm_node == NULL)
+ break;
+
+ TAILQ_REMOVE(&p->soft.tm.h.nodes, tm_node, node);
+ free(tm_node);
+ }
+
+ /* Remove all WRED profiles */
+ for ( ; ; ) {
+ struct tm_wred_profile *wred_profile;
+
+ wred_profile = TAILQ_FIRST(&p->soft.tm.h.wred_profiles);
+ if (wred_profile == NULL)
+ break;
+
+ TAILQ_REMOVE(&p->soft.tm.h.wred_profiles, wred_profile, node);
+ free(wred_profile);
+ }
+
+ /* Remove all shared shapers */
+ for ( ; ; ) {
+ struct tm_shared_shaper *shared_shaper;
+
+ shared_shaper = TAILQ_FIRST(&p->soft.tm.h.shared_shapers);
+ if (shared_shaper == NULL)
+ break;
+
+ TAILQ_REMOVE(&p->soft.tm.h.shared_shapers, shared_shaper, node);
+ free(shared_shaper);
+ }
+
+ /* Remove all shaper profiles */
+ for ( ; ; ) {
+ struct tm_shaper_profile *shaper_profile;
+
+ shaper_profile = TAILQ_FIRST(&p->soft.tm.h.shaper_profiles);
+ if (shaper_profile == NULL)
+ break;
+
+ TAILQ_REMOVE(&p->soft.tm.h.shaper_profiles,
+ shaper_profile, node);
+ free(shaper_profile);
+ }
+
+ memset(&p->soft.tm.h, 0, sizeof(p->soft.tm.h));
+}
+
+int
+tm_init(struct pmd_internals *p,
+ struct pmd_params *params,
+ int numa_node)
+{
+ uint32_t enq_bsz = params->soft.tm.enq_bsz;
+ uint32_t deq_bsz = params->soft.tm.deq_bsz;
+
+ p->soft.tm.pkts_enq = rte_zmalloc_socket(params->soft.name,
+ 2 * enq_bsz * sizeof(struct rte_mbuf *),
+ 0,
+ numa_node);
+
+ if (p->soft.tm.pkts_enq == NULL)
+ return -ENOMEM;
+
+ p->soft.tm.pkts_deq = rte_zmalloc_socket(params->soft.name,
+ deq_bsz * sizeof(struct rte_mbuf *),
+ 0,
+ numa_node);
+
+ if (p->soft.tm.pkts_deq == NULL) {
+ rte_free(p->soft.tm.pkts_enq);
+ return -ENOMEM;
+ }
+
+ tm_hierarchy_init(p);
+
+ return 0;
+}
+
+void
+tm_free(struct pmd_internals *p)
+{
+ tm_hierarchy_uninit(p);
+ rte_free(p->soft.tm.pkts_enq);
+ rte_free(p->soft.tm.pkts_deq);
+}
+
+int
+tm_start(struct pmd_internals *p)
+{
+ struct tm_params *t = &p->soft.tm.params;
+ uint32_t n_subports, subport_id;
+ int status;
+
+ /* Is hierarchy frozen? */
+ if (p->soft.tm.hierarchy_frozen == 0)
+ return -1;
+
+ /* Port */
+ p->soft.tm.sched = rte_sched_port_config(&t->port_params);
+ if (p->soft.tm.sched == NULL)
+ return -1;
+
+ /* Subport */
+ n_subports = t->port_params.n_subports_per_port;
+ for (subport_id = 0; subport_id < n_subports; subport_id++) {
+ uint32_t n_pipes_per_subport =
+ t->port_params.n_pipes_per_subport;
+ uint32_t pipe_id;
+
+ status = rte_sched_subport_config(p->soft.tm.sched,
+ subport_id,
+ &t->subport_params[subport_id]);
+ if (status) {
+ rte_sched_port_free(p->soft.tm.sched);
+ return -1;
+ }
+
+ /* Pipe */
+ n_pipes_per_subport = t->port_params.n_pipes_per_subport;
+ for (pipe_id = 0; pipe_id < n_pipes_per_subport; pipe_id++) {
+ int pos = subport_id * TM_MAX_PIPES_PER_SUBPORT +
+ pipe_id;
+ int profile_id = t->pipe_to_profile[pos];
+
+ if (profile_id < 0)
+ continue;
+
+ status = rte_sched_pipe_config(p->soft.tm.sched,
+ subport_id,
+ pipe_id,
+ profile_id);
+ if (status) {
+ rte_sched_port_free(p->soft.tm.sched);
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void
+tm_stop(struct pmd_internals *p)
+{
+ if (p->soft.tm.sched)
+ rte_sched_port_free(p->soft.tm.sched);
+
+ /* Unfreeze hierarchy */
+ p->soft.tm.hierarchy_frozen = 0;
+}
+
+static struct tm_shaper_profile *
+tm_shaper_profile_search(struct rte_eth_dev *dev, uint32_t shaper_profile_id)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_shaper_profile_list *spl = &p->soft.tm.h.shaper_profiles;
+ struct tm_shaper_profile *sp;
+
+ TAILQ_FOREACH(sp, spl, node)
+ if (shaper_profile_id == sp->shaper_profile_id)
+ return sp;
+
+ return NULL;
+}
+
+static struct tm_shared_shaper *
+tm_shared_shaper_search(struct rte_eth_dev *dev, uint32_t shared_shaper_id)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_shared_shaper_list *ssl = &p->soft.tm.h.shared_shapers;
+ struct tm_shared_shaper *ss;
+
+ TAILQ_FOREACH(ss, ssl, node)
+ if (shared_shaper_id == ss->shared_shaper_id)
+ return ss;
+
+ return NULL;
+}
+
+static struct tm_wred_profile *
+tm_wred_profile_search(struct rte_eth_dev *dev, uint32_t wred_profile_id)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_wred_profile_list *wpl = &p->soft.tm.h.wred_profiles;
+ struct tm_wred_profile *wp;
+
+ TAILQ_FOREACH(wp, wpl, node)
+ if (wred_profile_id == wp->wred_profile_id)
+ return wp;
+
+ return NULL;
+}
+
+static struct tm_node *
+tm_node_search(struct rte_eth_dev *dev, uint32_t node_id)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_node_list *nl = &p->soft.tm.h.nodes;
+ struct tm_node *n;
+
+ TAILQ_FOREACH(n, nl, node)
+ if (n->node_id == node_id)
+ return n;
+
+ return NULL;
+}
+
+static struct tm_node *
+tm_root_node_present(struct rte_eth_dev *dev)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_node_list *nl = &p->soft.tm.h.nodes;
+ struct tm_node *n;
+
+ TAILQ_FOREACH(n, nl, node)
+ if (n->parent_node_id == RTE_TM_NODE_ID_NULL)
+ return n;
+
+ return NULL;
+}
+
+static uint32_t
+tm_node_subport_id(struct rte_eth_dev *dev, struct tm_node *subport_node)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_node_list *nl = &p->soft.tm.h.nodes;
+ struct tm_node *ns;
+ uint32_t subport_id;
+
+ subport_id = 0;
+ TAILQ_FOREACH(ns, nl, node) {
+ if (ns->level != TM_NODE_LEVEL_SUBPORT)
+ continue;
+
+ if (ns->node_id == subport_node->node_id)
+ return subport_id;
+
+ subport_id++;
+ }
+
+ return UINT32_MAX;
+}
+
+static uint32_t
+tm_node_pipe_id(struct rte_eth_dev *dev, struct tm_node *pipe_node)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_node_list *nl = &p->soft.tm.h.nodes;
+ struct tm_node *np;
+ uint32_t pipe_id;
+
+ pipe_id = 0;
+ TAILQ_FOREACH(np, nl, node) {
+ if (np->level != TM_NODE_LEVEL_PIPE ||
+ np->parent_node_id != pipe_node->parent_node_id)
+ continue;
+
+ if (np->node_id == pipe_node->node_id)
+ return pipe_id;
+
+ pipe_id++;
+ }
+
+ return UINT32_MAX;
+}
+
+static uint32_t
+tm_node_tc_id(struct rte_eth_dev *dev __rte_unused, struct tm_node *tc_node)
+{
+ return tc_node->priority;
+}
+
+static uint32_t
+tm_node_queue_id(struct rte_eth_dev *dev, struct tm_node *queue_node)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_node_list *nl = &p->soft.tm.h.nodes;
+ struct tm_node *nq;
+ uint32_t queue_id;
+
+ queue_id = 0;
+ TAILQ_FOREACH(nq, nl, node) {
+ if (nq->level != TM_NODE_LEVEL_QUEUE ||
+ nq->parent_node_id != queue_node->parent_node_id)
+ continue;
+
+ if (nq->node_id == queue_node->node_id)
+ return queue_id;
+
+ queue_id++;
+ }
+
+ return UINT32_MAX;
+}
+
+static uint32_t
+tm_level_get_max_nodes(struct rte_eth_dev *dev, enum tm_node_level level)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ uint32_t n_queues_max = p->params.soft.tm.nb_queues;
+ uint32_t n_tc_max = n_queues_max / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
+ uint32_t n_pipes_max = n_tc_max / RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE;
+ uint32_t n_subports_max = n_pipes_max;
+ uint32_t n_root_max = 1;
+
+ switch (level) {
+ case TM_NODE_LEVEL_PORT:
+ return n_root_max;
+ case TM_NODE_LEVEL_SUBPORT:
+ return n_subports_max;
+ case TM_NODE_LEVEL_PIPE:
+ return n_pipes_max;
+ case TM_NODE_LEVEL_TC:
+ return n_tc_max;
+ case TM_NODE_LEVEL_QUEUE:
+ default:
+ return n_queues_max;
+ }
+}
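As a worked example (assuming the rte_sched defaults of 4 queues per traffic class and 4 traffic classes per pipe): with the default nb_queues of 65536, the per-level maxima computed above are 16384 TC nodes, 4096 pipe nodes, 4096 subport nodes and a single port node.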
+
+/* Traffic manager node type get */
+static int
+pmd_tm_node_type_get(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ int *is_leaf,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ if (is_leaf == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+
+ if (node_id == RTE_TM_NODE_ID_NULL ||
+ (tm_node_search(dev, node_id) == NULL))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ *is_leaf = node_id < p->params.soft.tm.nb_queues;
+
+ return 0;
+}
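For illustration only (not part of this patch), querying a node's type through the generic rte_tm API that lands in the handler above; the node ID is hypothetical:

#include <rte_tm.h>

static int
node_is_leaf_example(uint16_t port_id, uint32_t node_id)
{
        struct rte_tm_error err;
        int is_leaf = 0;

        if (rte_tm_node_type_get(port_id, node_id, &is_leaf, &err))
                return -1;

        return is_leaf; /* 1 for queue (leaf) nodes, 0 otherwise */
}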
+
+#ifdef RTE_SCHED_RED
+#define WRED_SUPPORTED 1
+#else
+#define WRED_SUPPORTED 0
+#endif
+
+#define STATS_MASK_DEFAULT \
+ (RTE_TM_STATS_N_PKTS | \
+ RTE_TM_STATS_N_BYTES | \
+ RTE_TM_STATS_N_PKTS_GREEN_DROPPED | \
+ RTE_TM_STATS_N_BYTES_GREEN_DROPPED)
+
+#define STATS_MASK_QUEUE \
+ (STATS_MASK_DEFAULT | \
+ RTE_TM_STATS_N_PKTS_QUEUED)
+
+static const struct rte_tm_capabilities tm_cap = {
+ .n_nodes_max = UINT32_MAX,
+ .n_levels_max = TM_NODE_LEVEL_MAX,
+
+ .non_leaf_nodes_identical = 0,
+ .leaf_nodes_identical = 1,
+
+ .shaper_n_max = UINT32_MAX,
+ .shaper_private_n_max = UINT32_MAX,
+ .shaper_private_dual_rate_n_max = 0,
+ .shaper_private_rate_min = 1,
+ .shaper_private_rate_max = UINT32_MAX,
+
+ .shaper_shared_n_max = UINT32_MAX,
+ .shaper_shared_n_nodes_per_shaper_max = UINT32_MAX,
+ .shaper_shared_n_shapers_per_node_max = 1,
+ .shaper_shared_dual_rate_n_max = 0,
+ .shaper_shared_rate_min = 1,
+ .shaper_shared_rate_max = UINT32_MAX,
+
+ .shaper_pkt_length_adjust_min = RTE_TM_ETH_FRAMING_OVERHEAD_FCS,
+ .shaper_pkt_length_adjust_max = RTE_TM_ETH_FRAMING_OVERHEAD_FCS,
+
+ .sched_n_children_max = UINT32_MAX,
+ .sched_sp_n_priorities_max = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE,
+ .sched_wfq_n_children_per_group_max = UINT32_MAX,
+ .sched_wfq_n_groups_max = 1,
+ .sched_wfq_weight_max = UINT32_MAX,
+
+ .cman_head_drop_supported = 0,
+ .cman_wred_context_n_max = 0,
+ .cman_wred_context_private_n_max = 0,
+ .cman_wred_context_shared_n_max = 0,
+ .cman_wred_context_shared_n_nodes_per_context_max = 0,
+ .cman_wred_context_shared_n_contexts_per_node_max = 0,
+
+ .mark_vlan_dei_supported = {0, 0, 0},
+ .mark_ip_ecn_tcp_supported = {0, 0, 0},
+ .mark_ip_ecn_sctp_supported = {0, 0, 0},
+ .mark_ip_dscp_supported = {0, 0, 0},
+
+ .dynamic_update_mask = 0,
+
+ .stats_mask = STATS_MASK_QUEUE,
+};
+
+/* Traffic manager capabilities get */
+static int
+pmd_tm_capabilities_get(struct rte_eth_dev *dev __rte_unused,
+ struct rte_tm_capabilities *cap,
+ struct rte_tm_error *error)
+{
+ if (cap == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_CAPABILITIES,
+ NULL,
+ rte_strerror(EINVAL));
+
+ memcpy(cap, &tm_cap, sizeof(*cap));
+
+ cap->n_nodes_max = tm_level_get_max_nodes(dev, TM_NODE_LEVEL_PORT) +
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_SUBPORT) +
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_PIPE) +
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_TC) +
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_QUEUE);
+
+ cap->shaper_private_n_max =
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_PORT) +
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_SUBPORT) +
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_PIPE) +
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_TC);
+
+ cap->shaper_shared_n_max = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE *
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_SUBPORT);
+
+ cap->shaper_n_max = cap->shaper_private_n_max +
+ cap->shaper_shared_n_max;
+
+ cap->shaper_shared_n_nodes_per_shaper_max =
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_PIPE);
+
+ cap->sched_n_children_max = RTE_MAX(
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_PIPE),
+ (uint32_t)RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE);
+
+ cap->sched_wfq_n_children_per_group_max = cap->sched_n_children_max;
+
+ if (WRED_SUPPORTED)
+ cap->cman_wred_context_private_n_max =
+ tm_level_get_max_nodes(dev, TM_NODE_LEVEL_QUEUE);
+
+ cap->cman_wred_context_n_max = cap->cman_wred_context_private_n_max +
+ cap->cman_wred_context_shared_n_max;
+
+ return 0;
+}
+
+static const struct rte_tm_level_capabilities tm_level_cap[] = {
+ [TM_NODE_LEVEL_PORT] = {
+ .n_nodes_max = 1,
+ .n_nodes_nonleaf_max = 1,
+ .n_nodes_leaf_max = 0,
+ .non_leaf_nodes_identical = 1,
+ .leaf_nodes_identical = 0,
+
+ .nonleaf = {
+ .shaper_private_supported = 1,
+ .shaper_private_dual_rate_supported = 0,
+ .shaper_private_rate_min = 1,
+ .shaper_private_rate_max = UINT32_MAX,
+ .shaper_shared_n_max = 0,
+
+ .sched_n_children_max = UINT32_MAX,
+ .sched_sp_n_priorities_max = 1,
+ .sched_wfq_n_children_per_group_max = UINT32_MAX,
+ .sched_wfq_n_groups_max = 1,
+ .sched_wfq_weight_max = 1,
+
+ .stats_mask = STATS_MASK_DEFAULT,
+ },
+ },
+
+ [TM_NODE_LEVEL_SUBPORT] = {
+ .n_nodes_max = UINT32_MAX,
+ .n_nodes_nonleaf_max = UINT32_MAX,
+ .n_nodes_leaf_max = 0,
+ .non_leaf_nodes_identical = 1,
+ .leaf_nodes_identical = 0,
+
+ .nonleaf = {
+ .shaper_private_supported = 1,
+ .shaper_private_dual_rate_supported = 0,
+ .shaper_private_rate_min = 1,
+ .shaper_private_rate_max = UINT32_MAX,
+ .shaper_shared_n_max = 0,
+
+ .sched_n_children_max = UINT32_MAX,
+ .sched_sp_n_priorities_max = 1,
+ .sched_wfq_n_children_per_group_max = UINT32_MAX,
+ .sched_wfq_n_groups_max = 1,
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ .sched_wfq_weight_max = UINT32_MAX,
+#else
+ .sched_wfq_weight_max = 1,
+#endif
+ .stats_mask = STATS_MASK_DEFAULT,
+ },
+ },
+
+ [TM_NODE_LEVEL_PIPE] = {
+ .n_nodes_max = UINT32_MAX,
+ .n_nodes_nonleaf_max = UINT32_MAX,
+ .n_nodes_leaf_max = 0,
+ .non_leaf_nodes_identical = 1,
+ .leaf_nodes_identical = 0,
+
+ .nonleaf = {
+ .shaper_private_supported = 1,
+ .shaper_private_dual_rate_supported = 0,
+ .shaper_private_rate_min = 1,
+ .shaper_private_rate_max = UINT32_MAX,
+ .shaper_shared_n_max = 0,
+
+ .sched_n_children_max =
+ RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE,
+ .sched_sp_n_priorities_max =
+ RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE,
+ .sched_wfq_n_children_per_group_max = 1,
+ .sched_wfq_n_groups_max = 0,
+ .sched_wfq_weight_max = 1,
+
+ .stats_mask = STATS_MASK_DEFAULT,
+ },
+ },
+
+ [TM_NODE_LEVEL_TC] = {
+ .n_nodes_max = UINT32_MAX,
+ .n_nodes_nonleaf_max = UINT32_MAX,
+ .n_nodes_leaf_max = 0,
+ .non_leaf_nodes_identical = 1,
+ .leaf_nodes_identical = 0,
+
+ .nonleaf = {
+ .shaper_private_supported = 1,
+ .shaper_private_dual_rate_supported = 0,
+ .shaper_private_rate_min = 1,
+ .shaper_private_rate_max = UINT32_MAX,
+ .shaper_shared_n_max = 1,
+
+ .sched_n_children_max =
+ RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS,
+ .sched_sp_n_priorities_max = 1,
+ .sched_wfq_n_children_per_group_max =
+ RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS,
+ .sched_wfq_n_groups_max = 1,
+ .sched_wfq_weight_max = UINT32_MAX,
+
+ .stats_mask = STATS_MASK_DEFAULT,
+ },
+ },
+
+ [TM_NODE_LEVEL_QUEUE] = {
+ .n_nodes_max = UINT32_MAX,
+ .n_nodes_nonleaf_max = 0,
+ .n_nodes_leaf_max = UINT32_MAX,
+ .non_leaf_nodes_identical = 0,
+ .leaf_nodes_identical = 1,
+
+ .leaf = {
+ .shaper_private_supported = 0,
+ .shaper_private_dual_rate_supported = 0,
+ .shaper_private_rate_min = 0,
+ .shaper_private_rate_max = 0,
+ .shaper_shared_n_max = 0,
+
+ .cman_head_drop_supported = 0,
+ .cman_wred_context_private_supported = WRED_SUPPORTED,
+ .cman_wred_context_shared_n_max = 0,
+
+ .stats_mask = STATS_MASK_QUEUE,
+ },
+ },
+};
+
+/* Traffic manager level capabilities get */
+static int
+pmd_tm_level_capabilities_get(struct rte_eth_dev *dev __rte_unused,
+ uint32_t level_id,
+ struct rte_tm_level_capabilities *cap,
+ struct rte_tm_error *error)
+{
+ if (cap == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_CAPABILITIES,
+ NULL,
+ rte_strerror(EINVAL));
+
+ if (level_id >= TM_NODE_LEVEL_MAX)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_LEVEL_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ memcpy(cap, &tm_level_cap[level_id], sizeof(*cap));
+
+ switch (level_id) {
+ case TM_NODE_LEVEL_PORT:
+ cap->nonleaf.sched_n_children_max =
+ tm_level_get_max_nodes(dev,
+ TM_NODE_LEVEL_SUBPORT);
+ cap->nonleaf.sched_wfq_n_children_per_group_max =
+ cap->nonleaf.sched_n_children_max;
+ break;
+
+ case TM_NODE_LEVEL_SUBPORT:
+ cap->n_nodes_max = tm_level_get_max_nodes(dev,
+ TM_NODE_LEVEL_SUBPORT);
+ cap->n_nodes_nonleaf_max = cap->n_nodes_max;
+ cap->nonleaf.sched_n_children_max =
+ tm_level_get_max_nodes(dev,
+ TM_NODE_LEVEL_PIPE);
+ cap->nonleaf.sched_wfq_n_children_per_group_max =
+ cap->nonleaf.sched_n_children_max;
+ break;
+
+ case TM_NODE_LEVEL_PIPE:
+ cap->n_nodes_max = tm_level_get_max_nodes(dev,
+ TM_NODE_LEVEL_PIPE);
+ cap->n_nodes_nonleaf_max = cap->n_nodes_max;
+ break;
+
+ case TM_NODE_LEVEL_TC:
+ cap->n_nodes_max = tm_level_get_max_nodes(dev,
+ TM_NODE_LEVEL_TC);
+ cap->n_nodes_nonleaf_max = cap->n_nodes_max;
+ break;
+
+ case TM_NODE_LEVEL_QUEUE:
+ default:
+ cap->n_nodes_max = tm_level_get_max_nodes(dev,
+ TM_NODE_LEVEL_QUEUE);
+ cap->n_nodes_leaf_max = cap->n_nodes_max;
+ break;
+ }
+
+ return 0;
+}
+
+static const struct rte_tm_node_capabilities tm_node_cap[] = {
+ [TM_NODE_LEVEL_PORT] = {
+ .shaper_private_supported = 1,
+ .shaper_private_dual_rate_supported = 0,
+ .shaper_private_rate_min = 1,
+ .shaper_private_rate_max = UINT32_MAX,
+ .shaper_shared_n_max = 0,
+
+ .nonleaf = {
+ .sched_n_children_max = UINT32_MAX,
+ .sched_sp_n_priorities_max = 1,
+ .sched_wfq_n_children_per_group_max = UINT32_MAX,
+ .sched_wfq_n_groups_max = 1,
+ .sched_wfq_weight_max = 1,
+ },
+
+ .stats_mask = STATS_MASK_DEFAULT,
+ },
+
+ [TM_NODE_LEVEL_SUBPORT] = {
+ .shaper_private_supported = 1,
+ .shaper_private_dual_rate_supported = 0,
+ .shaper_private_rate_min = 1,
+ .shaper_private_rate_max = UINT32_MAX,
+ .shaper_shared_n_max = 0,
+
+ .nonleaf = {
+ .sched_n_children_max = UINT32_MAX,
+ .sched_sp_n_priorities_max = 1,
+ .sched_wfq_n_children_per_group_max = UINT32_MAX,
+ .sched_wfq_n_groups_max = 1,
+ .sched_wfq_weight_max = UINT32_MAX,
+ },
+
+ .stats_mask = STATS_MASK_DEFAULT,
+ },
+
+ [TM_NODE_LEVEL_PIPE] = {
+ .shaper_private_supported = 1,
+ .shaper_private_dual_rate_supported = 0,
+ .shaper_private_rate_min = 1,
+ .shaper_private_rate_max = UINT32_MAX,
+ .shaper_shared_n_max = 0,
+
+ .nonleaf = {
+ .sched_n_children_max =
+ RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE,
+ .sched_sp_n_priorities_max =
+ RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE,
+ .sched_wfq_n_children_per_group_max = 1,
+ .sched_wfq_n_groups_max = 0,
+ .sched_wfq_weight_max = 1,
+ },
+
+ .stats_mask = STATS_MASK_DEFAULT,
+ },
+
+ [TM_NODE_LEVEL_TC] = {
+ .shaper_private_supported = 1,
+ .shaper_private_dual_rate_supported = 0,
+ .shaper_private_rate_min = 1,
+ .shaper_private_rate_max = UINT32_MAX,
+ .shaper_shared_n_max = 1,
+
+ .nonleaf = {
+ .sched_n_children_max =
+ RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS,
+ .sched_sp_n_priorities_max = 1,
+ .sched_wfq_n_children_per_group_max =
+ RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS,
+ .sched_wfq_n_groups_max = 1,
+ .sched_wfq_weight_max = UINT32_MAX,
+ },
+
+ .stats_mask = STATS_MASK_DEFAULT,
+ },
+
+ [TM_NODE_LEVEL_QUEUE] = {
+ .shaper_private_supported = 0,
+ .shaper_private_dual_rate_supported = 0,
+ .shaper_private_rate_min = 0,
+ .shaper_private_rate_max = 0,
+ .shaper_shared_n_max = 0,
+
+ .leaf = {
+ .cman_head_drop_supported = 0,
+ .cman_wred_context_private_supported = WRED_SUPPORTED,
+ .cman_wred_context_shared_n_max = 0,
+ },
+
+ .stats_mask = STATS_MASK_QUEUE,
+ },
+};
+
+/* Traffic manager node capabilities get */
+static int
+pmd_tm_node_capabilities_get(struct rte_eth_dev *dev __rte_unused,
+ uint32_t node_id,
+ struct rte_tm_node_capabilities *cap,
+ struct rte_tm_error *error)
+{
+ struct tm_node *tm_node;
+
+ if (cap == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_CAPABILITIES,
+ NULL,
+ rte_strerror(EINVAL));
+
+ tm_node = tm_node_search(dev, node_id);
+ if (tm_node == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ memcpy(cap, &tm_node_cap[tm_node->level], sizeof(*cap));
+
+ switch (tm_node->level) {
+ case TM_NODE_LEVEL_PORT:
+ cap->nonleaf.sched_n_children_max =
+ tm_level_get_max_nodes(dev,
+ TM_NODE_LEVEL_SUBPORT);
+ cap->nonleaf.sched_wfq_n_children_per_group_max =
+ cap->nonleaf.sched_n_children_max;
+ break;
+
+ case TM_NODE_LEVEL_SUBPORT:
+ cap->nonleaf.sched_n_children_max =
+ tm_level_get_max_nodes(dev,
+ TM_NODE_LEVEL_PIPE);
+ cap->nonleaf.sched_wfq_n_children_per_group_max =
+ cap->nonleaf.sched_n_children_max;
+ break;
+
+ case TM_NODE_LEVEL_PIPE:
+ case TM_NODE_LEVEL_TC:
+ case TM_NODE_LEVEL_QUEUE:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int
+shaper_profile_check(struct rte_eth_dev *dev,
+ uint32_t shaper_profile_id,
+ struct rte_tm_shaper_params *profile,
+ struct rte_tm_error *error)
+{
+ struct tm_shaper_profile *sp;
+
+ /* Shaper profile ID must not be NONE. */
+ if (shaper_profile_id == RTE_TM_SHAPER_PROFILE_ID_NONE)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Shaper profile must not exist. */
+ sp = tm_shaper_profile_search(dev, shaper_profile_id);
+ if (sp)
+ return -rte_tm_error_set(error,
+ EEXIST,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
+ NULL,
+ rte_strerror(EEXIST));
+
+ /* Profile must not be NULL. */
+ if (profile == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Peak rate: non-zero, 32-bit */
+ if (profile->peak.rate == 0 ||
+ profile->peak.rate >= UINT32_MAX)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_RATE,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Peak size: non-zero, 32-bit */
+ if (profile->peak.size == 0 ||
+ profile->peak.size >= UINT32_MAX)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Dual-rate profiles are not supported. */
+ if (profile->committed.rate != 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_RATE,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Packet length adjust: 24 bytes */
+ if (profile->pkt_length_adjust != RTE_TM_ETH_FRAMING_OVERHEAD_FCS)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN,
+ NULL,
+ rte_strerror(EINVAL));
+
+ return 0;
+}
+
+/* Traffic manager shaper profile add */
+static int
+pmd_tm_shaper_profile_add(struct rte_eth_dev *dev,
+ uint32_t shaper_profile_id,
+ struct rte_tm_shaper_params *profile,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_shaper_profile_list *spl = &p->soft.tm.h.shaper_profiles;
+ struct tm_shaper_profile *sp;
+ int status;
+
+ /* Check input params */
+ status = shaper_profile_check(dev, shaper_profile_id, profile, error);
+ if (status)
+ return status;
+
+ /* Memory allocation */
+ sp = calloc(1, sizeof(struct tm_shaper_profile));
+ if (sp == NULL)
+ return -rte_tm_error_set(error,
+ ENOMEM,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(ENOMEM));
+
+ /* Fill in */
+ sp->shaper_profile_id = shaper_profile_id;
+ memcpy(&sp->params, profile, sizeof(sp->params));
+
+ /* Add to list */
+ TAILQ_INSERT_TAIL(spl, sp, node);
+ p->soft.tm.h.n_shaper_profiles++;
+
+ return 0;
+}
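For illustration only (not part of this patch), a sketch of adding a shaper profile through the generic rte_tm API so that it passes the checks above (non-zero 32-bit peak rate and size, no committed rate, 24-byte packet length adjust); the rate, bucket size and profile ID are assumptions:

#include <rte_tm.h>

static int
shaper_profile_add_example(uint16_t port_id)
{
        struct rte_tm_error err;
        struct rte_tm_shaper_params sp = {
                .peak = { .rate = 1250000000, .size = 1000000 },
                .pkt_length_adjust = RTE_TM_ETH_FRAMING_OVERHEAD_FCS,
        };

        /* Profile ID 0 is arbitrary; it must simply not exist yet. */
        return rte_tm_shaper_profile_add(port_id, 0, &sp, &err);
}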
+
+/* Traffic manager shaper profile delete */
+static int
+pmd_tm_shaper_profile_delete(struct rte_eth_dev *dev,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_shaper_profile *sp;
+
+ /* Check existing */
+ sp = tm_shaper_profile_search(dev, shaper_profile_id);
+ if (sp == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Check unused */
+ if (sp->n_users)
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
+ NULL,
+ rte_strerror(EBUSY));
+
+ /* Remove from list */
+ TAILQ_REMOVE(&p->soft.tm.h.shaper_profiles, sp, node);
+ p->soft.tm.h.n_shaper_profiles--;
+ free(sp);
+
+ return 0;
+}
+
+static struct tm_node *
+tm_shared_shaper_get_tc(struct rte_eth_dev *dev,
+ struct tm_shared_shaper *ss)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_node_list *nl = &p->soft.tm.h.nodes;
+ struct tm_node *n;
+
+ /* Subport: each TC uses shared shaper */
+ TAILQ_FOREACH(n, nl, node) {
+ if (n->level != TM_NODE_LEVEL_TC ||
+ n->params.n_shared_shapers == 0 ||
+ n->params.shared_shaper_id[0] != ss->shared_shaper_id)
+ continue;
+
+ return n;
+ }
+
+ return NULL;
+}
+
+static int
+update_subport_tc_rate(struct rte_eth_dev *dev,
+ struct tm_node *nt,
+ struct tm_shared_shaper *ss,
+ struct tm_shaper_profile *sp_new)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ uint32_t tc_id = tm_node_tc_id(dev, nt);
+
+ struct tm_node *np = nt->parent_node;
+
+ struct tm_node *ns = np->parent_node;
+ uint32_t subport_id = tm_node_subport_id(dev, ns);
+
+ struct rte_sched_subport_params subport_params;
+
+ struct tm_shaper_profile *sp_old = tm_shaper_profile_search(dev,
+ ss->shaper_profile_id);
+
+ /* Derive new subport configuration. */
+ memcpy(&subport_params,
+ &p->soft.tm.params.subport_params[subport_id],
+ sizeof(subport_params));
+ subport_params.tc_rate[tc_id] = sp_new->params.peak.rate;
+
+ /* Update the subport configuration. */
+ if (rte_sched_subport_config(p->soft.tm.sched,
+ subport_id, &subport_params))
+ return -1;
+
+ /* Commit changes. */
+ sp_old->n_users--;
+
+ ss->shaper_profile_id = sp_new->shaper_profile_id;
+ sp_new->n_users++;
+
+ memcpy(&p->soft.tm.params.subport_params[subport_id],
+ &subport_params,
+ sizeof(subport_params));
+
+ return 0;
+}
+
+/* Traffic manager shared shaper add/update */
+static int
+pmd_tm_shared_shaper_add_update(struct rte_eth_dev *dev,
+ uint32_t shared_shaper_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_shared_shaper *ss;
+ struct tm_shaper_profile *sp;
+ struct tm_node *nt;
+
+ /* Shaper profile must be valid. */
+ sp = tm_shaper_profile_search(dev, shaper_profile_id);
+ if (sp == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /**
+ * Add new shared shaper
+ */
+ ss = tm_shared_shaper_search(dev, shared_shaper_id);
+ if (ss == NULL) {
+ struct tm_shared_shaper_list *ssl =
+ &p->soft.tm.h.shared_shapers;
+
+ /* Hierarchy must not be frozen */
+ if (p->soft.tm.hierarchy_frozen)
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EBUSY));
+
+ /* Memory allocation */
+ ss = calloc(1, sizeof(struct tm_shared_shaper));
+ if (ss == NULL)
+ return -rte_tm_error_set(error,
+ ENOMEM,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(ENOMEM));
+
+ /* Fill in */
+ ss->shared_shaper_id = shared_shaper_id;
+ ss->shaper_profile_id = shaper_profile_id;
+
+ /* Add to list */
+ TAILQ_INSERT_TAIL(ssl, ss, node);
+ p->soft.tm.h.n_shared_shapers++;
+
+ return 0;
+ }
+
+ /**
+ * Update existing shared shaper
+ */
+ /* Hierarchy must be frozen (run-time update) */
+ if (p->soft.tm.hierarchy_frozen == 0)
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EBUSY));
+
+
+ /* Propagate change. */
+ nt = tm_shared_shaper_get_tc(dev, ss);
+ if (update_subport_tc_rate(dev, nt, ss, sp))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+
+ return 0;
+}
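For illustration only (not part of this patch), creating a shared shaper over an existing shaper profile via the generic API; the shared shaper ID is hypothetical, and a subport TC node would later reference it through its shared_shaper_id[] array:

#include <rte_tm.h>

static int
shared_shaper_add_example(uint16_t port_id, uint32_t shaper_profile_id)
{
        struct rte_tm_error err;

        /* Before hierarchy commit this creates the shared shaper;
         * calling it again afterwards updates the subport TC rate
         * at run-time, as implemented above.
         */
        return rte_tm_shared_shaper_add_update(port_id, 0,
                shaper_profile_id, &err);
}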
+
+/* Traffic manager shared shaper delete */
+static int
+pmd_tm_shared_shaper_delete(struct rte_eth_dev *dev,
+ uint32_t shared_shaper_id,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_shared_shaper *ss;
+
+ /* Check existing */
+ ss = tm_shared_shaper_search(dev, shared_shaper_id);
+ if (ss == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_SHARED_SHAPER_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Check unused */
+ if (ss->n_users)
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_SHARED_SHAPER_ID,
+ NULL,
+ rte_strerror(EBUSY));
+
+ /* Remove from list */
+ TAILQ_REMOVE(&p->soft.tm.h.shared_shapers, ss, node);
+ p->soft.tm.h.n_shared_shapers--;
+ free(ss);
+
+ return 0;
+}
+
+static int
+wred_profile_check(struct rte_eth_dev *dev,
+ uint32_t wred_profile_id,
+ struct rte_tm_wred_params *profile,
+ struct rte_tm_error *error)
+{
+ struct tm_wred_profile *wp;
+ enum rte_tm_color color;
+
+ /* WRED profile ID must not be NONE. */
+ if (wred_profile_id == RTE_TM_WRED_PROFILE_ID_NONE)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_WRED_PROFILE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* WRED profile must not exist. */
+ wp = tm_wred_profile_search(dev, wred_profile_id);
+ if (wp)
+ return -rte_tm_error_set(error,
+ EEXIST,
+ RTE_TM_ERROR_TYPE_WRED_PROFILE_ID,
+ NULL,
+ rte_strerror(EEXIST));
+
+ /* Profile must not be NULL. */
+ if (profile == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_WRED_PROFILE,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* min_th <= max_th, max_th > 0 */
+ for (color = RTE_TM_GREEN; color < RTE_TM_COLORS; color++) {
+ uint16_t min_th = profile->red_params[color].min_th;
+ uint16_t max_th = profile->red_params[color].max_th;
+
+ if (min_th > max_th || max_th == 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_WRED_PROFILE,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+
+ return 0;
+}
+
+/* Traffic manager WRED profile add */
+static int
+pmd_tm_wred_profile_add(struct rte_eth_dev *dev,
+ uint32_t wred_profile_id,
+ struct rte_tm_wred_params *profile,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_wred_profile_list *wpl = &p->soft.tm.h.wred_profiles;
+ struct tm_wred_profile *wp;
+ int status;
+
+ /* Check input params */
+ status = wred_profile_check(dev, wred_profile_id, profile, error);
+ if (status)
+ return status;
+
+ /* Memory allocation */
+ wp = calloc(1, sizeof(struct tm_wred_profile));
+ if (wp == NULL)
+ return -rte_tm_error_set(error,
+ ENOMEM,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(ENOMEM));
+
+ /* Fill in */
+ wp->wred_profile_id = wred_profile_id;
+ memcpy(&wp->params, profile, sizeof(wp->params));
+
+ /* Add to list */
+ TAILQ_INSERT_TAIL(wpl, wp, node);
+ p->soft.tm.h.n_wred_profiles++;
+
+ return 0;
+}
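For illustration only (not part of this patch), adding a WRED profile through the generic API with identical, hypothetical thresholds for every color; the field names follow the rte_tm RED parameter structure of this DPDK generation, and the values satisfy the min_th <= max_th, max_th > 0 check above:

#include <string.h>
#include <rte_tm.h>

static int
wred_profile_add_example(uint16_t port_id)
{
        struct rte_tm_error err;
        struct rte_tm_wred_params wp;
        enum rte_tm_color c;

        memset(&wp, 0, sizeof(wp));
        for (c = RTE_TM_GREEN; c < RTE_TM_COLORS; c++) {
                wp.red_params[c].min_th = 32;
                wp.red_params[c].max_th = 64;
                wp.red_params[c].maxp_inv = 10;
                wp.red_params[c].wq_log2 = 9;
        }

        /* WRED profile ID 0 is arbitrary; it must not exist yet. */
        return rte_tm_wred_profile_add(port_id, 0, &wp, &err);
}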
+
+/* Traffic manager WRED profile delete */
+static int
+pmd_tm_wred_profile_delete(struct rte_eth_dev *dev,
+ uint32_t wred_profile_id,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_wred_profile *wp;
+
+ /* Check existing */
+ wp = tm_wred_profile_search(dev, wred_profile_id);
+ if (wp == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_WRED_PROFILE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Check unused */
+ if (wp->n_users)
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_WRED_PROFILE_ID,
+ NULL,
+ rte_strerror(EBUSY));
+
+ /* Remove from list */
+ TAILQ_REMOVE(&p->soft.tm.h.wred_profiles, wp, node);
+ p->soft.tm.h.n_wred_profiles--;
+ free(wp);
+
+ return 0;
+}
+
+static int
+node_add_check_port(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t parent_node_id __rte_unused,
+ uint32_t priority,
+ uint32_t weight,
+ uint32_t level_id __rte_unused,
+ struct rte_tm_node_params *params,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_shaper_profile *sp = tm_shaper_profile_search(dev,
+ params->shaper_profile_id);
+
+ /* node type: non-leaf */
+ if (node_id < p->params.soft.tm.nb_queues)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Priority must be 0 */
+ if (priority != 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PRIORITY,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Weight must be 1 */
+ if (weight != 1)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_WEIGHT,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Shaper must be valid.
+ * Shaper profile peak rate must fit the configured port rate.
+ */
+ if (params->shaper_profile_id == RTE_TM_SHAPER_PROFILE_ID_NONE ||
+ sp == NULL ||
+ sp->params.peak.rate > p->params.soft.tm.rate)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* No shared shapers */
+ if (params->n_shared_shapers != 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Number of SP priorities must be 1 */
+ if (params->nonleaf.n_sp_priorities != 1)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Stats */
+ if (params->stats_mask & ~STATS_MASK_DEFAULT)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS,
+ NULL,
+ rte_strerror(EINVAL));
+
+ return 0;
+}
+
+static int
+node_add_check_subport(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t parent_node_id __rte_unused,
+ uint32_t priority,
+ uint32_t weight,
+ uint32_t level_id __rte_unused,
+ struct rte_tm_node_params *params,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ /* node type: non-leaf */
+ if (node_id < p->params.soft.tm.nb_queues)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Priority must be 0 */
+ if (priority != 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PRIORITY,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Weight must be 1 */
+ if (weight != 1)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_WEIGHT,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Shaper must be valid */
+ if (params->shaper_profile_id == RTE_TM_SHAPER_PROFILE_ID_NONE ||
+ (!tm_shaper_profile_search(dev, params->shaper_profile_id)))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* No shared shapers */
+ if (params->n_shared_shapers != 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Number of SP priorities must be 1 */
+ if (params->nonleaf.n_sp_priorities != 1)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Stats */
+ if (params->stats_mask & ~STATS_MASK_DEFAULT)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS,
+ NULL,
+ rte_strerror(EINVAL));
+
+ return 0;
+}
+
+static int
+node_add_check_pipe(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t parent_node_id __rte_unused,
+ uint32_t priority,
+ uint32_t weight __rte_unused,
+ uint32_t level_id __rte_unused,
+ struct rte_tm_node_params *params,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ /* node type: non-leaf */
+ if (node_id < p->params.soft.tm.nb_queues)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Priority must be 0 */
+ if (priority != 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PRIORITY,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Shaper must be valid */
+ if (params->shaper_profile_id == RTE_TM_SHAPER_PROFILE_ID_NONE ||
+ (!tm_shaper_profile_search(dev, params->shaper_profile_id)))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* No shared shapers */
+ if (params->n_shared_shapers != 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Number of SP priorities must be 4 */
+ if (params->nonleaf.n_sp_priorities !=
+ RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* WFQ mode must be byte mode */
+ if (params->nonleaf.wfq_weight_mode != NULL &&
+ params->nonleaf.wfq_weight_mode[0] != 0 &&
+ params->nonleaf.wfq_weight_mode[1] != 0 &&
+ params->nonleaf.wfq_weight_mode[2] != 0 &&
+ params->nonleaf.wfq_weight_mode[3] != 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Stats */
+ if (params->stats_mask & ~STATS_MASK_DEFAULT)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS,
+ NULL,
+ rte_strerror(EINVAL));
+
+ return 0;
+}
+
+static int
+node_add_check_tc(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t parent_node_id __rte_unused,
+ uint32_t priority __rte_unused,
+ uint32_t weight,
+ uint32_t level_id __rte_unused,
+ struct rte_tm_node_params *params,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ /* node type: non-leaf */
+ if (node_id < p->params.soft.tm.nb_queues)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Weight must be 1 */
+ if (weight != 1)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_WEIGHT,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Shaper must be valid */
+ if (params->shaper_profile_id == RTE_TM_SHAPER_PROFILE_ID_NONE ||
+ (!tm_shaper_profile_search(dev, params->shaper_profile_id)))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Single valid shared shaper */
+ if (params->n_shared_shapers > 1)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS,
+ NULL,
+ rte_strerror(EINVAL));
+
+ if (params->n_shared_shapers == 1 &&
+ (params->shared_shaper_id == NULL ||
+ (!tm_shared_shaper_search(dev, params->shared_shaper_id[0]))))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Number of SP priorities must be 1 */
+ if (params->nonleaf.n_sp_priorities != 1)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Stats */
+ if (params->stats_mask & ~STATS_MASK_DEFAULT)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS,
+ NULL,
+ rte_strerror(EINVAL));
+
+ return 0;
+}
+
+static int
+node_add_check_queue(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t parent_node_id __rte_unused,
+ uint32_t priority,
+ uint32_t weight __rte_unused,
+ uint32_t level_id __rte_unused,
+ struct rte_tm_node_params *params,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ /* node type: leaf */
+ if (node_id >= p->params.soft.tm.nb_queues)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Priority must be 0 */
+ if (priority != 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PRIORITY,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* No shaper */
+ if (params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* No shared shapers */
+ if (params->n_shared_shapers != 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Congestion management must not be head drop */
+ if (params->leaf.cman == RTE_TM_CMAN_HEAD_DROP)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Congestion management set to WRED */
+ if (params->leaf.cman == RTE_TM_CMAN_WRED) {
+ uint32_t wred_profile_id = params->leaf.wred.wred_profile_id;
+ struct tm_wred_profile *wp = tm_wred_profile_search(dev,
+ wred_profile_id);
+
+ /* WRED profile (for private WRED context) must be valid */
+ if (wred_profile_id == RTE_TM_WRED_PROFILE_ID_NONE ||
+ wp == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* No shared WRED contexts */
+ if (params->leaf.wred.n_shared_wred_contexts != 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+
+ /* Stats */
+ if (params->stats_mask & ~STATS_MASK_QUEUE)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS,
+ NULL,
+ rte_strerror(EINVAL));
+
+ return 0;
+}
+
+static int
+node_add_check(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t parent_node_id,
+ uint32_t priority,
+ uint32_t weight,
+ uint32_t level_id,
+ struct rte_tm_node_params *params,
+ struct rte_tm_error *error)
+{
+ struct tm_node *pn;
+ uint32_t level;
+ int status;
+
+ /* node_id, parent_node_id:
+ * -node_id must not be RTE_TM_NODE_ID_NULL
+ * -node_id must not be in use
+ * -root node add (parent_node_id is RTE_TM_NODE_ID_NULL):
+ * -root node must not exist
+ * -non-root node add (parent_node_id is not RTE_TM_NODE_ID_NULL):
+ * -parent_node_id must be valid
+ */
+ if (node_id == RTE_TM_NODE_ID_NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ if (tm_node_search(dev, node_id))
+ return -rte_tm_error_set(error,
+ EEXIST,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EEXIST));
+
+ if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+ pn = NULL;
+ if (tm_root_node_present(dev))
+ return -rte_tm_error_set(error,
+ EEXIST,
+ RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID,
+ NULL,
+ rte_strerror(EEXIST));
+ } else {
+ pn = tm_node_search(dev, parent_node_id);
+ if (pn == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+
+ /* priority: must be 0 .. 3 */
+ if (priority >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PRIORITY,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* weight: must be non-zero and less than UINT8_MAX */
+ if (weight == 0 || weight >= UINT8_MAX)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_WEIGHT,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* level_id: if valid, then
+ * -root node add (parent_node_id is RTE_TM_NODE_ID_NULL):
+ * -level_id must be zero
+ * -non-root node add (parent_node_id is not RTE_TM_NODE_ID_NULL):
+ * -level_id must be parent level ID plus one
+ */
+ level = (pn == NULL) ? 0 : pn->level + 1;
+ if (level_id != RTE_TM_NODE_LEVEL_ID_ANY && level_id != level)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_LEVEL_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* params: must not be NULL */
+ if (params == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARAMS,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* params: per level checks */
+ switch (level) {
+ case TM_NODE_LEVEL_PORT:
+ status = node_add_check_port(dev, node_id,
+ parent_node_id, priority, weight, level_id,
+ params, error);
+ if (status)
+ return status;
+ break;
+
+ case TM_NODE_LEVEL_SUBPORT:
+ status = node_add_check_subport(dev, node_id,
+ parent_node_id, priority, weight, level_id,
+ params, error);
+ if (status)
+ return status;
+ break;
+
+ case TM_NODE_LEVEL_PIPE:
+ status = node_add_check_pipe(dev, node_id,
+ parent_node_id, priority, weight, level_id,
+ params, error);
+ if (status)
+ return status;
+ break;
+
+ case TM_NODE_LEVEL_TC:
+ status = node_add_check_tc(dev, node_id,
+ parent_node_id, priority, weight, level_id,
+ params, error);
+ if (status)
+ return status;
+ break;
+
+ case TM_NODE_LEVEL_QUEUE:
+ status = node_add_check_queue(dev, node_id,
+ parent_node_id, priority, weight, level_id,
+ params, error);
+ if (status)
+ return status;
+ break;
+
+ default:
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_LEVEL_ID,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+
+ return 0;
+}
+
+/* Traffic manager node add */
+static int
+pmd_tm_node_add(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t parent_node_id,
+ uint32_t priority,
+ uint32_t weight,
+ uint32_t level_id,
+ struct rte_tm_node_params *params,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_node_list *nl = &p->soft.tm.h.nodes;
+ struct tm_node *n;
+ uint32_t i;
+ int status;
+
+ /* Checks */
+ if (p->soft.tm.hierarchy_frozen)
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EBUSY));
+
+ status = node_add_check(dev, node_id, parent_node_id, priority, weight,
+ level_id, params, error);
+ if (status)
+ return status;
+
+ /* Memory allocation */
+ n = calloc(1, sizeof(struct tm_node));
+ if (n == NULL)
+ return -rte_tm_error_set(error,
+ ENOMEM,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(ENOMEM));
+
+ /* Fill in */
+ n->node_id = node_id;
+ n->parent_node_id = parent_node_id;
+ n->priority = priority;
+ n->weight = weight;
+
+ if (parent_node_id != RTE_TM_NODE_ID_NULL) {
+ n->parent_node = tm_node_search(dev, parent_node_id);
+ n->level = n->parent_node->level + 1;
+ }
+
+ if (params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE)
+ n->shaper_profile = tm_shaper_profile_search(dev,
+ params->shaper_profile_id);
+
+ if (n->level == TM_NODE_LEVEL_QUEUE &&
+ params->leaf.cman == RTE_TM_CMAN_WRED)
+ n->wred_profile = tm_wred_profile_search(dev,
+ params->leaf.wred.wred_profile_id);
+
+ memcpy(&n->params, params, sizeof(n->params));
+
+ /* Add to list */
+ TAILQ_INSERT_TAIL(nl, n, node);
+ p->soft.tm.h.n_nodes++;
+
+ /* Update dependencies */
+ if (n->parent_node)
+ n->parent_node->n_children++;
+
+ if (n->shaper_profile)
+ n->shaper_profile->n_users++;
+
+ for (i = 0; i < params->n_shared_shapers; i++) {
+ struct tm_shared_shaper *ss;
+
+ ss = tm_shared_shaper_search(dev, params->shared_shaper_id[i]);
+ ss->n_users++;
+ }
+
+ if (n->wred_profile)
+ n->wred_profile->n_users++;
+
+ p->soft.tm.h.n_tm_nodes[n->level]++;
+
+ return 0;
+}
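/*
 * A minimal sketch of exercising the node add path above through the generic
 * rte_tm API, assuming the rte_tm.h prototypes of this release and a shaper
 * profile already created with rte_tm_shaper_profile_add(). Per the checks
 * above, node IDs below the number of queues are reserved for leaf nodes, so
 * any larger ID (such as the one used here) denotes a non-leaf node.
 */
#include <string.h>
#include <rte_tm.h>

static int
softnic_add_root_node(uint16_t port_id, uint32_t shaper_profile_id)
{
	struct rte_tm_node_params np;
	struct rte_tm_error err;

	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = shaper_profile_id;
	np.nonleaf.n_sp_priorities = 1;

	/* Root node: no parent, priority 0, weight 1, port level (0). */
	return rte_tm_node_add(port_id, 1000000, RTE_TM_NODE_ID_NULL,
		0, 1, 0, &np, &err);
}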
+
+/* Traffic manager node delete */
+static int
+pmd_tm_node_delete(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_node *n;
+ uint32_t i;
+
+ /* Check hierarchy changes are currently allowed */
+ if (p->soft.tm.hierarchy_frozen)
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EBUSY));
+
+ /* Check existing */
+ n = tm_node_search(dev, node_id);
+ if (n == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Check unused */
+ if (n->n_children)
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EBUSY));
+
+ /* Update dependencies */
+ p->soft.tm.h.n_tm_nodes[n->level]--;
+
+ if (n->wred_profile)
+ n->wred_profile->n_users--;
+
+ for (i = 0; i < n->params.n_shared_shapers; i++) {
+ struct tm_shared_shaper *ss;
+
+ ss = tm_shared_shaper_search(dev,
+ n->params.shared_shaper_id[i]);
+ ss->n_users--;
+ }
+
+ if (n->shaper_profile)
+ n->shaper_profile->n_users--;
+
+ if (n->parent_node)
+ n->parent_node->n_children--;
+
+ /* Remove from list */
+ TAILQ_REMOVE(&p->soft.tm.h.nodes, n, node);
+ p->soft.tm.h.n_nodes--;
+ free(n);
+
+ return 0;
+}
+
+static void
+pipe_profile_build(struct rte_eth_dev *dev,
+ struct tm_node *np,
+ struct rte_sched_pipe_params *pp)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_hierarchy *h = &p->soft.tm.h;
+ struct tm_node_list *nl = &h->nodes;
+ struct tm_node *nt, *nq;
+
+ memset(pp, 0, sizeof(*pp));
+
+ /* Pipe */
+ pp->tb_rate = np->shaper_profile->params.peak.rate;
+ pp->tb_size = np->shaper_profile->params.peak.size;
+
+ /* Traffic Class (TC) */
+ pp->tc_period = PIPE_TC_PERIOD;
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ pp->tc_ov_weight = np->weight;
+#endif
+
+ TAILQ_FOREACH(nt, nl, node) {
+ uint32_t queue_id = 0;
+
+ if (nt->level != TM_NODE_LEVEL_TC ||
+ nt->parent_node_id != np->node_id)
+ continue;
+
+ pp->tc_rate[nt->priority] =
+ nt->shaper_profile->params.peak.rate;
+
+ /* Queue */
+ TAILQ_FOREACH(nq, nl, node) {
+ uint32_t pipe_queue_id;
+
+ if (nq->level != TM_NODE_LEVEL_QUEUE ||
+ nq->parent_node_id != nt->node_id)
+ continue;
+
+ pipe_queue_id = nt->priority *
+ RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue_id;
+ pp->wrr_weights[pipe_queue_id] = nq->weight;
+
+ queue_id++;
+ }
+ }
+}
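/*
 * A minimal standalone sketch of the wrr_weights[] indexing used by
 * pipe_profile_build() above, assuming the librte_sched defaults of 4
 * traffic classes per pipe and 4 queues per traffic class (16 queues per
 * pipe). No DPDK headers are needed; it only prints the flat queue slots.
 */
#include <stdio.h>

#define QUEUES_PER_TC 4	/* assumed RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS */
#define TCS_PER_PIPE 4	/* assumed RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE */

int main(void)
{
	unsigned int tc, q;

	/* Queues of TC #i occupy slots [i * 4 .. i * 4 + 3] of wrr_weights[]. */
	for (tc = 0; tc < TCS_PER_PIPE; tc++)
		for (q = 0; q < QUEUES_PER_TC; q++)
			printf("TC %u queue %u -> wrr_weights[%u]\n",
			       tc, q, tc * QUEUES_PER_TC + q);

	return 0;
}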
+
+static int
+pipe_profile_free_exists(struct rte_eth_dev *dev,
+ uint32_t *pipe_profile_id)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_params *t = &p->soft.tm.params;
+
+ if (t->n_pipe_profiles < RTE_SCHED_PIPE_PROFILES_PER_PORT) {
+ *pipe_profile_id = t->n_pipe_profiles;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+pipe_profile_exists(struct rte_eth_dev *dev,
+ struct rte_sched_pipe_params *pp,
+ uint32_t *pipe_profile_id)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_params *t = &p->soft.tm.params;
+ uint32_t i;
+
+ for (i = 0; i < t->n_pipe_profiles; i++)
+ if (memcmp(&t->pipe_profiles[i], pp, sizeof(*pp)) == 0) {
+ if (pipe_profile_id)
+ *pipe_profile_id = i;
+ return 1;
+ }
+
+ return 0;
+}
+
+static void
+pipe_profile_install(struct rte_eth_dev *dev,
+ struct rte_sched_pipe_params *pp,
+ uint32_t pipe_profile_id)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_params *t = &p->soft.tm.params;
+
+ memcpy(&t->pipe_profiles[pipe_profile_id], pp, sizeof(*pp));
+ t->n_pipe_profiles++;
+}
+
+static void
+pipe_profile_mark(struct rte_eth_dev *dev,
+ uint32_t subport_id,
+ uint32_t pipe_id,
+ uint32_t pipe_profile_id)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_hierarchy *h = &p->soft.tm.h;
+ struct tm_params *t = &p->soft.tm.params;
+ uint32_t n_pipes_per_subport, pos;
+
+ n_pipes_per_subport = h->n_tm_nodes[TM_NODE_LEVEL_PIPE] /
+ h->n_tm_nodes[TM_NODE_LEVEL_SUBPORT];
+ pos = subport_id * n_pipes_per_subport + pipe_id;
+
+ t->pipe_to_profile[pos] = pipe_profile_id;
+}
+
+static struct rte_sched_pipe_params *
+pipe_profile_get(struct rte_eth_dev *dev, struct tm_node *np)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_hierarchy *h = &p->soft.tm.h;
+ struct tm_params *t = &p->soft.tm.params;
+ uint32_t n_pipes_per_subport = h->n_tm_nodes[TM_NODE_LEVEL_PIPE] /
+ h->n_tm_nodes[TM_NODE_LEVEL_SUBPORT];
+
+ uint32_t subport_id = tm_node_subport_id(dev, np->parent_node);
+ uint32_t pipe_id = tm_node_pipe_id(dev, np);
+
+ uint32_t pos = subport_id * n_pipes_per_subport + pipe_id;
+ uint32_t pipe_profile_id = t->pipe_to_profile[pos];
+
+ return &t->pipe_profiles[pipe_profile_id];
+}
+
+static int
+pipe_profiles_generate(struct rte_eth_dev *dev)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_hierarchy *h = &p->soft.tm.h;
+ struct tm_node_list *nl = &h->nodes;
+ struct tm_node *ns, *np;
+ uint32_t subport_id;
+
+ /* Objective: Fill in the following fields in struct tm_params:
+ * - pipe_profiles
+ * - n_pipe_profiles
+ * - pipe_to_profile
+ */
+
+ subport_id = 0;
+ TAILQ_FOREACH(ns, nl, node) {
+ uint32_t pipe_id;
+
+ if (ns->level != TM_NODE_LEVEL_SUBPORT)
+ continue;
+
+ pipe_id = 0;
+ TAILQ_FOREACH(np, nl, node) {
+ struct rte_sched_pipe_params pp;
+ uint32_t pos;
+
+ if (np->level != TM_NODE_LEVEL_PIPE ||
+ np->parent_node_id != ns->node_id)
+ continue;
+
+ pipe_profile_build(dev, np, &pp);
+
+ if (!pipe_profile_exists(dev, &pp, &pos)) {
+ if (!pipe_profile_free_exists(dev, &pos))
+ return -1;
+
+ pipe_profile_install(dev, &pp, pos);
+ }
+
+ pipe_profile_mark(dev, subport_id, pipe_id, pos);
+
+ pipe_id++;
+ }
+
+ subport_id++;
+ }
+
+ return 0;
+}
+
+static struct tm_wred_profile *
+tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_hierarchy *h = &p->soft.tm.h;
+ struct tm_node_list *nl = &h->nodes;
+ struct tm_node *nq;
+
+ TAILQ_FOREACH(nq, nl, node) {
+ if (nq->level != TM_NODE_LEVEL_QUEUE ||
+ nq->parent_node->priority != tc_id)
+ continue;
+
+ return nq->wred_profile;
+ }
+
+ return NULL;
+}
+
+#ifdef RTE_SCHED_RED
+
+static void
+wred_profiles_set(struct rte_eth_dev *dev)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct rte_sched_port_params *pp = &p->soft.tm.params.port_params;
+ uint32_t tc_id;
+ enum rte_tm_color color;
+
+ for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
+ for (color = RTE_TM_GREEN; color < RTE_TM_COLORS; color++) {
+ struct rte_red_params *dst =
+ &pp->red_params[tc_id][color];
+ struct tm_wred_profile *src_wp =
+ tm_tc_wred_profile_get(dev, tc_id);
+ struct rte_tm_red_params *src =
+ &src_wp->params.red_params[color];
+
+ memcpy(dst, src, sizeof(*dst));
+ }
+}
+
+#else
+
+#define wred_profiles_set(dev)
+
+#endif
+
+static struct tm_shared_shaper *
+tm_tc_shared_shaper_get(struct rte_eth_dev *dev, struct tm_node *tc_node)
+{
+ return (tc_node->params.n_shared_shapers) ?
+ tm_shared_shaper_search(dev,
+ tc_node->params.shared_shaper_id[0]) :
+ NULL;
+}
+
+static struct tm_shared_shaper *
+tm_subport_tc_shared_shaper_get(struct rte_eth_dev *dev,
+ struct tm_node *subport_node,
+ uint32_t tc_id)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_node_list *nl = &p->soft.tm.h.nodes;
+ struct tm_node *n;
+
+ TAILQ_FOREACH(n, nl, node) {
+ if (n->level != TM_NODE_LEVEL_TC ||
+ n->parent_node->parent_node_id !=
+ subport_node->node_id ||
+ n->priority != tc_id)
+ continue;
+
+ return tm_tc_shared_shaper_get(dev, n);
+ }
+
+ return NULL;
+}
+
+static int
+hierarchy_commit_check(struct rte_eth_dev *dev, struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_hierarchy *h = &p->soft.tm.h;
+ struct tm_node_list *nl = &h->nodes;
+ struct tm_shared_shaper_list *ssl = &h->shared_shapers;
+ struct tm_wred_profile_list *wpl = &h->wred_profiles;
+ struct tm_node *nr = tm_root_node_present(dev), *ns, *np, *nt, *nq;
+ struct tm_shared_shaper *ss;
+
+ uint32_t n_pipes_per_subport;
+
+ /* Root node exists. */
+ if (nr == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_LEVEL_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* There is at least one subport, max is not exceeded. */
+ if (nr->n_children == 0 || nr->n_children > TM_MAX_SUBPORTS)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_LEVEL_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* There is at least one pipe. */
+ if (h->n_tm_nodes[TM_NODE_LEVEL_PIPE] == 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_LEVEL_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Number of pipes is the same for all subports. Maximum number of pipes
+ * per subport is not exceeded.
+ */
+ n_pipes_per_subport = h->n_tm_nodes[TM_NODE_LEVEL_PIPE] /
+ h->n_tm_nodes[TM_NODE_LEVEL_SUBPORT];
+
+ if (n_pipes_per_subport > TM_MAX_PIPES_PER_SUBPORT)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+
+ TAILQ_FOREACH(ns, nl, node) {
+ if (ns->level != TM_NODE_LEVEL_SUBPORT)
+ continue;
+
+ if (ns->n_children != n_pipes_per_subport)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+
+ /* Each pipe has exactly 4 TCs, with exactly one TC for each priority */
+ TAILQ_FOREACH(np, nl, node) {
+ uint32_t mask = 0, mask_expected =
+ RTE_LEN2MASK(RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE,
+ uint32_t);
+
+ if (np->level != TM_NODE_LEVEL_PIPE)
+ continue;
+
+ if (np->n_children != RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+
+ TAILQ_FOREACH(nt, nl, node) {
+ if (nt->level != TM_NODE_LEVEL_TC ||
+ nt->parent_node_id != np->node_id)
+ continue;
+
+ mask |= 1 << nt->priority;
+ }
+
+ if (mask != mask_expected)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+
+ /* Each TC has exactly 4 packet queues. */
+ TAILQ_FOREACH(nt, nl, node) {
+ if (nt->level != TM_NODE_LEVEL_TC)
+ continue;
+
+ if (nt->n_children != RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+
+ /**
+ * Shared shapers:
+ * -For each TC #i, all pipes in the same subport use the same
+ * shared shaper (or no shared shaper) for their TC#i.
+ * -Each shared shaper needs to have at least one user. All its
+ * users have to be TC nodes with the same priority and the same
+ * subport.
+ */
+ TAILQ_FOREACH(ns, nl, node) {
+ struct tm_shared_shaper *s[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint32_t id;
+
+ if (ns->level != TM_NODE_LEVEL_SUBPORT)
+ continue;
+
+ for (id = 0; id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; id++)
+ s[id] = tm_subport_tc_shared_shaper_get(dev, ns, id);
+
+ TAILQ_FOREACH(nt, nl, node) {
+ struct tm_shared_shaper *subport_ss, *tc_ss;
+
+ if (nt->level != TM_NODE_LEVEL_TC ||
+ nt->parent_node->parent_node_id !=
+ ns->node_id)
+ continue;
+
+ subport_ss = s[nt->priority];
+ tc_ss = tm_tc_shared_shaper_get(dev, nt);
+
+ if (subport_ss == NULL && tc_ss == NULL)
+ continue;
+
+ if ((subport_ss == NULL && tc_ss != NULL) ||
+ (subport_ss != NULL && tc_ss == NULL) ||
+ subport_ss->shared_shaper_id !=
+ tc_ss->shared_shaper_id)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+ }
+
+ TAILQ_FOREACH(ss, ssl, node) {
+ struct tm_node *nt_any = tm_shared_shaper_get_tc(dev, ss);
+ uint32_t n_users = 0;
+
+ if (nt_any != NULL)
+ TAILQ_FOREACH(nt, nl, node) {
+ if (nt->level != TM_NODE_LEVEL_TC ||
+ nt->priority != nt_any->priority ||
+ nt->parent_node->parent_node_id !=
+ nt_any->parent_node->parent_node_id)
+ continue;
+
+ n_users++;
+ }
+
+ if (ss->n_users == 0 || ss->n_users != n_users)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+
+ /* Not too many pipe profiles. */
+ if (pipe_profiles_generate(dev))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /**
+ * WRED (when used, i.e. at least one WRED profile defined):
+ * -Each WRED profile must have at least one user.
+ * -All leaf nodes must have their private WRED context enabled.
+ * -For each TC #i, all leaf nodes must use the same WRED profile
+ * for their private WRED context.
+ */
+ if (h->n_wred_profiles) {
+ struct tm_wred_profile *wp;
+ struct tm_wred_profile *w[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint32_t id;
+
+ TAILQ_FOREACH(wp, wpl, node)
+ if (wp->n_users == 0)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+
+ for (id = 0; id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; id++) {
+ w[id] = tm_tc_wred_profile_get(dev, id);
+
+ if (w[id] == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+
+ TAILQ_FOREACH(nq, nl, node) {
+ uint32_t id;
+
+ if (nq->level != TM_NODE_LEVEL_QUEUE)
+ continue;
+
+ id = nq->parent_node->priority;
+
+ if (nq->wred_profile == NULL ||
+ nq->wred_profile->wred_profile_id !=
+ w[id]->wred_profile_id)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+ }
+
+ return 0;
+}
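/*
 * A minimal standalone sketch of the per-pipe TC completeness test used in
 * hierarchy_commit_check() above: the priorities of a pipe's children are
 * OR-ed into a bitmask and compared against RTE_LEN2MASK(4, uint32_t), i.e.
 * 0xF when 4 traffic classes per pipe are assumed.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t priorities[] = {0, 2, 1, 3};	/* child TC priorities, any order */
	uint32_t mask = 0, expected = (1u << 4) - 1;	/* 0xF */
	unsigned int i;

	for (i = 0; i < sizeof(priorities) / sizeof(priorities[0]); i++)
		mask |= 1u << priorities[i];

	printf("pipe %s\n", (mask == expected) ?
		"has exactly one TC per priority" : "is missing a priority");

	return 0;
}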
+
+static void
+hierarchy_blueprints_create(struct rte_eth_dev *dev)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_params *t = &p->soft.tm.params;
+ struct tm_hierarchy *h = &p->soft.tm.h;
+
+ struct tm_node_list *nl = &h->nodes;
+ struct tm_node *root = tm_root_node_present(dev), *n;
+
+ uint32_t subport_id;
+
+ t->port_params = (struct rte_sched_port_params) {
+ .name = dev->data->name,
+ .socket = dev->data->numa_node,
+ .rate = root->shaper_profile->params.peak.rate,
+ .mtu = dev->data->mtu,
+ .frame_overhead =
+ root->shaper_profile->params.pkt_length_adjust,
+ .n_subports_per_port = root->n_children,
+ .n_pipes_per_subport = h->n_tm_nodes[TM_NODE_LEVEL_PIPE] /
+ h->n_tm_nodes[TM_NODE_LEVEL_SUBPORT],
+ .qsize = {p->params.soft.tm.qsize[0],
+ p->params.soft.tm.qsize[1],
+ p->params.soft.tm.qsize[2],
+ p->params.soft.tm.qsize[3],
+ },
+ .pipe_profiles = t->pipe_profiles,
+ .n_pipe_profiles = t->n_pipe_profiles,
+ };
+
+ wred_profiles_set(dev);
+
+ subport_id = 0;
+ TAILQ_FOREACH(n, nl, node) {
+ uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint32_t i;
+
+ if (n->level != TM_NODE_LEVEL_SUBPORT)
+ continue;
+
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+ struct tm_shared_shaper *ss;
+ struct tm_shaper_profile *sp;
+
+ ss = tm_subport_tc_shared_shaper_get(dev, n, i);
+ sp = (ss) ? tm_shaper_profile_search(dev,
+ ss->shaper_profile_id) :
+ n->shaper_profile;
+ tc_rate[i] = sp->params.peak.rate;
+ }
+
+ t->subport_params[subport_id] =
+ (struct rte_sched_subport_params) {
+ .tb_rate = n->shaper_profile->params.peak.rate,
+ .tb_size = n->shaper_profile->params.peak.size,
+
+ .tc_rate = {tc_rate[0],
+ tc_rate[1],
+ tc_rate[2],
+ tc_rate[3],
+ },
+ .tc_period = SUBPORT_TC_PERIOD,
+ };
+
+ subport_id++;
+ }
+}
+
+/* Traffic manager hierarchy commit */
+static int
+pmd_tm_hierarchy_commit(struct rte_eth_dev *dev,
+ int clear_on_fail,
+ struct rte_tm_error *error)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ int status;
+
+ /* Checks */
+ if (p->soft.tm.hierarchy_frozen)
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EBUSY));
+
+ status = hierarchy_commit_check(dev, error);
+ if (status) {
+ if (clear_on_fail) {
+ tm_hierarchy_uninit(p);
+ tm_hierarchy_init(p);
+ }
+
+ return status;
+ }
+
+ /* Create blueprints */
+ hierarchy_blueprints_create(dev);
+
+ /* Freeze hierarchy */
+ p->soft.tm.hierarchy_frozen = 1;
+
+ return 0;
+}
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+
+static int
+update_pipe_weight(struct rte_eth_dev *dev, struct tm_node *np, uint32_t weight)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ uint32_t pipe_id = tm_node_pipe_id(dev, np);
+
+ struct tm_node *ns = np->parent_node;
+ uint32_t subport_id = tm_node_subport_id(dev, ns);
+
+ struct rte_sched_pipe_params *profile0 = pipe_profile_get(dev, np);
+ struct rte_sched_pipe_params profile1;
+ uint32_t pipe_profile_id;
+
+ /* Derive new pipe profile. */
+ memcpy(&profile1, profile0, sizeof(profile1));
+ profile1.tc_ov_weight = (uint8_t)weight;
+
+ /* Since the implementation does not allow adding more pipe profiles after
+ * port configuration, the pipe configuration can be successfully
+ * updated only if the new profile is also part of the existing set of
+ * pipe profiles.
+ */
+ if (pipe_profile_exists(dev, &profile1, &pipe_profile_id) == 0)
+ return -1;
+
+ /* Update the pipe profile used by the current pipe. */
+ if (rte_sched_pipe_config(p->soft.tm.sched, subport_id, pipe_id,
+ (int32_t)pipe_profile_id))
+ return -1;
+
+ /* Commit changes. */
+ pipe_profile_mark(dev, subport_id, pipe_id, pipe_profile_id);
+ np->weight = weight;
+
+ return 0;
+}
+
+#endif
+
+static int
+update_queue_weight(struct rte_eth_dev *dev,
+ struct tm_node *nq, uint32_t weight)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ uint32_t queue_id = tm_node_queue_id(dev, nq);
+
+ struct tm_node *nt = nq->parent_node;
+ uint32_t tc_id = tm_node_tc_id(dev, nt);
+
+ struct tm_node *np = nt->parent_node;
+ uint32_t pipe_id = tm_node_pipe_id(dev, np);
+
+ struct tm_node *ns = np->parent_node;
+ uint32_t subport_id = tm_node_subport_id(dev, ns);
+
+ uint32_t pipe_queue_id =
+ tc_id * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue_id;
+
+ struct rte_sched_pipe_params *profile0 = pipe_profile_get(dev, np);
+ struct rte_sched_pipe_params profile1;
+ uint32_t pipe_profile_id;
+
+ /* Derive new pipe profile. */
+ memcpy(&profile1, profile0, sizeof(profile1));
+ profile1.wrr_weights[pipe_queue_id] = (uint8_t)weight;
+
+ /* Since the implementation does not allow adding more pipe profiles after
+ * port configuration, the pipe configuration can be successfully
+ * updated only if the new profile is also part of the existing set
+ * of pipe profiles.
+ */
+ if (pipe_profile_exists(dev, &profile1, &pipe_profile_id) == 0)
+ return -1;
+
+ /* Update the pipe profile used by the current pipe. */
+ if (rte_sched_pipe_config(p->soft.tm.sched, subport_id, pipe_id,
+ (int32_t)pipe_profile_id))
+ return -1;
+
+ /* Commit changes. */
+ pipe_profile_mark(dev, subport_id, pipe_id, pipe_profile_id);
+ nq->weight = weight;
+
+ return 0;
+}
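/*
 * A minimal sketch of the "derive and look up" pattern shared by
 * update_pipe_weight() and update_queue_weight() above and by the rate
 * update helpers further down: since librte_sched accepts no new pipe
 * profiles after port configuration, a runtime update only succeeds when the
 * derived profile is byte-for-byte identical to one already installed. The
 * profile type here is a hypothetical stand-in, not the rte_sched structure.
 */
#include <stdint.h>
#include <string.h>

struct profile {
	uint32_t tb_rate;
	uint8_t wrr_weights[16];
};

/* Return the index of an identical profile in tab[0..n-1], or -1 if the
 * candidate is not installed and the update must therefore be rejected. */
static int
profile_find(const struct profile *tab, uint32_t n, const struct profile *cand)
{
	uint32_t i;

	for (i = 0; i < n; i++)
		if (memcmp(&tab[i], cand, sizeof(*cand)) == 0)
			return (int)i;

	return -1;
}

int main(void)
{
	struct profile tab[2] = { { .tb_rate = 1000 }, { .tb_rate = 2000 } };
	struct profile cand = { .tb_rate = 2000 };

	return (profile_find(tab, 2, &cand) == 1) ? 0 : 1;
}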
+
+/* Traffic manager node parent update */
+static int
+pmd_tm_node_parent_update(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t parent_node_id,
+ uint32_t priority,
+ uint32_t weight,
+ struct rte_tm_error *error)
+{
+ struct tm_node *n;
+
+ /* Port must be started and TM used. */
+ if (dev->data->dev_started == 0 && (tm_used(dev) == 0))
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EBUSY));
+
+ /* Node must be valid */
+ n = tm_node_search(dev, node_id);
+ if (n == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Parent node must be the same */
+ if (n->parent_node_id != parent_node_id)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Priority must be the same */
+ if (n->priority != priority)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_PRIORITY,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* weight: must be non-zero and less than UINT8_MAX */
+ if (weight == 0 || weight >= UINT8_MAX)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_WEIGHT,
+ NULL,
+ rte_strerror(EINVAL));
+
+ switch (n->level) {
+ case TM_NODE_LEVEL_PORT:
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_WEIGHT,
+ NULL,
+ rte_strerror(EINVAL));
+ /* fall-through */
+ case TM_NODE_LEVEL_SUBPORT:
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_WEIGHT,
+ NULL,
+ rte_strerror(EINVAL));
+ /* fall-through */
+ case TM_NODE_LEVEL_PIPE:
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ if (update_pipe_weight(dev, n, weight))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ return 0;
+#else
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_WEIGHT,
+ NULL,
+ rte_strerror(EINVAL));
+#endif
+ /* fall-through */
+ case TM_NODE_LEVEL_TC:
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_WEIGHT,
+ NULL,
+ rte_strerror(EINVAL));
+ /* fall-through */
+ case TM_NODE_LEVEL_QUEUE:
+ /* fall-through */
+ default:
+ if (update_queue_weight(dev, n, weight))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ return 0;
+ }
+}
+
+static int
+update_subport_rate(struct rte_eth_dev *dev,
+ struct tm_node *ns,
+ struct tm_shaper_profile *sp)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ uint32_t subport_id = tm_node_subport_id(dev, ns);
+
+ struct rte_sched_subport_params subport_params;
+
+ /* Derive new subport configuration. */
+ memcpy(&subport_params,
+ &p->soft.tm.params.subport_params[subport_id],
+ sizeof(subport_params));
+ subport_params.tb_rate = sp->params.peak.rate;
+ subport_params.tb_size = sp->params.peak.size;
+
+ /* Update the subport configuration. */
+ if (rte_sched_subport_config(p->soft.tm.sched, subport_id,
+ &subport_params))
+ return -1;
+
+ /* Commit changes. */
+ ns->shaper_profile->n_users--;
+
+ ns->shaper_profile = sp;
+ ns->params.shaper_profile_id = sp->shaper_profile_id;
+ sp->n_users++;
+
+ memcpy(&p->soft.tm.params.subport_params[subport_id],
+ &subport_params,
+ sizeof(subport_params));
+
+ return 0;
+}
+
+static int
+update_pipe_rate(struct rte_eth_dev *dev,
+ struct tm_node *np,
+ struct tm_shaper_profile *sp)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ uint32_t pipe_id = tm_node_pipe_id(dev, np);
+
+ struct tm_node *ns = np->parent_node;
+ uint32_t subport_id = tm_node_subport_id(dev, ns);
+
+ struct rte_sched_pipe_params *profile0 = pipe_profile_get(dev, np);
+ struct rte_sched_pipe_params profile1;
+ uint32_t pipe_profile_id;
+
+ /* Derive new pipe profile. */
+ memcpy(&profile1, profile0, sizeof(profile1));
+ profile1.tb_rate = sp->params.peak.rate;
+ profile1.tb_size = sp->params.peak.size;
+
+ /* Since the implementation does not allow adding more pipe profiles after
+ * port configuration, the pipe configuration can be successfully
+ * updated only if the new profile is also part of the existing set of
+ * pipe profiles.
+ */
+ if (pipe_profile_exists(dev, &profile1, &pipe_profile_id) == 0)
+ return -1;
+
+ /* Update the pipe profile used by the current pipe. */
+ if (rte_sched_pipe_config(p->soft.tm.sched, subport_id, pipe_id,
+ (int32_t)pipe_profile_id))
+ return -1;
+
+ /* Commit changes. */
+ pipe_profile_mark(dev, subport_id, pipe_id, pipe_profile_id);
+ np->shaper_profile->n_users--;
+ np->shaper_profile = sp;
+ np->params.shaper_profile_id = sp->shaper_profile_id;
+ sp->n_users++;
+
+ return 0;
+}
+
+static int
+update_tc_rate(struct rte_eth_dev *dev,
+ struct tm_node *nt,
+ struct tm_shaper_profile *sp)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ uint32_t tc_id = tm_node_tc_id(dev, nt);
+
+ struct tm_node *np = nt->parent_node;
+ uint32_t pipe_id = tm_node_pipe_id(dev, np);
+
+ struct tm_node *ns = np->parent_node;
+ uint32_t subport_id = tm_node_subport_id(dev, ns);
+
+ struct rte_sched_pipe_params *profile0 = pipe_profile_get(dev, np);
+ struct rte_sched_pipe_params profile1;
+ uint32_t pipe_profile_id;
+
+ /* Derive new pipe profile. */
+ memcpy(&profile1, profile0, sizeof(profile1));
+ profile1.tc_rate[tc_id] = sp->params.peak.rate;
+
+ /* Since the implementation does not allow adding more pipe profiles after
+ * port configuration, the pipe configuration can be successfully
+ * updated only if the new profile is also part of the existing set of
+ * pipe profiles.
+ */
+ if (pipe_profile_exists(dev, &profile1, &pipe_profile_id) == 0)
+ return -1;
+
+ /* Update the pipe profile used by the current pipe. */
+ if (rte_sched_pipe_config(p->soft.tm.sched, subport_id, pipe_id,
+ (int32_t)pipe_profile_id))
+ return -1;
+
+ /* Commit changes. */
+ pipe_profile_mark(dev, subport_id, pipe_id, pipe_profile_id);
+ nt->shaper_profile->n_users--;
+ nt->shaper_profile = sp;
+ nt->params.shaper_profile_id = sp->shaper_profile_id;
+ sp->n_users++;
+
+ return 0;
+}
+
+/* Traffic manager node shaper update */
+static int
+pmd_tm_node_shaper_update(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ uint32_t shaper_profile_id,
+ struct rte_tm_error *error)
+{
+ struct tm_node *n;
+ struct tm_shaper_profile *sp;
+
+ /* Port must be started and TM used. */
+ if (dev->data->dev_started == 0 && (tm_used(dev) == 0))
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EBUSY));
+
+ /* Node must be valid */
+ n = tm_node_search(dev, node_id);
+ if (n == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ /* Shaper profile must be valid. */
+ sp = tm_shaper_profile_search(dev, shaper_profile_id);
+ if (sp == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_SHAPER_PROFILE,
+ NULL,
+ rte_strerror(EINVAL));
+
+ switch (n->level) {
+ case TM_NODE_LEVEL_PORT:
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ /* fall-through */
+ case TM_NODE_LEVEL_SUBPORT:
+ if (update_subport_rate(dev, n, sp))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ return 0;
+ /* fall-through */
+ case TM_NODE_LEVEL_PIPE:
+ if (update_pipe_rate(dev, n, sp))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ return 0;
+ /* fall-through */
+ case TM_NODE_LEVEL_TC:
+ if (update_tc_rate(dev, n, sp))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ return 0;
+ /* fall-through */
+ case TM_NODE_LEVEL_QUEUE:
+ /* fall-through */
+ default:
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ }
+}
+
+static inline uint32_t
+tm_port_queue_id(struct rte_eth_dev *dev,
+ uint32_t port_subport_id,
+ uint32_t subport_pipe_id,
+ uint32_t pipe_tc_id,
+ uint32_t tc_queue_id)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_hierarchy *h = &p->soft.tm.h;
+ uint32_t n_pipes_per_subport = h->n_tm_nodes[TM_NODE_LEVEL_PIPE] /
+ h->n_tm_nodes[TM_NODE_LEVEL_SUBPORT];
+
+ uint32_t port_pipe_id =
+ port_subport_id * n_pipes_per_subport + subport_pipe_id;
+ uint32_t port_tc_id =
+ port_pipe_id * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + pipe_tc_id;
+ uint32_t port_queue_id =
+ port_tc_id * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_queue_id;
+
+ return port_queue_id;
+}
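/*
 * A worked, standalone check of the queue numbering computed by
 * tm_port_queue_id() above, with the per-level fan-out passed in explicitly
 * so that no DPDK definitions are needed. With 2 pipes per subport, 4 TCs
 * per pipe and 4 queues per TC, (subport 1, pipe 0, TC 2, queue 3) lands on
 * port queue 43: ((1 * 2 + 0) * 4 + 2) * 4 + 3.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t
port_queue_id(uint32_t subport, uint32_t pipe, uint32_t tc, uint32_t queue,
	uint32_t pipes_per_subport, uint32_t tcs_per_pipe,
	uint32_t queues_per_tc)
{
	uint32_t port_pipe = subport * pipes_per_subport + pipe;
	uint32_t port_tc = port_pipe * tcs_per_pipe + tc;

	return port_tc * queues_per_tc + queue;
}

int main(void)
{
	assert(port_queue_id(1, 0, 2, 3, 2, 4, 4) == 43);
	return 0;
}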
+
+static int
+read_port_stats(struct rte_eth_dev *dev,
+ struct tm_node *nr,
+ struct rte_tm_node_stats *stats,
+ uint64_t *stats_mask,
+ int clear)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct tm_hierarchy *h = &p->soft.tm.h;
+ uint32_t n_subports_per_port = h->n_tm_nodes[TM_NODE_LEVEL_SUBPORT];
+ uint32_t subport_id;
+
+ for (subport_id = 0; subport_id < n_subports_per_port; subport_id++) {
+ struct rte_sched_subport_stats s;
+ uint32_t tc_ov, id;
+
+ /* Stats read */
+ int status = rte_sched_subport_read_stats(
+ p->soft.tm.sched,
+ subport_id,
+ &s,
+ &tc_ov);
+ if (status)
+ return status;
+
+ /* Stats accumulate */
+ for (id = 0; id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; id++) {
+ nr->stats.n_pkts +=
+ s.n_pkts_tc[id] - s.n_pkts_tc_dropped[id];
+ nr->stats.n_bytes +=
+ s.n_bytes_tc[id] - s.n_bytes_tc_dropped[id];
+ nr->stats.leaf.n_pkts_dropped[RTE_TM_GREEN] +=
+ s.n_pkts_tc_dropped[id];
+ nr->stats.leaf.n_bytes_dropped[RTE_TM_GREEN] +=
+ s.n_bytes_tc_dropped[id];
+ }
+ }
+
+ /* Stats copy */
+ if (stats)
+ memcpy(stats, &nr->stats, sizeof(*stats));
+
+ if (stats_mask)
+ *stats_mask = STATS_MASK_DEFAULT;
+
+ /* Stats clear */
+ if (clear)
+ memset(&nr->stats, 0, sizeof(nr->stats));
+
+ return 0;
+}
+
+static int
+read_subport_stats(struct rte_eth_dev *dev,
+ struct tm_node *ns,
+ struct rte_tm_node_stats *stats,
+ uint64_t *stats_mask,
+ int clear)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ uint32_t subport_id = tm_node_subport_id(dev, ns);
+ struct rte_sched_subport_stats s;
+ uint32_t tc_ov, tc_id;
+
+ /* Stats read */
+ int status = rte_sched_subport_read_stats(
+ p->soft.tm.sched,
+ subport_id,
+ &s,
+ &tc_ov);
+ if (status)
+ return status;
+
+ /* Stats accumulate */
+ for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++) {
+ ns->stats.n_pkts +=
+ s.n_pkts_tc[tc_id] - s.n_pkts_tc_dropped[tc_id];
+ ns->stats.n_bytes +=
+ s.n_bytes_tc[tc_id] - s.n_bytes_tc_dropped[tc_id];
+ ns->stats.leaf.n_pkts_dropped[RTE_TM_GREEN] +=
+ s.n_pkts_tc_dropped[tc_id];
+ ns->stats.leaf.n_bytes_dropped[RTE_TM_GREEN] +=
+ s.n_bytes_tc_dropped[tc_id];
+ }
+
+ /* Stats copy */
+ if (stats)
+ memcpy(stats, &ns->stats, sizeof(*stats));
+
+ if (stats_mask)
+ *stats_mask = STATS_MASK_DEFAULT;
+
+ /* Stats clear */
+ if (clear)
+ memset(&ns->stats, 0, sizeof(ns->stats));
+
+ return 0;
+}
+
+static int
+read_pipe_stats(struct rte_eth_dev *dev,
+ struct tm_node *np,
+ struct rte_tm_node_stats *stats,
+ uint64_t *stats_mask,
+ int clear)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ uint32_t pipe_id = tm_node_pipe_id(dev, np);
+
+ struct tm_node *ns = np->parent_node;
+ uint32_t subport_id = tm_node_subport_id(dev, ns);
+
+ uint32_t i;
+
+ /* Stats read */
+ for (i = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
+ struct rte_sched_queue_stats s;
+ uint16_t qlen;
+
+ uint32_t qid = tm_port_queue_id(dev,
+ subport_id,
+ pipe_id,
+ i / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS,
+ i % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS);
+
+ int status = rte_sched_queue_read_stats(
+ p->soft.tm.sched,
+ qid,
+ &s,
+ &qlen);
+ if (status)
+ return status;
+
+ /* Stats accumulate */
+ np->stats.n_pkts += s.n_pkts - s.n_pkts_dropped;
+ np->stats.n_bytes += s.n_bytes - s.n_bytes_dropped;
+ np->stats.leaf.n_pkts_dropped[RTE_TM_GREEN] += s.n_pkts_dropped;
+ np->stats.leaf.n_bytes_dropped[RTE_TM_GREEN] +=
+ s.n_bytes_dropped;
+ np->stats.leaf.n_pkts_queued = qlen;
+ }
+
+ /* Stats copy */
+ if (stats)
+ memcpy(stats, &np->stats, sizeof(*stats));
+
+ if (stats_mask)
+ *stats_mask = STATS_MASK_DEFAULT;
+
+ /* Stats clear */
+ if (clear)
+ memset(&np->stats, 0, sizeof(np->stats));
+
+ return 0;
+}
+
+static int
+read_tc_stats(struct rte_eth_dev *dev,
+ struct tm_node *nt,
+ struct rte_tm_node_stats *stats,
+ uint64_t *stats_mask,
+ int clear)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+
+ uint32_t tc_id = tm_node_tc_id(dev, nt);
+
+ struct tm_node *np = nt->parent_node;
+ uint32_t pipe_id = tm_node_pipe_id(dev, np);
+
+ struct tm_node *ns = np->parent_node;
+ uint32_t subport_id = tm_node_subport_id(dev, ns);
+
+ uint32_t i;
+
+ /* Stats read */
+ for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ struct rte_sched_queue_stats s;
+ uint16_t qlen;
+
+ uint32_t qid = tm_port_queue_id(dev,
+ subport_id,
+ pipe_id,
+ tc_id,
+ i);
+
+ int status = rte_sched_queue_read_stats(
+ p->soft.tm.sched,
+ qid,
+ &s,
+ &qlen);
+ if (status)
+ return status;
+
+ /* Stats accumulate */
+ nt->stats.n_pkts += s.n_pkts - s.n_pkts_dropped;
+ nt->stats.n_bytes += s.n_bytes - s.n_bytes_dropped;
+ nt->stats.leaf.n_pkts_dropped[RTE_TM_GREEN] += s.n_pkts_dropped;
+ nt->stats.leaf.n_bytes_dropped[RTE_TM_GREEN] +=
+ s.n_bytes_dropped;
+ nt->stats.leaf.n_pkts_queued = qlen;
+ }
+
+ /* Stats copy */
+ if (stats)
+ memcpy(stats, &nt->stats, sizeof(*stats));
+
+ if (stats_mask)
+ *stats_mask = STATS_MASK_DEFAULT;
+
+ /* Stats clear */
+ if (clear)
+ memset(&nt->stats, 0, sizeof(nt->stats));
+
+ return 0;
+}
+
+static int
+read_queue_stats(struct rte_eth_dev *dev,
+ struct tm_node *nq,
+ struct rte_tm_node_stats *stats,
+ uint64_t *stats_mask,
+ int clear)
+{
+ struct pmd_internals *p = dev->data->dev_private;
+ struct rte_sched_queue_stats s;
+ uint16_t qlen;
+
+ uint32_t queue_id = tm_node_queue_id(dev, nq);
+
+ struct tm_node *nt = nq->parent_node;
+ uint32_t tc_id = tm_node_tc_id(dev, nt);
+
+ struct tm_node *np = nt->parent_node;
+ uint32_t pipe_id = tm_node_pipe_id(dev, np);
+
+ struct tm_node *ns = np->parent_node;
+ uint32_t subport_id = tm_node_subport_id(dev, ns);
+
+ /* Stats read */
+ uint32_t qid = tm_port_queue_id(dev,
+ subport_id,
+ pipe_id,
+ tc_id,
+ queue_id);
+
+ int status = rte_sched_queue_read_stats(
+ p->soft.tm.sched,
+ qid,
+ &s,
+ &qlen);
+ if (status)
+ return status;
+
+ /* Stats accumulate */
+ nq->stats.n_pkts += s.n_pkts - s.n_pkts_dropped;
+ nq->stats.n_bytes += s.n_bytes - s.n_bytes_dropped;
+ nq->stats.leaf.n_pkts_dropped[RTE_TM_GREEN] += s.n_pkts_dropped;
+ nq->stats.leaf.n_bytes_dropped[RTE_TM_GREEN] +=
+ s.n_bytes_dropped;
+ nq->stats.leaf.n_pkts_queued = qlen;
+
+ /* Stats copy */
+ if (stats)
+ memcpy(stats, &nq->stats, sizeof(*stats));
+
+ if (stats_mask)
+ *stats_mask = STATS_MASK_QUEUE;
+
+ /* Stats clear */
+ if (clear)
+ memset(&nq->stats, 0, sizeof(nq->stats));
+
+ return 0;
+}
+
+/* Traffic manager read stats counters for a specific node */
+static int
+pmd_tm_node_stats_read(struct rte_eth_dev *dev,
+ uint32_t node_id,
+ struct rte_tm_node_stats *stats,
+ uint64_t *stats_mask,
+ int clear,
+ struct rte_tm_error *error)
+{
+ struct tm_node *n;
+
+ /* Port must be started and TM used. */
+ if (dev->data->dev_started == 0 && (tm_used(dev) == 0))
+ return -rte_tm_error_set(error,
+ EBUSY,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EBUSY));
+
+ /* Node must be valid */
+ n = tm_node_search(dev, node_id);
+ if (n == NULL)
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_NODE_ID,
+ NULL,
+ rte_strerror(EINVAL));
+
+ switch (n->level) {
+ case TM_NODE_LEVEL_PORT:
+ if (read_port_stats(dev, n, stats, stats_mask, clear))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ return 0;
+
+ case TM_NODE_LEVEL_SUBPORT:
+ if (read_subport_stats(dev, n, stats, stats_mask, clear))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ return 0;
+
+ case TM_NODE_LEVEL_PIPE:
+ if (read_pipe_stats(dev, n, stats, stats_mask, clear))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ return 0;
+
+ case TM_NODE_LEVEL_TC:
+ if (read_tc_stats(dev, n, stats, stats_mask, clear))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ return 0;
+
+ case TM_NODE_LEVEL_QUEUE:
+ default:
+ if (read_queue_stats(dev, n, stats, stats_mask, clear))
+ return -rte_tm_error_set(error,
+ EINVAL,
+ RTE_TM_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ rte_strerror(EINVAL));
+ return 0;
+ }
+}
+
+const struct rte_tm_ops pmd_tm_ops = {
+ .node_type_get = pmd_tm_node_type_get,
+ .capabilities_get = pmd_tm_capabilities_get,
+ .level_capabilities_get = pmd_tm_level_capabilities_get,
+ .node_capabilities_get = pmd_tm_node_capabilities_get,
+
+ .wred_profile_add = pmd_tm_wred_profile_add,
+ .wred_profile_delete = pmd_tm_wred_profile_delete,
+ .shared_wred_context_add_update = NULL,
+ .shared_wred_context_delete = NULL,
+
+ .shaper_profile_add = pmd_tm_shaper_profile_add,
+ .shaper_profile_delete = pmd_tm_shaper_profile_delete,
+ .shared_shaper_add_update = pmd_tm_shared_shaper_add_update,
+ .shared_shaper_delete = pmd_tm_shared_shaper_delete,
+
+ .node_add = pmd_tm_node_add,
+ .node_delete = pmd_tm_node_delete,
+ .node_suspend = NULL,
+ .node_resume = NULL,
+ .hierarchy_commit = pmd_tm_hierarchy_commit,
+
+ .node_parent_update = pmd_tm_node_parent_update,
+ .node_shaper_update = pmd_tm_node_shaper_update,
+ .node_shared_shaper_update = NULL,
+ .node_stats_update = NULL,
+ .node_wfq_weight_mode_update = NULL,
+ .node_cman_update = NULL,
+ .node_wred_context_update = NULL,
+ .node_shared_wred_context_update = NULL,
+
+ .node_stats_read = pmd_tm_node_stats_read,
+};
diff --git a/drivers/net/softnic/rte_pmd_eth_softnic_version.map b/drivers/net/softnic/rte_pmd_eth_softnic_version.map
new file mode 100644
index 00000000..fb2cb68c
--- /dev/null
+++ b/drivers/net/softnic/rte_pmd_eth_softnic_version.map
@@ -0,0 +1,7 @@
+DPDK_17.11 {
+ global:
+
+ rte_pmd_softnic_run;
+
+ local: *;
+};
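/*
 * A minimal polling-loop sketch of how an application is expected to drive
 * the symbol exported above; the prototype is assumed to be the one declared
 * by the soft NIC public header for this release. The soft NIC does its
 * scheduling work inside this call, so it has to be invoked repeatedly from
 * a polling core.
 */
#include <stdint.h>

int rte_pmd_softnic_run(uint16_t port_id);	/* assumed prototype */

static void
softnic_poll(uint16_t port_id, volatile const int *quit)
{
	while (!*quit)
		rte_pmd_softnic_run(port_id);
}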
diff --git a/drivers/net/szedata2/Makefile b/drivers/net/szedata2/Makefile
index 0e96b922..0ebd3ec5 100644
--- a/drivers/net/szedata2/Makefile
+++ b/drivers/net/szedata2/Makefile
@@ -39,6 +39,9 @@ LIB = librte_pmd_szedata2.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lsze2
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
EXPORT_MAP := rte_pmd_szedata2_version.map
diff --git a/drivers/net/szedata2/rte_eth_szedata2.c b/drivers/net/szedata2/rte_eth_szedata2.c
index 9c0d57cc..403cfdbb 100644
--- a/drivers/net/szedata2/rte_eth_szedata2.c
+++ b/drivers/net/szedata2/rte_eth_szedata2.c
@@ -71,7 +71,7 @@
struct szedata2_rx_queue {
struct szedata *sze;
uint8_t rx_channel;
- uint8_t in_port;
+ uint16_t in_port;
struct rte_mempool *mb_pool;
volatile uint64_t rx_pkts;
volatile uint64_t rx_bytes;
@@ -1042,7 +1042,7 @@ eth_dev_info(struct rte_eth_dev *dev,
dev_info->speed_capa = ETH_LINK_SPEED_100G;
}
-static void
+static int
eth_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats)
{
@@ -1077,6 +1077,8 @@ eth_stats_get(struct rte_eth_dev *dev,
stats->ibytes = rx_total_bytes;
stats->obytes = tx_total_bytes;
stats->oerrors = tx_err_total;
+
+ return 0;
}
static void
@@ -1538,7 +1540,7 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev)
return -EINVAL;
}
snprintf(rsc_filename, PATH_MAX,
- "%s/" PCI_PRI_FMT "/resource%u", pci_get_sysfs_path(),
+ "%s/" PCI_PRI_FMT "/resource%u", rte_pci_get_sysfs_path(),
pci_addr->domain, pci_addr->bus,
pci_addr->devid, pci_addr->function, PCI_RESOURCE_NUMBER);
fd = open(rsc_filename, O_RDWR);
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
index b0de0284..405b49e4 100644
--- a/drivers/net/tap/Makefile
+++ b/drivers/net/tap/Makefile
@@ -43,6 +43,9 @@ CFLAGS += -O3
CFLAGS += -I$(SRCDIR)
CFLAGS += -I.
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
+LDLIBS += -lrte_bus_vdev
#
# all source are stored in SRCS-y
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 9acea839..6b27679a 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -39,7 +39,7 @@
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#include <rte_kvargs.h>
#include <rte_net.h>
#include <rte_debug.h>
@@ -603,8 +603,31 @@ tap_dev_stop(struct rte_eth_dev *dev)
}
static int
-tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+tap_dev_configure(struct rte_eth_dev *dev)
{
+ if (dev->data->nb_rx_queues > RTE_PMD_TAP_MAX_QUEUES) {
+ RTE_LOG(ERR, PMD,
+ "%s: number of rx queues %d exceeds max num of queues %d\n",
+ dev->device->name,
+ dev->data->nb_rx_queues,
+ RTE_PMD_TAP_MAX_QUEUES);
+ return -1;
+ }
+ if (dev->data->nb_tx_queues > RTE_PMD_TAP_MAX_QUEUES) {
+ RTE_LOG(ERR, PMD,
+ "%s: number of tx queues %d exceeds max num of queues %d\n",
+ dev->device->name,
+ dev->data->nb_tx_queues,
+ RTE_PMD_TAP_MAX_QUEUES);
+ return -1;
+ }
+
+ RTE_LOG(INFO, PMD, "%s: %p: TX configured queues number: %u\n",
+ dev->device->name, (void *)dev, dev->data->nb_tx_queues);
+
+ RTE_LOG(INFO, PMD, "%s: %p: RX configured queues number: %u\n",
+ dev->device->name, (void *)dev, dev->data->nb_rx_queues);
+
return 0;
}
@@ -650,8 +673,8 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
dev_info->if_index = internals->if_index;
dev_info->max_mac_addrs = 1;
dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
- dev_info->max_rx_queues = internals->nb_queues;
- dev_info->max_tx_queues = internals->nb_queues;
+ dev_info->max_rx_queues = RTE_PMD_TAP_MAX_QUEUES;
+ dev_info->max_tx_queues = RTE_PMD_TAP_MAX_QUEUES;
dev_info->min_rx_bufsize = 0;
dev_info->pci_dev = NULL;
dev_info->speed_capa = tap_dev_speed_capa();
@@ -664,7 +687,7 @@ tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
DEV_TX_OFFLOAD_TCP_CKSUM);
}
-static void
+static int
tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
{
unsigned int i, imax;
@@ -673,9 +696,9 @@ tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
unsigned long rx_nombuf = 0, ierrors = 0;
const struct pmd_internals *pmd = dev->data->dev_private;
- imax = (pmd->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
- pmd->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
-
+ /* rx queue statistics */
+ imax = (dev->data->nb_rx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
+ dev->data->nb_rx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
for (i = 0; i < imax; i++) {
tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
@@ -683,7 +706,13 @@ tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
rx_bytes_total += tap_stats->q_ibytes[i];
rx_nombuf += pmd->rxq[i].stats.rx_nombuf;
ierrors += pmd->rxq[i].stats.ierrors;
+ }
+ /* tx queue statistics */
+ imax = (dev->data->nb_tx_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
+ dev->data->nb_tx_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+ for (i = 0; i < imax; i++) {
tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
tap_stats->q_errors[i] = pmd->txq[i].stats.errs;
tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
@@ -699,6 +728,7 @@ tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
tap_stats->opackets = tx_total;
tap_stats->oerrors = tx_err_total;
tap_stats->obytes = tx_bytes_total;
+ return 0;
}
static void
@@ -707,7 +737,7 @@ tap_stats_reset(struct rte_eth_dev *dev)
int i;
struct pmd_internals *pmd = dev->data->dev_private;
- for (i = 0; i < pmd->nb_queues; i++) {
+ for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
pmd->rxq[i].stats.ipackets = 0;
pmd->rxq[i].stats.ibytes = 0;
pmd->rxq[i].stats.ierrors = 0;
@@ -729,11 +759,15 @@ tap_dev_close(struct rte_eth_dev *dev)
tap_flow_flush(dev, NULL);
tap_flow_implicit_flush(internals, NULL);
- for (i = 0; i < internals->nb_queues; i++) {
- if (internals->rxq[i].fd != -1)
+ for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+ if (internals->rxq[i].fd != -1) {
close(internals->rxq[i].fd);
- internals->rxq[i].fd = -1;
- internals->txq[i].fd = -1;
+ internals->rxq[i].fd = -1;
+ }
+ if (internals->txq[i].fd != -1) {
+ close(internals->txq[i].fd);
+ internals->txq[i].fd = -1;
+ }
}
if (internals->remote_if_index) {
@@ -887,30 +921,57 @@ tap_mac_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
static int
tap_setup_queue(struct rte_eth_dev *dev,
struct pmd_internals *internals,
- uint16_t qid)
+ uint16_t qid,
+ int is_rx)
{
+ int *fd;
+ int *other_fd;
+ const char *dir;
struct pmd_internals *pmd = dev->data->dev_private;
struct rx_queue *rx = &internals->rxq[qid];
struct tx_queue *tx = &internals->txq[qid];
- int fd = rx->fd == -1 ? tx->fd : rx->fd;
- if (fd == -1) {
- RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
- pmd->name, qid);
- fd = tun_alloc(pmd);
- if (fd < 0) {
+ if (is_rx) {
+ fd = &rx->fd;
+ other_fd = &tx->fd;
+ dir = "rx";
+ } else {
+ fd = &tx->fd;
+ other_fd = &rx->fd;
+ dir = "tx";
+ }
+ if (*fd != -1) {
+ /* fd for this queue already exists */
+ RTE_LOG(DEBUG, PMD, "%s: fd %d for %s queue qid %d exists\n",
+ pmd->name, *fd, dir, qid);
+ } else if (*other_fd != -1) {
+ /* Only other_fd exists. dup it */
+ *fd = dup(*other_fd);
+ if (*fd < 0) {
+ *fd = -1;
+ RTE_LOG(ERR, PMD, "%s: dup() failed.\n",
+ pmd->name);
+ return -1;
+ }
+ RTE_LOG(DEBUG, PMD, "%s: dup fd %d for %s queue qid %d (%d)\n",
+ pmd->name, *other_fd, dir, qid, *fd);
+ } else {
+ /* Neither the RX nor the TX fd exists yet (both equal -1): create one */
+ *fd = tun_alloc(pmd);
+ if (*fd < 0) {
+ *fd = -1; /* restore original value */
RTE_LOG(ERR, PMD, "%s: tun_alloc() failed.\n",
pmd->name);
return -1;
}
+ RTE_LOG(DEBUG, PMD, "%s: add %s queue for qid %d fd %d\n",
+ pmd->name, dir, qid, *fd);
}
- rx->fd = fd;
- tx->fd = fd;
tx->mtu = &dev->data->mtu;
rx->rxmode = &dev->data->dev_conf.rxmode;
- return fd;
+ return *fd;
}
static int
@@ -932,10 +993,10 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
int fd;
int i;
- if ((rx_queue_id >= internals->nb_queues) || !mp) {
+ if (rx_queue_id >= dev->data->nb_rx_queues || !mp) {
RTE_LOG(WARNING, PMD,
- "nb_queues %d too small or mempool NULL\n",
- internals->nb_queues);
+ "nb_rx_queues %d too small or mempool NULL\n",
+ dev->data->nb_rx_queues);
return -1;
}
@@ -954,7 +1015,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
rxq->iovecs = iovecs;
dev->data->rx_queues[rx_queue_id] = rxq;
- fd = tap_setup_queue(dev, internals, rx_queue_id);
+ fd = tap_setup_queue(dev, internals, rx_queue_id, 1);
if (fd == -1) {
ret = fd;
goto error;
@@ -1002,11 +1063,11 @@ tap_tx_queue_setup(struct rte_eth_dev *dev,
struct pmd_internals *internals = dev->data->dev_private;
int ret;
- if (tx_queue_id >= internals->nb_queues)
+ if (tx_queue_id >= dev->data->nb_tx_queues)
return -1;
dev->data->tx_queues[tx_queue_id] = &internals->txq[tx_queue_id];
- ret = tap_setup_queue(dev, internals, tx_queue_id);
+ ret = tap_setup_queue(dev, internals, tx_queue_id, 0);
if (ret == -1)
return -1;
@@ -1072,10 +1133,11 @@ tap_intr_handle_set(struct rte_eth_dev *dev, int set)
/* In any case, disable interrupt if the conf is no longer there. */
if (!dev->data->dev_conf.intr_conf.lsc) {
- if (pmd->intr_handle.fd != -1)
+ if (pmd->intr_handle.fd != -1) {
nl_final(pmd->intr_handle.fd);
- rte_intr_callback_unregister(
- &pmd->intr_handle, tap_dev_intr_handler, dev);
+ rte_intr_callback_unregister(&pmd->intr_handle,
+ tap_dev_intr_handler, dev);
+ }
return 0;
}
if (set) {
@@ -1166,7 +1228,6 @@ static const struct eth_dev_ops ops = {
.filter_ctrl = tap_dev_filter_ctrl,
};
-
static int
eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name,
char *remote_iface, int fixed_mac_type)
@@ -1193,8 +1254,8 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name,
}
pmd = dev->data->dev_private;
+ pmd->dev = dev;
snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
- pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
if (pmd->ioctl_sock == -1) {
@@ -1207,13 +1268,14 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name,
/* Setup some default values */
rte_memcpy(data, dev->data, sizeof(*data));
data->dev_private = pmd;
- data->dev_flags = RTE_ETH_DEV_DETACHABLE | RTE_ETH_DEV_INTR_LSC;
+ data->dev_flags = RTE_ETH_DEV_INTR_LSC;
data->numa_node = numa_node;
data->dev_link = pmd_link;
data->mac_addrs = &pmd->eth_addr;
- data->nb_rx_queues = pmd->nb_queues;
- data->nb_tx_queues = pmd->nb_queues;
+ /* Set the number of RX and TX queues */
+ data->nb_rx_queues = 0;
+ data->nb_tx_queues = 0;
dev->data = data;
dev->dev_ops = &ops;
@@ -1241,7 +1303,11 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name,
}
/* Immediately create the netdevice (this will create the 1st queue). */
- if (tap_setup_queue(dev, pmd, 0) == -1)
+ /* rx queue */
+ if (tap_setup_queue(dev, pmd, 0, 1) == -1)
+ goto error_exit;
+ /* tx queue */
+ if (tap_setup_queue(dev, pmd, 0, 0) == -1)
goto error_exit;
ifr.ifr_mtu = dev->data->mtu;
@@ -1515,9 +1581,16 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
tap_flow_implicit_flush(internals, NULL);
nl_final(internals->nlsk_fd);
}
- for (i = 0; i < internals->nb_queues; i++)
- if (internals->rxq[i].fd != -1)
+ for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+ if (internals->rxq[i].fd != -1) {
close(internals->rxq[i].fd);
+ internals->rxq[i].fd = -1;
+ }
+ if (internals->txq[i].fd != -1) {
+ close(internals->txq[i].fd);
+ internals->txq[i].fd = -1;
+ }
+ }
close(internals->ioctl_sock);
rte_free(eth_dev->data->dev_private);
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 928a0454..829f32f3 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -80,9 +80,9 @@ struct tx_queue {
};
struct pmd_internals {
+ struct rte_eth_dev *dev; /* Ethernet device. */
char remote_iface[RTE_ETH_NAME_MAX_LEN]; /* Remote netdevice name */
char name[RTE_ETH_NAME_MAX_LEN]; /* Internal Tap device name */
- uint16_t nb_queues; /* Number of queues supported */
struct ether_addr eth_addr; /* Mac address of the device port */
struct ifreq remote_initial_flags; /* Remote netdevice flags on init */
int remote_if_index; /* remote netdevice IF_INDEX */
diff --git a/drivers/net/tap/tap_flow.c b/drivers/net/tap/tap_flow.c
index 41f73452..ffc0b85b 100644
--- a/drivers/net/tap/tap_flow.c
+++ b/drivers/net/tap/tap_flow.c
@@ -1089,13 +1089,29 @@ priv_flow_process(struct pmd_internals *pmd,
const struct rte_flow_action_queue *queue =
(const struct rte_flow_action_queue *)
actions->conf;
+
if (action)
goto exit_action_not_supported;
action = 1;
- if (!queue || (queue->index >= pmd->nb_queues))
+ if (!queue ||
+ (queue->index > pmd->dev->data->nb_rx_queues - 1))
goto exit_action_not_supported;
if (flow)
err = add_action_skbedit(flow, queue->index);
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
+ /* Fake RSS support. */
+ const struct rte_flow_action_rss *rss =
+ (const struct rte_flow_action_rss *)
+ actions->conf;
+
+ if (action)
+ goto exit_action_not_supported;
+ action = 1;
+ if (!rss || rss->num < 1 ||
+ (rss->queue[0] > pmd->dev->data->nb_rx_queues - 1))
+ goto exit_action_not_supported;
+ if (flow)
+ err = add_action_skbedit(flow, rss->queue[0]);
} else {
goto exit_action_not_supported;
}
@@ -1446,7 +1462,7 @@ tap_flow_isolate(struct rte_eth_dev *dev,
return 0;
error:
pmd->flow_isolate = 0;
- return -rte_flow_error_set(
+ return rte_flow_error_set(
error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"TC rule creation failed");
}
diff --git a/drivers/net/thunderx/Makefile b/drivers/net/thunderx/Makefile
index 915ae945..e50e1ad8 100644
--- a/drivers/net/thunderx/Makefile
+++ b/drivers/net/thunderx/Makefile
@@ -40,12 +40,15 @@ LIB = librte_pmd_thunderx_nicvf.a
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lm
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
EXPORT_MAP := rte_pmd_thunderx_nicvf_version.map
LIBABIVER := 1
-OBJS_BASE_DRIVER=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))
+OBJS_BASE_DRIVER=$(sort $(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c))))
$(foreach obj, $(OBJS_BASE_DRIVER), $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER)))
VPATH += $(SRCDIR)/base
diff --git a/drivers/net/thunderx/base/nicvf_hw.c b/drivers/net/thunderx/base/nicvf_hw.c
index 2634285e..dc0af1ca 100644
--- a/drivers/net/thunderx/base/nicvf_hw.c
+++ b/drivers/net/thunderx/base/nicvf_hw.c
@@ -509,7 +509,7 @@ nicvf_qset_rbdr_precharge(void *dev, struct nicvf *nic,
struct rbdr_entry_t *desc, *desc0;
struct nicvf_rbdr *rbdr = nic->rbdr;
uint32_t count;
- nicvf_phys_addr_t phy;
+ nicvf_iova_addr_t phy;
assert(rbdr != NULL);
desc = rbdr->desc;
diff --git a/drivers/net/thunderx/base/nicvf_hw.h b/drivers/net/thunderx/base/nicvf_hw.h
index b7d0a3dc..698aa487 100644
--- a/drivers/net/thunderx/base/nicvf_hw.h
+++ b/drivers/net/thunderx/base/nicvf_hw.h
@@ -88,7 +88,7 @@ enum nicvf_err_e {
NICVF_ERR_RSS_GET_SZ, /* -8171 */
};
-typedef nicvf_phys_addr_t (*rbdr_pool_get_handler)(void *dev, void *opaque);
+typedef nicvf_iova_addr_t (*rbdr_pool_get_handler)(void *dev, void *opaque);
struct nicvf_hw_rx_qstats {
uint64_t q_rx_bytes;
diff --git a/drivers/net/thunderx/base/nicvf_hw_defs.h b/drivers/net/thunderx/base/nicvf_hw_defs.h
index 0fe673e6..e7e092b6 100644
--- a/drivers/net/thunderx/base/nicvf_hw_defs.h
+++ b/drivers/net/thunderx/base/nicvf_hw_defs.h
@@ -213,7 +213,7 @@
#define NICVF_STATIC_ASSERT(s) _Static_assert(s, #s)
#define assert_primary(nic) assert((nic)->sqs_mode == 0)
-typedef uint64_t nicvf_phys_addr_t;
+typedef uint64_t nicvf_iova_addr_t;
/* vNIC HW Enumerations */
@@ -840,7 +840,7 @@ struct rbdr_entry_t {
uint64_t buf_addr:42;
uint64_t cache_align:7;
};
- nicvf_phys_addr_t full_addr;
+ nicvf_iova_addr_t full_addr;
};
#else
union {
@@ -849,7 +849,7 @@ struct rbdr_entry_t {
uint64_t buf_addr:42;
uint64_t rsvd0:15;
};
- nicvf_phys_addr_t full_addr;
+ nicvf_iova_addr_t full_addr;
};
#endif
};
diff --git a/drivers/net/thunderx/nicvf_ethdev.c b/drivers/net/thunderx/nicvf_ethdev.c
index edc17f1d..d65d3cee 100644
--- a/drivers/net/thunderx/nicvf_ethdev.c
+++ b/drivers/net/thunderx/nicvf_ethdev.c
@@ -61,6 +61,7 @@
#include <rte_malloc.h>
#include <rte_random.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_tailq.h>
#include "base/nicvf_plat.h"
@@ -242,7 +243,7 @@ nicvf_dev_get_regs(struct rte_eth_dev *dev, struct rte_dev_reg_info *regs)
return -ENOTSUP;
}
-static void
+static int
nicvf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
uint16_t qidx;
@@ -332,6 +333,8 @@ nicvf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->opackets += port_stats.tx_bcast_frames_ok;
stats->opackets += port_stats.tx_mcast_frames_ok;
stats->oerrors = port_stats.tx_drops;
+
+ return 0;
}
static const uint32_t *
@@ -602,7 +605,7 @@ nicvf_qset_cq_alloc(struct rte_eth_dev *dev, struct nicvf *nic,
memset(rz->addr, 0, ring_size);
- rxq->phys = rz->phys_addr;
+ rxq->phys = rz->iova;
rxq->desc = rz->addr;
rxq->qlen_mask = desc_cnt - 1;
@@ -626,7 +629,7 @@ nicvf_qset_sq_alloc(struct rte_eth_dev *dev, struct nicvf *nic,
memset(rz->addr, 0, ring_size);
- sq->phys = rz->phys_addr;
+ sq->phys = rz->iova;
sq->desc = rz->addr;
sq->qlen_mask = desc_cnt - 1;
@@ -660,7 +663,7 @@ nicvf_qset_rbdr_alloc(struct rte_eth_dev *dev, struct nicvf *nic,
memset(rz->addr, 0, ring_size);
- rbdr->phys = rz->phys_addr;
+ rbdr->phys = rz->iova;
rbdr->tail = 0;
rbdr->next_tail = 0;
rbdr->desc = rz->addr;
@@ -677,7 +680,7 @@ nicvf_qset_rbdr_alloc(struct rte_eth_dev *dev, struct nicvf *nic,
static void
nicvf_rbdr_release_mbuf(struct rte_eth_dev *dev, struct nicvf *nic,
- nicvf_phys_addr_t phy)
+ nicvf_iova_addr_t phy)
{
uint16_t qidx;
void *obj;
@@ -1380,6 +1383,13 @@ nicvf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
dev_info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+ /* Autonegotiation may be disabled */
+ dev_info->speed_capa = ETH_LINK_SPEED_FIXED;
+ dev_info->speed_capa |= ETH_LINK_SPEED_10M | ETH_LINK_SPEED_100M |
+ ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G;
+ if (nicvf_hw_version(nic) != PCI_SUB_DEVICE_ID_CN81XX_NICVF)
+ dev_info->speed_capa |= ETH_LINK_SPEED_40G;
+
dev_info->min_rx_bufsize = ETHER_MIN_MTU;
dev_info->max_rx_pktlen = NIC_HW_MAX_FRS;
dev_info->max_rx_queues =
@@ -1419,7 +1429,7 @@ nicvf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
};
}
-static nicvf_phys_addr_t
+static nicvf_iova_addr_t
rbdr_rte_mempool_get(void *dev, void *opaque)
{
uint16_t qidx;
diff --git a/drivers/net/thunderx/nicvf_ethdev.h b/drivers/net/thunderx/nicvf_ethdev.h
index 3734430f..71bc3cf2 100644
--- a/drivers/net/thunderx/nicvf_ethdev.h
+++ b/drivers/net/thunderx/nicvf_ethdev.h
@@ -78,7 +78,7 @@ nicvf_mempool_phy_offset(struct rte_mempool *mp)
hdr = STAILQ_FIRST(&mp->mem_list);
assert(hdr != NULL);
- return (uint64_t)((uintptr_t)hdr->addr - hdr->phys_addr);
+ return (uint64_t)((uintptr_t)hdr->addr - hdr->iova);
}
static inline uint16_t
@@ -104,7 +104,7 @@ nicvf_netdev_qidx(struct nicvf *nic, uint8_t local_qidx)
* P = V - offset
*/
static inline uintptr_t
-nicvf_mbuff_phy2virt(phys_addr_t phy, uint64_t mbuf_phys_off)
+nicvf_mbuff_phy2virt(rte_iova_t phy, uint64_t mbuf_phys_off)
{
return (uintptr_t)(phy + mbuf_phys_off);
}
@@ -112,7 +112,7 @@ nicvf_mbuff_phy2virt(phys_addr_t phy, uint64_t mbuf_phys_off)
static inline uintptr_t
nicvf_mbuff_virt2phy(uintptr_t virt, uint64_t mbuf_phys_off)
{
- return (phys_addr_t)(virt - mbuf_phys_off);
+ return (rte_iova_t)(virt - mbuf_phys_off);
}
static inline void
diff --git a/drivers/net/thunderx/nicvf_rxtx.h b/drivers/net/thunderx/nicvf_rxtx.h
index cd1b754b..a3ccce29 100644
--- a/drivers/net/thunderx/nicvf_rxtx.h
+++ b/drivers/net/thunderx/nicvf_rxtx.h
@@ -60,7 +60,7 @@ fill_sq_desc_gather(union sq_entry_t *entry, struct rte_mbuf *pkt)
sqe.gather.subdesc_type = SQ_DESC_TYPE_GATHER;
sqe.gather.ld_type = NIC_SEND_LD_TYPE_E_LDT;
sqe.gather.size = pkt->data_len;
- sqe.gather.addr = rte_mbuf_data_dma_addr(pkt);
+ sqe.gather.addr = rte_mbuf_data_iova(pkt);
entry->buff[0] = sqe.buff[0];
entry->buff[1] = sqe.buff[1];
@@ -80,7 +80,7 @@ fill_sq_desc_gather(union sq_entry_t *entry, struct rte_mbuf *pkt)
entry->buff[0] = (uint64_t)SQ_DESC_TYPE_GATHER << 60 |
(uint64_t)NIC_SEND_LD_TYPE_E_LDT << 58 |
pkt->data_len;
- entry->buff[1] = rte_mbuf_data_dma_addr(pkt);
+ entry->buff[1] = rte_mbuf_data_iova(pkt);
}
#endif
diff --git a/drivers/net/thunderx/nicvf_struct.h b/drivers/net/thunderx/nicvf_struct.h
index 4ee6c3bb..0f8208ef 100644
--- a/drivers/net/thunderx/nicvf_struct.h
+++ b/drivers/net/thunderx/nicvf_struct.h
@@ -46,7 +46,7 @@ struct nicvf_rbdr {
uintptr_t rbdr_status;
uintptr_t rbdr_door;
struct rbdr_entry_t *desc;
- nicvf_phys_addr_t phys;
+ nicvf_iova_addr_t phys;
uint32_t buffsz;
uint32_t tail;
uint32_t next_tail;
@@ -56,7 +56,7 @@ struct nicvf_rbdr {
struct nicvf_txq {
union sq_entry_t *desc;
- nicvf_phys_addr_t phys;
+ nicvf_iova_addr_t phys;
struct rte_mbuf **txbuffs;
uintptr_t sq_head;
uintptr_t sq_door;
@@ -87,7 +87,7 @@ struct nicvf_rxq {
uintptr_t cq_status;
uintptr_t cq_door;
union mbuf_initializer mbuf_initializer;
- nicvf_phys_addr_t phys;
+ nicvf_iova_addr_t phys;
union cq_entry_t *desc;
struct nicvf_rbdr *shared_rbdr;
struct nicvf *nic;
@@ -100,7 +100,7 @@ struct nicvf_rxq {
uint16_t queue_id;
uint16_t precharge_cnt;
uint8_t rx_drop_en;
- uint8_t port_id;
+ uint16_t port_id;
uint8_t rbptr_offset;
} __rte_cache_aligned;
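The nicvf_phys_addr_t to nicvf_iova_addr_t rename and the rz->phys_addr to rz->iova switch above track the DPDK-wide move from physical addresses to IOVAs: the address programmed into the NIC is whatever the IOMMU (or IOVA-as-PA mode) maps, not necessarily a physical address. A minimal sketch of the allocation pattern these hunks rely on, with illustrative names; only the rte_memzone fields come from the API:

#include <errno.h>
#include <string.h>
#include <rte_lcore.h>
#include <rte_memory.h>
#include <rte_memzone.h>

/* Reserve a DMA-able ring and record both views of its address. */
static int
alloc_ring(size_t ring_size, void **virt, rte_iova_t *iova)
{
	const struct rte_memzone *rz;

	rz = rte_memzone_reserve_aligned("ring_mz", ring_size,
					 rte_socket_id(), 0, 4096);
	if (rz == NULL)
		return -ENOMEM;
	memset(rz->addr, 0, ring_size);
	*virt = rz->addr; /* CPU virtual address of the ring */
	*iova = rz->iova; /* bus (IOVA) address handed to the hardware */
	return 0;
}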
diff --git a/drivers/net/vhost/Makefile b/drivers/net/vhost/Makefile
index 3ba8ad64..c411745b 100644
--- a/drivers/net/vhost/Makefile
+++ b/drivers/net/vhost/Makefile
@@ -37,13 +37,16 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_pmd_vhost.a
LDLIBS += -lpthread
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_vhost
+LDLIBS += -lrte_bus_vdev
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
EXPORT_MAP := rte_pmd_vhost_version.map
-LIBABIVER := 1
+LIBABIVER := 2
#
# all source are stored in SRCS-y
diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 0dac5e60..2536ee4a 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -39,7 +39,7 @@
#include <rte_ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#include <rte_kvargs.h>
#include <rte_vhost.h>
#include <rte_spinlock.h>
@@ -52,6 +52,7 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
#define ETH_VHOST_QUEUES_ARG "queues"
#define ETH_VHOST_CLIENT_ARG "client"
#define ETH_VHOST_DEQUEUE_ZERO_COPY "dequeue-zero-copy"
+#define ETH_VHOST_IOMMU_SUPPORT "iommu-support"
#define VHOST_MAX_PKT_BURST 32
static const char *valid_arguments[] = {
@@ -59,6 +60,7 @@ static const char *valid_arguments[] = {
ETH_VHOST_QUEUES_ARG,
ETH_VHOST_CLIENT_ARG,
ETH_VHOST_DEQUEUE_ZERO_COPY,
+ ETH_VHOST_IOMMU_SUPPORT,
NULL
};
@@ -105,7 +107,7 @@ struct vhost_queue {
rte_atomic32_t while_queuing;
struct pmd_internal *internal;
struct rte_mempool *mb_pool;
- uint8_t port;
+ uint16_t port;
uint16_t virtqueue_id;
struct vhost_stats stats;
};
@@ -705,7 +707,7 @@ static struct vhost_device_ops vhost_ops = {
};
int
-rte_eth_vhost_get_queue_event(uint8_t port_id,
+rte_eth_vhost_get_queue_event(uint16_t port_id,
struct rte_eth_vhost_queue_event *event)
{
struct rte_vhost_vring_state *state;
@@ -742,7 +744,7 @@ rte_eth_vhost_get_queue_event(uint8_t port_id,
}
int
-rte_eth_vhost_get_vid_from_port_id(uint8_t port_id)
+rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
{
struct internal_list *list;
struct rte_eth_dev *eth_dev;
@@ -890,7 +892,7 @@ eth_dev_info(struct rte_eth_dev *dev,
dev_info->min_rx_bufsize = 0;
}
-static void
+static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
unsigned i;
@@ -928,6 +930,8 @@ eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->oerrors = tx_missed_total;
stats->ibytes = rx_total_bytes;
stats->obytes = tx_total_bytes;
+
+ return 0;
}
static void
@@ -1084,8 +1088,7 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
internal->max_queues = queues;
data->dev_link = pmd_link;
data->mac_addrs = eth_addr;
- data->dev_flags =
- RTE_ETH_DEV_DETACHABLE | RTE_ETH_DEV_INTR_LSC;
+ data->dev_flags = RTE_ETH_DEV_INTR_LSC;
eth_dev->dev_ops = &ops;
@@ -1163,6 +1166,7 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
uint64_t flags = 0;
int client_mode = 0;
int dequeue_zero_copy = 0;
+ int iommu_support = 0;
RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n",
rte_vdev_device_name(dev));
@@ -1210,6 +1214,16 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
}
+ if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) {
+ ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT,
+ &open_int, &iommu_support);
+ if (ret < 0)
+ goto out_free;
+
+ if (iommu_support)
+ flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
+ }
+
if (dev->device.numa_node == SOCKET_ID_ANY)
dev->device.numa_node = rte_socket_id();
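The new iommu-support devarg is parsed with the same open_int helper as the other boolean options and, when set, adds RTE_VHOST_USER_IOMMU_SUPPORT to the flags passed to the vhost library when the socket is registered. A hedged usage sketch; the net_vhost0 vdev name and the iface argument are assumptions based on the rest of this PMD rather than on this hunk:

#include <rte_common.h>
#include <rte_eal.h>

int
main(void)
{
	char *eal_args[] = {
		"app", "--vdev",
		"net_vhost0,iface=/tmp/vhost.sock,queues=1,iommu-support=1",
	};

	/* "iommu-support=1" is mapped to RTE_VHOST_USER_IOMMU_SUPPORT
	 * by rte_pmd_vhost_probe() above. */
	return rte_eal_init(RTE_DIM(eal_args), eal_args) < 0 ? 1 : 0;
}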
diff --git a/drivers/net/vhost/rte_eth_vhost.h b/drivers/net/vhost/rte_eth_vhost.h
index 39ca7719..948f3c81 100644
--- a/drivers/net/vhost/rte_eth_vhost.h
+++ b/drivers/net/vhost/rte_eth_vhost.h
@@ -69,7 +69,7 @@ struct rte_eth_vhost_queue_event {
* - On success, zero.
* - On failure, a negative value.
*/
-int rte_eth_vhost_get_queue_event(uint8_t port_id,
+int rte_eth_vhost_get_queue_event(uint16_t port_id,
struct rte_eth_vhost_queue_event *event);
/**
@@ -79,7 +79,7 @@ int rte_eth_vhost_get_queue_event(uint8_t port_id,
* - On success, the 'vid' associated with 'port_id'.
* - On failure, a negative value.
*/
-int rte_eth_vhost_get_vid_from_port_id(uint8_t port_id);
+int rte_eth_vhost_get_vid_from_port_id(uint16_t port_id);
#ifdef __cplusplus
}
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index b21b8781..f2b5d1c3 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -38,6 +38,12 @@ LIB = librte_pmd_virtio.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
+ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
+LDLIBS += -lrte_bus_vdev
+endif
EXPORT_MAP := rte_pmd_virtio_version.map
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index e320811e..d2576d5e 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -46,9 +46,11 @@
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_ether.h>
#include <rte_common.h>
#include <rte_errno.h>
+#include <rte_cpuflags.h>
#include <rte_memory.h>
#include <rte_eal.h>
@@ -72,11 +74,12 @@ static void virtio_dev_info_get(struct rte_eth_dev *dev,
struct rte_eth_dev_info *dev_info);
static int virtio_dev_link_update(struct rte_eth_dev *dev,
int wait_to_complete);
+static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static void virtio_set_hwaddr(struct virtio_hw *hw);
static void virtio_get_hwaddr(struct virtio_hw *hw);
-static void virtio_dev_stats_get(struct rte_eth_dev *dev,
+static int virtio_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
struct rte_eth_xstat *xstats, unsigned n);
@@ -162,7 +165,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
uint32_t head, i;
int k, sum = 0;
virtio_net_ctrl_ack status = ~0;
- struct virtio_pmd_ctrl result;
+ struct virtio_pmd_ctrl *result;
struct virtqueue *vq;
ctrl->status = status;
@@ -253,10 +256,9 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
vq->vq_free_cnt, vq->vq_desc_head_idx);
- memcpy(&result, cvq->virtio_net_hdr_mz->addr,
- sizeof(struct virtio_pmd_ctrl));
+ result = cvq->virtio_net_hdr_mz->addr;
- return result.status;
+ return result->status;
}
static int
@@ -426,10 +428,10 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
memset(mz->addr, 0, mz->len);
- vq->vq_ring_mem = mz->phys_addr;
+ vq->vq_ring_mem = mz->iova;
vq->vq_ring_virt_mem = mz->addr;
PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64,
- (uint64_t)mz->phys_addr);
+ (uint64_t)mz->iova);
PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
(uint64_t)(uintptr_t)mz->addr);
@@ -474,13 +476,13 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
txvq->port_id = dev->data->port_id;
txvq->mz = mz;
txvq->virtio_net_hdr_mz = hdr_mz;
- txvq->virtio_net_hdr_mem = hdr_mz->phys_addr;
+ txvq->virtio_net_hdr_mem = hdr_mz->iova;
} else if (queue_type == VTNET_CQ) {
cvq = &vq->cq;
cvq->vq = vq;
cvq->mz = mz;
cvq->virtio_net_hdr_mz = hdr_mz;
- cvq->virtio_net_hdr_mem = hdr_mz->phys_addr;
+ cvq->virtio_net_hdr_mem = hdr_mz->iova;
memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
hw->cvq = cvq;
@@ -491,7 +493,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
* VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information.
*/
if (!hw->virtio_user_dev)
- vq->offset = offsetof(struct rte_mbuf, buf_physaddr);
+ vq->offset = offsetof(struct rte_mbuf, buf_iova);
else {
vq->vq_ring_mem = (uintptr_t)mz->addr;
vq->offset = offsetof(struct rte_mbuf, buf_addr);
@@ -779,6 +781,7 @@ static const struct eth_dev_ops virtio_eth_dev_ops = {
.stats_reset = virtio_dev_stats_reset,
.xstats_reset = virtio_dev_stats_reset,
.link_update = virtio_dev_link_update,
+ .vlan_offload_set = virtio_dev_vlan_offload_set,
.rx_queue_setup = virtio_dev_rx_queue_setup,
.rx_queue_intr_enable = virtio_dev_rx_queue_intr_enable,
.rx_queue_intr_disable = virtio_dev_rx_queue_intr_disable,
@@ -964,10 +967,12 @@ virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
return count;
}
-static void
+static int
virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
virtio_update_stats(dev, stats);
+
+ return 0;
}
static void
@@ -1235,14 +1240,36 @@ virtio_interrupt_handler(void *param)
}
+/* set rx and tx handlers according to what is supported */
static void
-rx_func_get(struct rte_eth_dev *eth_dev)
+set_rxtx_funcs(struct rte_eth_dev *eth_dev)
{
struct virtio_hw *hw = eth_dev->data->dev_private;
- if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
+
+ if (hw->use_simple_rx) {
+ PMD_INIT_LOG(INFO, "virtio: using simple Rx path on port %u",
+ eth_dev->data->port_id);
+ eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
+ } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+ PMD_INIT_LOG(INFO,
+ "virtio: using mergeable buffer Rx path on port %u",
+ eth_dev->data->port_id);
eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
- else
+ } else {
+ PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
+ eth_dev->data->port_id);
eth_dev->rx_pkt_burst = &virtio_recv_pkts;
+ }
+
+ if (hw->use_simple_tx) {
+ PMD_INIT_LOG(INFO, "virtio: using simple Tx path on port %u",
+ eth_dev->data->port_id);
+ eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
+ } else {
+ PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
+ eth_dev->data->port_id);
+ eth_dev->tx_pkt_burst = virtio_xmit_pkts;
+ }
}
/* Only support 1:1 queue/interrupt mapping so far.
@@ -1360,15 +1387,12 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
rte_eth_copy_pci_info(eth_dev, pci_dev);
}
- eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
/* If host does not support both status and MSI-X then disable LSC */
if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->use_msix)
eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
else
eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
- rx_func_get(eth_dev);
-
/* Setting up rx_header size for the device */
if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
vtpci_with_feature(hw, VIRTIO_F_VERSION_1))
@@ -1534,7 +1558,6 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf));
eth_dev->dev_ops = &virtio_eth_dev_ops;
- eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
if (!hw->virtio_user_dev) {
@@ -1544,12 +1567,8 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
}
virtio_set_vtpci_ops(hw);
- if (hw->use_simple_rxtx) {
- eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
- eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
- } else {
- rx_func_get(eth_dev);
- }
+ set_rxtx_funcs(eth_dev);
+
return 0;
}
@@ -1659,9 +1678,11 @@ virtio_dev_configure(struct rte_eth_dev *dev)
{
const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
struct virtio_hw *hw = dev->data->dev_private;
+ uint64_t req_features;
int ret;
PMD_INIT_LOG(DEBUG, "configure");
+ req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
if (dev->data->dev_conf.intr_conf.rxq) {
ret = virtio_init_device(dev, hw->req_guest_features);
@@ -1669,16 +1690,37 @@ virtio_dev_configure(struct rte_eth_dev *dev)
return ret;
}
- /* Virtio does L4 checksum but not L3! */
- if (rxmode->hw_ip_checksum) {
- PMD_DRV_LOG(NOTICE,
- "virtio does not support IP checksum");
+ /* The name hw_ip_checksum is a bit confusing since it can be
+ * set by the application to request L3 and/or L4 checksums. In
+ * case of virtio, only L4 checksum is supported.
+ */
+ if (rxmode->hw_ip_checksum)
+ req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
+
+ if (rxmode->enable_lro)
+ req_features |=
+ (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
+ (1ULL << VIRTIO_NET_F_GUEST_TSO6);
+
+ /* if request features changed, reinit the device */
+ if (req_features != hw->req_guest_features) {
+ ret = virtio_init_device(dev, req_features);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (rxmode->hw_ip_checksum &&
+ !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
+ PMD_DRV_LOG(ERR,
+ "rx checksum not available on this host");
return -ENOTSUP;
}
- if (rxmode->enable_lro) {
- PMD_DRV_LOG(NOTICE,
- "virtio does not support Large Receive Offload");
+ if (rxmode->enable_lro &&
+ (!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
+ !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
+ PMD_DRV_LOG(ERR,
+ "Large Receive Offload not available on this host");
return -ENOTSUP;
}
@@ -1690,7 +1732,7 @@ virtio_dev_configure(struct rte_eth_dev *dev)
if (rxmode->hw_vlan_filter
&& !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
- PMD_DRV_LOG(NOTICE,
+ PMD_DRV_LOG(ERR,
"vlan filtering not available on this host");
return -ENOTSUP;
}
@@ -1703,6 +1745,23 @@ virtio_dev_configure(struct rte_eth_dev *dev)
return -EBUSY;
}
+ hw->use_simple_rx = 1;
+ hw->use_simple_tx = 1;
+
+#if defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM
+ if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
+ hw->use_simple_rx = 0;
+ hw->use_simple_tx = 0;
+ }
+#endif
+ if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+ hw->use_simple_rx = 0;
+ hw->use_simple_tx = 0;
+ }
+
+ if (rxmode->hw_ip_checksum)
+ hw->use_simple_rx = 0;
+
return 0;
}
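With this rework, hw_ip_checksum and enable_lro no longer cause a hard failure; they are translated into guest feature bits and the device is renegotiated, and configuration only fails with -ENOTSUP when the host does not offer the matching features. A minimal application-side sketch, assuming the rxmode bitfields of this ethdev generation:

#include <rte_ethdev.h>

static int
configure_virtio_port(uint16_t port_id)
{
	struct rte_eth_conf port_conf = {
		.rxmode = {
			.hw_ip_checksum = 1, /* -> VIRTIO_NET_F_GUEST_CSUM (L4 only) */
			.enable_lro     = 1, /* -> VIRTIO_NET_F_GUEST_TSO4/TSO6 */
		},
	};

	/* Returns -ENOTSUP when the host lacks the requested features. */
	return rte_eth_dev_configure(port_id, 1, 1, &port_conf);
}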
@@ -1714,6 +1773,19 @@ virtio_dev_start(struct rte_eth_dev *dev)
struct virtnet_rx *rxvq;
struct virtnet_tx *txvq __rte_unused;
struct virtio_hw *hw = dev->data->dev_private;
+ int ret;
+
+ /* Finish the initialization of the queues */
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ ret = virtio_dev_rx_queue_setup_finish(dev, i);
+ if (ret < 0)
+ return ret;
+ }
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ ret = virtio_dev_tx_queue_setup_finish(dev, i);
+ if (ret < 0)
+ return ret;
+ }
/* check if lsc interrupt feature is enabled */
if (dev->data->dev_conf.intr_conf.lsc) {
@@ -1751,9 +1823,16 @@ virtio_dev_start(struct rte_eth_dev *dev)
for (i = 0; i < dev->data->nb_rx_queues; i++) {
rxvq = dev->data->rx_queues[i];
+ /* Flush the old packets */
+ virtqueue_flush(rxvq->vq);
virtqueue_notify(rxvq->vq);
}
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txvq = dev->data->tx_queues[i];
+ virtqueue_notify(txvq->vq);
+ }
+
PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -1766,6 +1845,7 @@ virtio_dev_start(struct rte_eth_dev *dev)
VIRTQUEUE_DUMP(txvq->vq);
}
+ set_rxtx_funcs(dev);
hw->started = 1;
/* Initialize Link state */
@@ -1875,6 +1955,29 @@ virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complet
return (old.link_status == link.link_status) ? -1 : 0;
}
+static int
+virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
+{
+ const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
+ struct virtio_hw *hw = dev->data->dev_private;
+
+ if (mask & ETH_VLAN_FILTER_MASK) {
+ if (rxmode->hw_vlan_filter &&
+ !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
+
+ PMD_DRV_LOG(NOTICE,
+ "vlan filtering not available on this host");
+
+ return -ENOTSUP;
+ }
+ }
+
+ if (mask & ETH_VLAN_STRIP_MASK)
+ hw->vlan_strip = rxmode->hw_vlan_strip;
+
+ return 0;
+}
+
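The new vlan_offload_set callback lets an application toggle VLAN stripping on a running virtio port through the generic ethdev call; a hedged sketch (the mask macro is the standard ethdev one, not introduced by this patch):

#include <rte_ethdev.h>

/* Hypothetical helper: request VLAN stripping at run time. The ethdev
 * layer updates rxmode and invokes virtio_dev_vlan_offload_set() for
 * the bits that changed. */
static int
enable_vlan_strip(uint16_t port_id)
{
	return rte_eth_dev_set_vlan_offload(port_id, ETH_VLAN_STRIP_OFFLOAD);
}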
static void
virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
@@ -1904,6 +2007,8 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
}
tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
(1ULL << VIRTIO_NET_F_GUEST_TSO6);
+ if ((host_features & tso_mask) == tso_mask)
+ dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
dev_info->tx_offload_capa = 0;
if (hw->guest_features & (1ULL << VIRTIO_NET_F_CSUM)) {
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index c3413c6d..2039bc54 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -92,10 +92,16 @@ int virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mb_pool);
+int virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id);
+
int virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
uint16_t nb_tx_desc, unsigned int socket_id,
const struct rte_eth_txconf *tx_conf);
+int virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
+ uint16_t tx_queue_id);
+
uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
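Splitting queue setup into a setup and a setup_finish stage defers descriptor refill until device start, after virtio_dev_configure() has made the final simple-versus-standard datapath decision. The resulting call order, as a sketch derived from the hunks in this patch:

/*
 * rte_eth_dev_configure()  -> virtio_dev_configure()
 *                              (picks use_simple_rx / use_simple_tx)
 * rte_eth_rx_queue_setup() -> virtio_dev_rx_queue_setup()
 *                              (bookkeeping only, no mbufs allocated yet)
 * rte_eth_tx_queue_setup() -> virtio_dev_tx_queue_setup()
 *                              (may clear use_simple_tx based on txq_flags)
 * rte_eth_dev_start()      -> virtio_dev_start()
 *                              -> virtio_dev_rx_queue_setup_finish() (refill)
 *                              -> virtio_dev_tx_queue_setup_finish()
 *                              -> set_rxtx_funcs() (final burst functions)
 */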
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index e6da6802..55b717c0 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -553,7 +553,7 @@ get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
uint32_t offset = cap->offset;
uint8_t *base;
- if (bar > 5) {
+ if (bar >= PCI_MAX_RESOURCE) {
PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
return NULL;
}
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 18caebdd..36d452c0 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -37,6 +37,7 @@
#include <stdint.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_ethdev.h>
struct virtqueue;
@@ -259,8 +260,9 @@ struct virtio_hw {
uint8_t vlan_strip;
uint8_t use_msix;
uint8_t modern;
- uint8_t use_simple_rxtx;
- uint8_t port_id;
+ uint8_t use_simple_rx;
+ uint8_t use_simple_tx;
+ uint16_t port_id;
uint8_t mac_addr[ETHER_ADDR_LEN];
uint32_t notify_off_multiplier;
uint8_t *isr;
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index e30377c5..390c137c 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -39,7 +39,6 @@
#include <rte_cycles.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
@@ -50,7 +49,6 @@
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
-#include <rte_cpuflags.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
@@ -81,7 +79,7 @@ virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
return VIRTQUEUE_NUSED(vq) >= offset;
}
-static void
+void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
struct vring_desc *dp, *dp_tail;
@@ -300,6 +298,10 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
/* prepend cannot fail, checked by caller */
hdr = (struct virtio_net_hdr *)
rte_pktmbuf_prepend(cookie, head_size);
+ /* rte_pktmbuf_prepend() adds the header size to the packet length,
+ * which is not wanted here; the subtraction below restores the
+ * correct packet size.
+ */
+ cookie->pkt_len -= head_size;
/* if offload disabled, it is not zeroed below, do it now */
if (offload == 0) {
ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
@@ -421,9 +423,6 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
struct virtio_hw *hw = dev->data->dev_private;
struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
struct virtnet_rx *rxvq;
- int error, nbufs;
- struct rte_mbuf *m;
- uint16_t desc_idx;
PMD_INIT_FUNC_TRACE();
@@ -440,12 +439,26 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
}
dev->data->rx_queues[queue_idx] = rxvq;
+ return 0;
+}
+
+int
+virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
+{
+ uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
+ struct virtio_hw *hw = dev->data->dev_private;
+ struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
+ struct virtnet_rx *rxvq = &vq->rxq;
+ struct rte_mbuf *m;
+ uint16_t desc_idx;
+ int error, nbufs;
+
+ PMD_INIT_FUNC_TRACE();
/* Allocate blank mbufs for the each rx descriptor */
nbufs = 0;
- error = ENOSPC;
- if (hw->use_simple_rxtx) {
+ if (hw->use_simple_rx) {
for (desc_idx = 0; desc_idx < vq->vq_nentries;
desc_idx++) {
vq->vq_ring.avail->ring[desc_idx] = desc_idx;
@@ -467,7 +480,7 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
break;
/* Enqueue allocated buffers */
- if (hw->use_simple_rxtx)
+ if (hw->use_simple_rx)
error = virtqueue_enqueue_recv_refill_simple(vq, m);
else
error = virtqueue_enqueue_recv_refill(vq, m);
@@ -490,31 +503,6 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
return 0;
}
-static void
-virtio_update_rxtx_handler(struct rte_eth_dev *dev,
- const struct rte_eth_txconf *tx_conf)
-{
- uint8_t use_simple_rxtx = 0;
- struct virtio_hw *hw = dev->data->dev_private;
-
-#if defined RTE_ARCH_X86
- if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3))
- use_simple_rxtx = 1;
-#elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM
- if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
- use_simple_rxtx = 1;
-#endif
- /* Use simple rx/tx func if single segment and no offloads */
- if (use_simple_rxtx &&
- (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS &&
- !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
- PMD_INIT_LOG(INFO, "Using simple rx/tx path");
- dev->tx_pkt_burst = virtio_xmit_pkts_simple;
- dev->rx_pkt_burst = virtio_recv_pkts_vec;
- hw->use_simple_rxtx = use_simple_rxtx;
- }
-}
-
/*
* struct rte_eth_dev *dev: Used to update dev
* uint16_t nb_desc: Defaults to values read from config space
@@ -534,11 +522,12 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
struct virtnet_tx *txvq;
uint16_t tx_free_thresh;
- uint16_t desc_idx;
PMD_INIT_FUNC_TRACE();
- virtio_update_rxtx_handler(dev, tx_conf);
+ /* cannot use simple rxtx funcs with multisegs or offloads */
+ if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) != VIRTIO_SIMPLE_FLAGS)
+ hw->use_simple_tx = 0;
if (nb_desc == 0 || nb_desc > vq->vq_nentries)
nb_desc = vq->vq_nentries;
@@ -563,9 +552,24 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
vq->vq_free_thresh = tx_free_thresh;
- if (hw->use_simple_rxtx) {
- uint16_t mid_idx = vq->vq_nentries >> 1;
+ dev->data->tx_queues[queue_idx] = txvq;
+ return 0;
+}
+int
+virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
+ uint16_t queue_idx)
+{
+ uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
+ struct virtio_hw *hw = dev->data->dev_private;
+ struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
+ uint16_t mid_idx = vq->vq_nentries >> 1;
+ struct virtnet_tx *txvq = &vq->txq;
+ uint16_t desc_idx;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (hw->use_simple_tx) {
for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
vq->vq_ring.avail->ring[desc_idx] =
desc_idx + mid_idx;
@@ -587,7 +591,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
VIRTQUEUE_DUMP(vq);
- dev->data->tx_queues[queue_idx] = txvq;
return 0;
}
@@ -670,7 +673,7 @@ virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
* In case of SCTP, this will be wrong since it's a CRC
* but there's nothing we can do.
*/
- uint16_t csum, off;
+ uint16_t csum = 0, off;
rte_raw_cksum_mbuf(m, hdr->csum_start,
rte_pktmbuf_pkt_len(m) - hdr->csum_start,
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index 28f82d6a..54f1e849 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -54,7 +54,7 @@ struct virtnet_rx {
struct rte_mempool *mpool; /**< mempool for mbuf allocation */
uint16_t queue_id; /**< DPDK queue index. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
/* Statistics */
struct virtnet_stats stats;
@@ -66,10 +66,10 @@ struct virtnet_tx {
struct virtqueue *vq;
/**< memzone to populate hdr. */
const struct rte_memzone *virtio_net_hdr_mz;
- phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
+ rte_iova_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
uint16_t queue_id; /**< DPDK queue index. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
/* Statistics */
struct virtnet_stats stats;
@@ -81,9 +81,9 @@ struct virtnet_ctl {
struct virtqueue *vq;
/**< memzone to populate hdr. */
const struct rte_memzone *virtio_net_hdr_mz;
- phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
- uint8_t port_id; /**< Device port identifier. */
- const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
+ rte_iova_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
+ uint16_t port_id; /**< Device port identifier. */
+ const struct rte_memzone *mz; /**< mem zone to populate CTL ring. */
};
int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 542cf805..b5bc1c49 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -39,7 +39,6 @@
#include <rte_cycles.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
@@ -65,6 +64,8 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct vring_desc *start_dp;
uint16_t desc_idx;
+ cookie->port = vq->rxq.port_id;
+
desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
dxp = &vq->vq_descx[desc_idx];
dxp->cookie = (void *)cookie;
diff --git a/drivers/net/virtio/virtio_rxtx_simple_neon.c b/drivers/net/virtio/virtio_rxtx_simple_neon.c
index 6b40c7f7..b8b93551 100644
--- a/drivers/net/virtio/virtio_rxtx_simple_neon.c
+++ b/drivers/net/virtio/virtio_rxtx_simple_neon.c
@@ -43,7 +43,6 @@
#include <rte_ethdev.h>
#include <rte_errno.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
diff --git a/drivers/net/virtio/virtio_rxtx_simple_sse.c b/drivers/net/virtio/virtio_rxtx_simple_sse.c
index 7cf0f8b8..94f65143 100644
--- a/drivers/net/virtio/virtio_rxtx_simple_sse.c
+++ b/drivers/net/virtio/virtio_rxtx_simple_sse.c
@@ -46,7 +46,6 @@
#include <rte_ethdev.h>
#include <rte_errno.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
diff --git a/drivers/net/virtio/virtio_user/vhost_kernel_tap.c b/drivers/net/virtio/virtio_user/vhost_kernel_tap.c
index f585de8c..689a5cff 100644
--- a/drivers/net/virtio/virtio_user/vhost_kernel_tap.c
+++ b/drivers/net/virtio/virtio_user/vhost_kernel_tap.c
@@ -95,9 +95,9 @@ vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq)
ifr.ifr_flags |= IFF_MULTI_QUEUE;
if (*p_ifname)
- strncpy(ifr.ifr_name, *p_ifname, IFNAMSIZ);
+ strncpy(ifr.ifr_name, *p_ifname, IFNAMSIZ - 1);
else
- strncpy(ifr.ifr_name, "tap%d", IFNAMSIZ);
+ strncpy(ifr.ifr_name, "tap%d", IFNAMSIZ - 1);
if (ioctl(tapfd, TUNSETIFF, (void *)&ifr) == -1) {
PMD_DRV_LOG(ERR, "TUNSETIFF failed: %s", strerror(errno));
goto error;
diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
index 4ad7b21b..97bd8326 100644
--- a/drivers/net/virtio/virtio_user/vhost_user.c
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -130,6 +130,10 @@ vhost_user_read(int fd, struct vhost_user_msg *msg)
}
sz_payload = msg->size;
+
+ if ((size_t)sz_payload > sizeof(msg->payload))
+ goto fail;
+
if (sz_payload) {
ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0);
if (ret < sz_payload) {
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index 79412714..906d7a2b 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -270,6 +270,8 @@ virtio_user_fill_intr_handle(struct virtio_user_dev *dev)
eth_dev->intr_handle->nb_efd = dev->max_queue_pairs;
eth_dev->intr_handle->max_intr = dev->max_queue_pairs + 1;
eth_dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
+ /* For virtio vdev, no need to read counter for clean */
+ eth_dev->intr_handle->efd_counter_size = 0;
if (dev->vhostfd >= 0)
eth_dev->intr_handle->fd = dev->vhostfd;
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index c9614443..7be57ce6 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -40,7 +40,7 @@
#include <rte_malloc.h>
#include <rte_kvargs.h>
#include <rte_ethdev_vdev.h>
-#include <rte_vdev.h>
+#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include "virtio_ethdev.h"
@@ -86,7 +86,11 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
int flags;
flags = fcntl(dev->vhostfd, F_GETFL);
- fcntl(dev->vhostfd, F_SETFL, flags | O_NONBLOCK);
+ if (fcntl(dev->vhostfd, F_SETFL,
+ flags | O_NONBLOCK) == -1) {
+ PMD_DRV_LOG(ERR, "error setting O_NONBLOCK flag");
+ return;
+ }
r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
if (r == 0 || (r < 0 && errno != EAGAIN)) {
dev->status &= (~VIRTIO_NET_S_LINK_UP);
@@ -369,9 +373,9 @@ virtio_user_eth_dev_alloc(struct rte_vdev_device *vdev)
*/
hw->use_msix = 1;
hw->modern = 0;
- hw->use_simple_rxtx = 0;
+ hw->use_simple_rx = 0;
+ hw->use_simple_tx = 0;
hw->virtio_user_dev = dev;
- data->dev_flags = RTE_ETH_DEV_DETACHABLE;
return eth_dev;
}
diff --git a/drivers/net/virtio/virtqueue.c b/drivers/net/virtio/virtqueue.c
index 9ad77b8a..c3a536f8 100644
--- a/drivers/net/virtio/virtqueue.c
+++ b/drivers/net/virtio/virtqueue.c
@@ -59,3 +59,28 @@ virtqueue_detatch_unused(struct virtqueue *vq)
}
return NULL;
}
+
+/* Flush the elements in the used ring. */
+void
+virtqueue_flush(struct virtqueue *vq)
+{
+ struct vring_used_elem *uep;
+ struct vq_desc_extra *dxp;
+ uint16_t used_idx, desc_idx;
+ uint16_t nb_used, i;
+
+ nb_used = VIRTQUEUE_NUSED(vq);
+
+ for (i = 0; i < nb_used; i++) {
+ used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
+ uep = &vq->vq_ring.used->ring[used_idx];
+ desc_idx = (uint16_t)uep->id;
+ dxp = &vq->vq_descx[desc_idx];
+ if (dxp->cookie != NULL) {
+ rte_pktmbuf_free(dxp->cookie);
+ dxp->cookie = NULL;
+ }
+ vq->vq_used_cons_idx++;
+ vq_ring_free_chain(vq, desc_idx);
+ }
+}
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 2e120861..2305d91a 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -38,7 +38,6 @@
#include <rte_atomic.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_mempool.h>
#include "virtio_pci.h"
@@ -80,7 +79,7 @@ struct rte_mbuf;
#define VIRTIO_MBUF_ADDR(mb, vq) \
((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->offset)))
#else
-#define VIRTIO_MBUF_ADDR(mb, vq) ((mb)->buf_physaddr)
+#define VIRTIO_MBUF_ADDR(mb, vq) ((mb)->buf_iova)
#endif
/**
@@ -143,8 +142,8 @@ struct virtio_net_ctrl_mac {
} __attribute__((__packed__));
#define VIRTIO_NET_CTRL_MAC 1
- #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
- #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1
+#define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
+#define VIRTIO_NET_CTRL_MAC_ADDR_SET 1
/**
* Control VLAN filtering
@@ -204,8 +203,8 @@ struct virtqueue {
struct virtnet_ctl cq;
};
- phys_addr_t vq_ring_mem; /**< physical address of vring,
- * or virtual address for virtio_user. */
+ rte_iova_t vq_ring_mem; /**< physical address of vring,
+ * or virtual address for virtio_user. */
/**
* Head of the free chain in the descriptor table. If
@@ -304,6 +303,9 @@ void virtqueue_dump(struct virtqueue *vq);
*/
struct rte_mbuf *virtqueue_detatch_unused(struct virtqueue *vq);
+/* Flush the elements in the used ring. */
+void virtqueue_flush(struct virtqueue *vq);
+
static inline int
virtqueue_full(const struct virtqueue *vq)
{
@@ -312,6 +314,8 @@ virtqueue_full(const struct virtqueue *vq)
#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx))
+void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx);
+
static inline void
vq_update_avail_idx(struct virtqueue *vq)
{
diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile
index 84356ae2..f09de96e 100644
--- a/drivers/net/vmxnet3/Makefile
+++ b/drivers/net/vmxnet3/Makefile
@@ -63,6 +63,9 @@ CFLAGS_BASE_DRIVER = -Wno-unused-parameter -Wno-unused-value
CFLAGS_BASE_DRIVER += -Wno-strict-aliasing -Wno-format-extra-args
endif
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
VPATH += $(SRCDIR)/base
diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
index 39109919..82d59ca8 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
@@ -48,6 +48,7 @@
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_memory.h>
@@ -87,7 +88,7 @@ static int __vmxnet3_dev_link_update(struct rte_eth_dev *dev,
static int vmxnet3_dev_link_update(struct rte_eth_dev *dev,
int wait_to_complete);
static void vmxnet3_hw_stats_save(struct vmxnet3_hw *hw);
-static void vmxnet3_dev_stats_get(struct rte_eth_dev *dev,
+static int vmxnet3_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static int vmxnet3_dev_xstats_get_names(struct rte_eth_dev *dev,
struct rte_eth_xstat_name *xstats,
@@ -100,7 +101,7 @@ static const uint32_t *
vmxnet3_dev_supported_ptypes_get(struct rte_eth_dev *dev);
static int vmxnet3_dev_vlan_filter_set(struct rte_eth_dev *dev,
uint16_t vid, int on);
-static void vmxnet3_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
+static int vmxnet3_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static void vmxnet3_mac_addr_set(struct rte_eth_dev *dev,
struct ether_addr *mac_addr);
static void vmxnet3_interrupt_handler(void *param);
@@ -309,7 +310,6 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
return 0;
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
/* Vendor and Device ID need to be set before init of shared code */
hw->device_id = pci_dev->id.device_id;
@@ -484,7 +484,7 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev)
memset(mz->addr, 0, mz->len);
hw->shared = mz->addr;
- hw->sharedPA = mz->phys_addr;
+ hw->sharedPA = mz->iova;
/*
* Allocate a memzone for Vmxnet3_RxQueueDesc - Vmxnet3_TxQueueDesc
@@ -505,7 +505,7 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev)
hw->tqd_start = (Vmxnet3_TxQueueDesc *)mz->addr;
hw->rqd_start = (Vmxnet3_RxQueueDesc *)(hw->tqd_start + hw->num_tx_queues);
- hw->queueDescPA = mz->phys_addr;
+ hw->queueDescPA = mz->iova;
hw->queue_desc_len = (uint16_t)size;
if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) {
@@ -521,7 +521,7 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev)
memset(mz->addr, 0, mz->len);
hw->rss_conf = mz->addr;
- hw->rss_confPA = mz->phys_addr;
+ hw->rss_confPA = mz->iova;
}
return 0;
@@ -537,10 +537,10 @@ vmxnet3_write_mac(struct vmxnet3_hw *hw, const uint8_t *addr)
addr[0], addr[1], addr[2],
addr[3], addr[4], addr[5]);
- val = *(const uint32_t *)addr;
+ memcpy(&val, addr, 4);
VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_MACL, val);
- val = (addr[5] << 8) | addr[4];
+ memcpy(&val, addr + 4, 2);
VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_MACH, val);
}
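Using memcpy() instead of casting the MAC byte array to a wider type avoids an unaligned (and strict-aliasing-unfriendly) 32-bit load; compilers turn the fixed-size memcpy into a plain load anyway. The same pattern in isolation:

#include <stdint.h>
#include <string.h>

uint32_t load_u32(const uint8_t *p)
{
	uint32_t v;

	memcpy(&v, p, sizeof(v)); /* alignment-safe, no aliasing violation */
	return v;
}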
@@ -569,7 +569,7 @@ vmxnet3_dev_setup_memreg(struct rte_eth_dev *dev)
}
memset(mz->addr, 0, mz->len);
hw->memRegs = mz->addr;
- hw->memRegsPA = mz->phys_addr;
+ hw->memRegsPA = mz->iova;
}
num = hw->num_rx_queues;
@@ -604,7 +604,7 @@ vmxnet3_dev_setup_memreg(struct rte_eth_dev *dev)
Vmxnet3_MemoryRegion *mr = &hw->memRegs->memRegs[j];
mr->startPA =
- (uintptr_t)STAILQ_FIRST(&mp[i]->mem_list)->phys_addr;
+ (uintptr_t)STAILQ_FIRST(&mp[i]->mem_list)->iova;
mr->length = STAILQ_FIRST(&mp[i]->mem_list)->len <= INT32_MAX ?
STAILQ_FIRST(&mp[i]->mem_list)->len : INT32_MAX;
mr->txQueueBits = index[i];
@@ -730,8 +730,10 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
devRead->rssConfDesc.confPA = hw->rss_confPA;
}
- vmxnet3_dev_vlan_offload_set(dev,
- ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK);
+ ret = vmxnet3_dev_vlan_offload_set(dev,
+ ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK);
+ if (ret)
+ return ret;
vmxnet3_write_mac(hw, dev->data->mac_addrs->addr_bytes);
@@ -1034,7 +1036,7 @@ vmxnet3_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
return count;
}
-static void
+static int
vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
unsigned int i;
@@ -1080,6 +1082,8 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
stats->ierrors += rxStats.pktsRxError;
stats->rx_nombuf += rxStats.pktsRxOutOfBuf;
}
+
+ return 0;
}
static void
@@ -1144,6 +1148,8 @@ vmxnet3_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
{
struct vmxnet3_hw *hw = dev->data->dev_private;
+ ether_addr_copy(mac_addr, (struct ether_addr *)(hw->perm_addr));
+ ether_addr_copy(mac_addr, &dev->data->mac_addrs[0]);
vmxnet3_write_mac(hw, mac_addr->addr_bytes);
}
@@ -1275,7 +1281,7 @@ vmxnet3_dev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vid, int on)
return 0;
}
-static void
+static int
vmxnet3_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
struct vmxnet3_hw *hw = dev->data->dev_private;
@@ -1301,6 +1307,8 @@ vmxnet3_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
VMXNET3_CMD_UPDATE_VLAN_FILTERS);
}
+
+ return 0;
}
static void
diff --git a/drivers/net/vmxnet3/vmxnet3_ring.h b/drivers/net/vmxnet3/vmxnet3_ring.h
index d2e8323b..a6fa93ac 100644
--- a/drivers/net/vmxnet3/vmxnet3_ring.h
+++ b/drivers/net/vmxnet3/vmxnet3_ring.h
@@ -144,7 +144,7 @@ typedef struct vmxnet3_tx_queue {
const struct rte_memzone *mz;
bool stopped;
uint16_t queue_id; /**< Device TX queue index. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
uint16_t txdata_desc_size;
} vmxnet3_tx_queue_t;
@@ -179,7 +179,7 @@ typedef struct vmxnet3_rx_queue {
const struct rte_memzone *mz;
bool stopped;
uint16_t queue_id; /**< Device RX queue index. */
- uint8_t port_id; /**< Device port identifier. */
+ uint16_t port_id; /**< Device port identifier. */
} vmxnet3_rx_queue_t;
#endif /* _VMXNET3_RING_H_ */
diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index d9cf4373..aa396ab2 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -203,6 +203,8 @@ vmxnet3_dev_tx_queue_release(void *txq)
vmxnet3_cmd_ring_release(&tq->cmd_ring);
/* Release the memzone */
rte_memzone_free(tq->mz);
+ /* Release the queue */
+ rte_free(tq);
}
}
@@ -223,6 +225,9 @@ vmxnet3_dev_rx_queue_release(void *rxq)
/* Release the memzone */
rte_memzone_free(rq->mz);
+
+ /* Release the queue */
+ rte_free(rq);
}
}
@@ -265,11 +270,9 @@ vmxnet3_dev_rx_queue_reset(void *rxq)
struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
int size;
- if (rq != NULL) {
- /* Release both the cmd_rings mbufs */
- for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
- vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
- }
+ /* Release both the cmd_rings mbufs */
+ for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
+ vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
ring0 = &rq->cmd_ring[0];
ring1 = &rq->cmd_ring[1];
@@ -504,13 +507,14 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
*/
gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
if (copy_size) {
- uint64 offset = txq->cmd_ring.next2fill *
- txq->txdata_desc_size;
+ uint64 offset =
+ (uint64)txq->cmd_ring.next2fill *
+ txq->txdata_desc_size;
gdesc->txd.addr =
rte_cpu_to_le_64(txq->data_ring.basePA +
offset);
} else {
- gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg);
+ gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
}
gdesc->dword[2] = dw2 | m_seg->data_len;
@@ -618,7 +622,7 @@ vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
*/
buf_info->m = mbuf;
buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
- buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf);
+ buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
/* Load Rx Descriptor with the buffer's GPA */
rxd->addr = buf_info->bufPA;
@@ -848,7 +852,8 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
/* Check for hardware stripped VLAN tag */
if (rcd->ts) {
- start->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
+ start->ol_flags |= (PKT_RX_VLAN |
+ PKT_RX_VLAN_STRIPPED);
start->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
}
@@ -880,6 +885,23 @@ rcd_done:
}
}
+ if (unlikely(nb_rxd == 0)) {
+ uint32_t avail;
+ for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
+ avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
+ if (unlikely(avail > 0)) {
+ /* try to alloc new buf and renew descriptors */
+ vmxnet3_post_rx_bufs(rxq, ring_idx);
+ }
+ }
+ if (unlikely(rxq->shared->ctrl.updateRxProd)) {
+ for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
+ VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
+ rxq->cmd_ring[ring_idx].next2fill);
+ }
+ }
+ }
+
return nb_rx;
}
@@ -962,7 +984,7 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
/* cmd_ring initialization */
ring->base = mz->addr;
- ring->basePA = mz->phys_addr;
+ ring->basePA = mz->iova;
/* comp_ring initialization */
comp_ring->base = ring->base + ring->size;
@@ -1073,7 +1095,7 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
/* cmd_ring0 initialization */
ring0->base = mz->addr;
- ring0->basePA = mz->phys_addr;
+ ring0->basePA = mz->iova;
/* cmd_ring1 initialization */
ring1->base = ring0->base + ring0->size;
diff --git a/drivers/net/xenvirt/rte_eth_xenvirt.c b/drivers/net/xenvirt/rte_eth_xenvirt.c
deleted file mode 100644
index e404b775..00000000
--- a/drivers/net/xenvirt/rte_eth_xenvirt.c
+++ /dev/null
@@ -1,766 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/mman.h>
-#include <errno.h>
-#include <sys/user.h>
-#ifndef PAGE_SIZE
-#define PAGE_SIZE sysconf(_SC_PAGE_SIZE)
-#endif
-#include <linux/binfmts.h>
-#include <xen/xen-compat.h>
-#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
-#include <xs.h>
-#else
-#include <xenstore.h>
-#endif
-#include <linux/virtio_ring.h>
-
-#include <rte_mbuf.h>
-#include <rte_ethdev.h>
-#include <rte_malloc.h>
-#include <rte_memcpy.h>
-#include <rte_string_fns.h>
-#include <rte_vdev.h>
-#include <cmdline_parse.h>
-#include <cmdline_parse_etheraddr.h>
-
-#include "rte_xen_lib.h"
-#include "virtqueue.h"
-#include "rte_eth_xenvirt.h"
-
-#define VQ_DESC_NUM 256
-#define VIRTIO_MBUF_BURST_SZ 64
-
-/* virtio_idx is increased after new device is created.*/
-static int virtio_idx = 0;
-
-static struct rte_eth_link pmd_link = {
- .link_speed = ETH_SPEED_NUM_10G,
- .link_duplex = ETH_LINK_FULL_DUPLEX,
- .link_status = ETH_LINK_DOWN,
- .link_autoneg = ETH_LINK_SPEED_FIXED
-};
-
-static void
-eth_xenvirt_free_queues(struct rte_eth_dev *dev);
-
-static uint16_t
-eth_xenvirt_rx(void *q, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
-{
- struct virtqueue *rxvq = q;
- struct rte_mbuf *rxm, *new_mbuf;
- uint16_t nb_used, num;
- uint32_t len[VIRTIO_MBUF_BURST_SZ];
- uint32_t i;
- struct pmd_internals *pi = rxvq->internals;
-
- nb_used = VIRTQUEUE_NUSED(rxvq);
-
- rte_smp_rmb();
- num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
- num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
- if (unlikely(num == 0)) return 0;
-
- num = virtqueue_dequeue_burst(rxvq, rx_pkts, len, num);
- PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num);
- for (i = 0; i < num ; i ++) {
- rxm = rx_pkts[i];
- PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]);
- rxm->next = NULL;
- rxm->data_off = RTE_PKTMBUF_HEADROOM;
- rxm->data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr));
- rxm->nb_segs = 1;
- rxm->port = pi->port_id;
- rxm->pkt_len = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr));
- }
- /* allocate new mbuf for the used descriptor */
- while (likely(!virtqueue_full(rxvq))) {
- new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
- if (unlikely(new_mbuf == NULL)) {
- break;
- }
- if (unlikely(virtqueue_enqueue_recv_refill(rxvq, new_mbuf))) {
- rte_pktmbuf_free_seg(new_mbuf);
- break;
- }
- }
- pi->eth_stats.ipackets += num;
- return num;
-}
-
-static uint16_t
-eth_xenvirt_tx(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
-{
- struct virtqueue *txvq = tx_queue;
- struct rte_mbuf *txm;
- uint16_t nb_used, nb_tx, num, i;
- int error;
- uint32_t len[VIRTIO_MBUF_BURST_SZ];
- struct rte_mbuf *snd_pkts[VIRTIO_MBUF_BURST_SZ];
- struct pmd_internals *pi = txvq->internals;
-
- nb_tx = 0;
-
- if (unlikely(nb_pkts == 0))
- return 0;
-
- PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
- nb_used = VIRTQUEUE_NUSED(txvq);
-
- rte_smp_rmb();
-
- num = (uint16_t)(likely(nb_used <= VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
- num = virtqueue_dequeue_burst(txvq, snd_pkts, len, num);
-
- for (i = 0; i < num ; i ++) {
- /* mergable not supported, one segment only */
- rte_pktmbuf_free_seg(snd_pkts[i]);
- }
-
- while (nb_tx < nb_pkts) {
- if (likely(!virtqueue_full(txvq))) {
- /* TODO drop tx_pkts if it contains multiple segments */
- txm = tx_pkts[nb_tx];
- error = virtqueue_enqueue_xmit(txvq, txm);
- if (unlikely(error)) {
- if (error == ENOSPC)
- PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0\n");
- else if (error == EMSGSIZE)
- PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1\n");
- else
- PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d\n", error);
- break;
- }
- nb_tx++;
- } else {
- PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n");
- /* virtqueue_notify not needed in our para-virt solution */
- break;
- }
- }
- pi->eth_stats.opackets += nb_tx;
- return nb_tx;
-}
-
-static int
-eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
-{
- RTE_LOG(ERR, PMD, "%s\n", __func__);
- return 0;
-}
-
-/*
- * Create a shared page between guest and host.
- * Host monitors this page if it is cleared on unmap, and then
- * do necessary clean up.
- */
-static void
-gntalloc_vring_flag(int vtidx)
-{
- char key_str[PATH_MAX];
- char val_str[PATH_MAX];
- uint32_t gref_tmp;
- void *ptr;
-
- if (grefwatch_from_alloc(&gref_tmp, &ptr)) {
- RTE_LOG(ERR, PMD, "grefwatch_from_alloc error\n");
- exit(0);
- }
-
- *(uint8_t *)ptr = MAP_FLAG;
- snprintf(val_str, sizeof(val_str), "%u", gref_tmp);
- snprintf(key_str, sizeof(key_str),
- DPDK_XENSTORE_PATH"%d"VRING_FLAG_STR, vtidx);
- xenstore_write(key_str, val_str);
-}
-
-/*
- * Notify host this virtio device is started.
- * Host could start polling this device.
- */
-static void
-dev_start_notify(int vtidx)
-{
- char key_str[PATH_MAX];
- char val_str[PATH_MAX];
-
- RTE_LOG(INFO, PMD, "%s: virtio %d is started\n", __func__, vtidx);
- gntalloc_vring_flag(vtidx);
-
- snprintf(key_str, sizeof(key_str), "%s%s%d",
- DPDK_XENSTORE_PATH, EVENT_TYPE_START_STR,
- vtidx);
- snprintf(val_str, sizeof(val_str), "1");
- xenstore_write(key_str, val_str);
-}
-
-/*
- * Notify host this virtio device is stopped.
- * Host could stop polling this device.
- */
-static void
-dev_stop_notify(int vtidx)
-{
- RTE_SET_USED(vtidx);
-}
-
-
-static int
-update_mac_address(struct ether_addr *mac_addrs, int vtidx)
-{
- char key_str[PATH_MAX];
- char val_str[PATH_MAX];
- int rv;
-
- if (mac_addrs == NULL) {
- RTE_LOG(ERR, PMD, "%s: NULL pointer mac specified\n", __func__);
- return -1;
- }
- rv = snprintf(key_str, sizeof(key_str),
- DPDK_XENSTORE_PATH"%d_ether_addr", vtidx);
- if (rv == -1)
- return rv;
- rv = snprintf(val_str, sizeof(val_str), "%02x:%02x:%02x:%02x:%02x:%02x",
- mac_addrs->addr_bytes[0],
- mac_addrs->addr_bytes[1],
- mac_addrs->addr_bytes[2],
- mac_addrs->addr_bytes[3],
- mac_addrs->addr_bytes[4],
- mac_addrs->addr_bytes[5]);
- if (rv == -1)
- return rv;
- if (xenstore_write(key_str, val_str))
- return rv;
- return 0;
-}
-
-
-static int
-eth_dev_start(struct rte_eth_dev *dev)
-{
- struct virtqueue *rxvq = dev->data->rx_queues[0];
- struct virtqueue *txvq = dev->data->tx_queues[0];
- struct rte_mbuf *m;
- struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private;
- int rv;
-
- dev->data->dev_link.link_status = ETH_LINK_UP;
- while (!virtqueue_full(rxvq)) {
- m = rte_mbuf_raw_alloc(rxvq->mpool);
- if (m == NULL)
- break;
- /* Enqueue allocated buffers. */
- if (virtqueue_enqueue_recv_refill(rxvq, m)) {
- rte_pktmbuf_free_seg(m);
- break;
- }
- }
-
- rxvq->internals = pi;
- txvq->internals = pi;
-
- rv = update_mac_address(dev->data->mac_addrs, pi->virtio_idx);
- if (rv)
- return -1;
- dev_start_notify(pi->virtio_idx);
-
- return 0;
-}
-
-static void
-eth_dev_stop(struct rte_eth_dev *dev)
-{
- struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private;
-
- dev->data->dev_link.link_status = ETH_LINK_DOWN;
- dev_stop_notify(pi->virtio_idx);
-}
-
-/*
- * Notify the host that this virtio device is closed,
- * so the host can do the necessary cleanup for it.
- */
-static void
-eth_dev_close(struct rte_eth_dev *dev)
-{
- eth_xenvirt_free_queues(dev);
-}
-
-static void
-eth_dev_info(struct rte_eth_dev *dev,
- struct rte_eth_dev_info *dev_info)
-{
- struct pmd_internals *internals = dev->data->dev_private;
-
- RTE_SET_USED(internals);
- dev_info->max_mac_addrs = 1;
- dev_info->max_rx_pktlen = (uint32_t)2048;
- dev_info->max_rx_queues = (uint16_t)1;
- dev_info->max_tx_queues = (uint16_t)1;
- dev_info->min_rx_bufsize = 0;
-}
-
-static void
-eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
-{
- struct pmd_internals *internals = dev->data->dev_private;
-	if (stats)
- rte_memcpy(stats, &internals->eth_stats, sizeof(*stats));
-}
-
-static void
-eth_stats_reset(struct rte_eth_dev *dev)
-{
- struct pmd_internals *internals = dev->data->dev_private;
- /* Reset software totals */
- memset(&internals->eth_stats, 0, sizeof(internals->eth_stats));
-}
-
-static void
-eth_queue_release(void *q)
-{
- rte_free(q);
-}
-
-static int
-eth_link_update(struct rte_eth_dev *dev __rte_unused,
- int wait_to_complete __rte_unused)
-{
- return 0;
-}
-
-/*
- * Create a shared vring between guest and host.
- * Memory is allocated through the gntalloc driver, so it is not physically contiguous.
- */
-static void *
-gntalloc_vring_create(int queue_type, uint32_t size, int vtidx)
-{
- char key_str[PATH_MAX] = {0};
- char val_str[PATH_MAX] = {0};
- void *va = NULL;
- int pg_size;
- uint32_t pg_num;
- uint32_t *gref_arr = NULL;
- phys_addr_t *pa_arr = NULL;
- uint64_t start_index;
- int rv;
-
- pg_size = getpagesize();
- size = RTE_ALIGN_CEIL(size, pg_size);
- pg_num = size / pg_size;
-
- gref_arr = calloc(pg_num, sizeof(gref_arr[0]));
- pa_arr = calloc(pg_num, sizeof(pa_arr[0]));
-
- if (gref_arr == NULL || pa_arr == NULL) {
- RTE_LOG(ERR, PMD, "%s: calloc failed\n", __func__);
- goto out;
- }
-
- va = gntalloc(size, gref_arr, &start_index);
- if (va == NULL) {
- RTE_LOG(ERR, PMD, "%s: gntalloc failed\n", __func__);
- goto out;
- }
-
-	if (get_phys_map(va, pa_arr, pg_num, pg_size)) {
-		gntfree(va, size, start_index);
-		va = NULL;
-		goto out;
-	}
-
- /* write in xenstore gref and pfn for each page of vring */
- if (grant_node_create(pg_num, gref_arr, pa_arr, val_str, sizeof(val_str))) {
- gntfree(va, size, start_index);
- va = NULL;
- goto out;
- }
-
- if (queue_type == VTNET_RQ)
- rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"RXVRING_XENSTORE_STR, vtidx);
- else
- rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"TXVRING_XENSTORE_STR, vtidx);
- if (rv == -1 || xenstore_write(key_str, val_str) == -1) {
- gntfree(va, size, start_index);
- va = NULL;
- }
-out:
- free(pa_arr);
- free(gref_arr);
-
- return va;
-}
-
-
-
-static struct virtqueue *
-virtio_queue_setup(struct rte_eth_dev *dev, int queue_type)
-{
- struct virtqueue *vq = NULL;
- uint16_t vq_size = VQ_DESC_NUM;
- int i = 0;
- char vq_name[VIRTQUEUE_MAX_NAME_SZ];
- size_t size;
- struct vring *vr;
-
- /* Allocate memory for virtqueue. */
- if (queue_type == VTNET_RQ) {
- snprintf(vq_name, sizeof(vq_name), "port%d_rvq",
- dev->data->port_id);
- vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
- vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
- if (vq == NULL) {
-			RTE_LOG(ERR, PMD, "%s: unable to allocate virtqueue\n", __func__);
- return NULL;
- }
- memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
-	} else if (queue_type == VTNET_TQ) {
- snprintf(vq_name, sizeof(vq_name), "port%d_tvq",
- dev->data->port_id);
- vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
- vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
- if (vq == NULL) {
-			RTE_LOG(ERR, PMD, "%s: unable to allocate virtqueue\n", __func__);
- return NULL;
- }
- memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
- }
-
-	if (vq == NULL)
-		return NULL;
-
- vq->vq_alignment = VIRTIO_PCI_VRING_ALIGN;
- vq->vq_nentries = vq_size;
- vq->vq_free_cnt = vq_size;
-	/* Calculate vring size according to virtio spec */
- size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
- vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
- /* Allocate memory for virtio vring through gntalloc driver*/
- vq->vq_ring_virt_mem = gntalloc_vring_create(queue_type, vq->vq_ring_size,
- ((struct pmd_internals *)dev->data->dev_private)->virtio_idx);
-	if (vq->vq_ring_virt_mem == NULL) {
-		rte_free(vq);
-		return NULL;
-	}
-	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
- vr = &vq->vq_ring;
- vring_init(vr, vq_size, vq->vq_ring_virt_mem, vq->vq_alignment);
- /*
-	 * Locally maintained last consumed index; this index trails
- * vq_ring.used->idx.
- */
- vq->vq_used_cons_idx = 0;
- vq->vq_desc_head_idx = 0;
- vq->vq_free_cnt = vq->vq_nentries;
- memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
-
- /* Chain all the descriptors in the ring with an END */
- for (i = 0; i < vq_size - 1; i++)
- vr->desc[i].next = (uint16_t)(i + 1);
- vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
-
- return vq;
-}
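/*
 * A minimal standalone sketch of the size arithmetic behind the setup above,
 * for a hypothetical 256-descriptor queue (VQ_DESC_NUM is defined elsewhere
 * in this driver).  It follows the legacy vring layout that vring_size()
 * from <linux/virtio_ring.h> encodes: descriptor table and avail ring first,
 * then the used ring, with the whole ring rounded up to VIRTIO_PCI_VRING_ALIGN.
 */
#include <stdint.h>
#include <stdio.h>

#define ALIGN_CEIL(v, a) (((v) + (a) - 1) & ~((size_t)(a) - 1))

int main(void)
{
	const unsigned int num = 256;		/* hypothetical VQ_DESC_NUM */
	const size_t align = 4096;		/* VIRTIO_PCI_VRING_ALIGN */
	size_t desc  = 16 * num;		/* struct vring_desc[num] */
	size_t avail = 2 * (3 + num);		/* flags, idx, ring[num], used_event */
	size_t used  = 2 * 3 + 8 * num;		/* flags, idx, vring_used_elem[num], avail_event */
	size_t sz    = ALIGN_CEIL(desc + avail, align) + used;

	printf("vring_size(%u) = %zu bytes, vq_ring_size = %zu bytes\n",
	       num, sz, ALIGN_CEIL(sz, align));
	return 0;
}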
-
-static int
-eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
- uint16_t nb_rx_desc __rte_unused,
- unsigned int socket_id __rte_unused,
- const struct rte_eth_rxconf *rx_conf __rte_unused,
- struct rte_mempool *mb_pool)
-{
-	struct virtqueue *vq;
-
-	vq = virtio_queue_setup(dev, VTNET_RQ);
-	if (vq == NULL)
-		return -ENOMEM;
-	vq->mpool = mb_pool;
-	dev->data->rx_queues[rx_queue_id] = vq;
-	return 0;
-}
-
-static int
-eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
- uint16_t nb_tx_desc __rte_unused,
- unsigned int socket_id __rte_unused,
- const struct rte_eth_txconf *tx_conf __rte_unused)
-{
- dev->data->tx_queues[tx_queue_id] = virtio_queue_setup(dev, VTNET_TQ);
- return 0;
-}
-
-static void
-eth_xenvirt_free_queues(struct rte_eth_dev *dev)
-{
- int i;
-
- for (i = 0; i < dev->data->nb_rx_queues; i++) {
- eth_queue_release(dev->data->rx_queues[i]);
- dev->data->rx_queues[i] = NULL;
- }
- dev->data->nb_rx_queues = 0;
-
- for (i = 0; i < dev->data->nb_tx_queues; i++) {
- eth_queue_release(dev->data->tx_queues[i]);
- dev->data->tx_queues[i] = NULL;
- }
- dev->data->nb_tx_queues = 0;
-}
-
-static const struct eth_dev_ops ops = {
- .dev_start = eth_dev_start,
- .dev_stop = eth_dev_stop,
- .dev_close = eth_dev_close,
- .dev_configure = eth_dev_configure,
- .dev_infos_get = eth_dev_info,
- .rx_queue_setup = eth_rx_queue_setup,
- .tx_queue_setup = eth_tx_queue_setup,
- .rx_queue_release = eth_queue_release,
- .tx_queue_release = eth_queue_release,
- .link_update = eth_link_update,
- .stats_get = eth_stats_get,
- .stats_reset = eth_stats_reset,
-};
-
-
-static int
-rte_eth_xenvirt_parse_args(struct xenvirt_dict *dict,
- const char *name, const char *params)
-{
- int i;
- char *pairs[RTE_ETH_XENVIRT_MAX_ARGS];
- int num_of_pairs;
- char *pair[2];
- char *args;
- int ret = -1;
-
- if (params == NULL)
- return 0;
-
- args = rte_zmalloc(NULL, strlen(params) + 1, RTE_CACHE_LINE_SIZE);
- if (args == NULL) {
-		RTE_LOG(ERR, PMD, "Couldn't parse %s device\n", name);
- return -1;
- }
- rte_memcpy(args, params, strlen(params));
-
- num_of_pairs = rte_strsplit(args, strnlen(args, MAX_ARG_STRLEN),
- pairs,
-			RTE_ETH_XENVIRT_MAX_ARGS,
- RTE_ETH_XENVIRT_PAIRS_DELIM);
-
- for (i = 0; i < num_of_pairs; i++) {
- pair[0] = NULL;
- pair[1] = NULL;
- rte_strsplit(pairs[i], strnlen(pairs[i], MAX_ARG_STRLEN),
- pair, 2,
- RTE_ETH_XENVIRT_KEY_VALUE_DELIM);
-
- if (pair[0] == NULL || pair[1] == NULL || pair[0][0] == 0
- || pair[1][0] == 0) {
- RTE_LOG(ERR, PMD,
-				"Couldn't parse %s device, "
-				"wrong key or value\n", name);
- goto err;
- }
-
- if (!strncmp(pair[0], RTE_ETH_XENVIRT_MAC_PARAM,
- sizeof(RTE_ETH_XENVIRT_MAC_PARAM))) {
- if (cmdline_parse_etheraddr(NULL,
- pair[1],
- &dict->addr,
- sizeof(dict->addr)) < 0) {
- RTE_LOG(ERR, PMD,
- "Invalid %s device ether address\n",
- name);
- goto err;
- }
-
- dict->addr_valid = 1;
- }
- }
-
- ret = 0;
-err:
- rte_free(args);
- return ret;
-}
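/*
 * A minimal standalone sketch (standard C only, not taken from the driver):
 * how a devargs string such as "mac=00:11:22:33:44:55" is split using the
 * ';' pair delimiter and '=' key/value delimiter defined in rte_xen_lib.h,
 * mirroring what the rte_strsplit() calls above do.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char args[] = "mac=00:11:22:33:44:55";	/* hypothetical --vdev parameters */
	char *pair, *key, *value;

	for (pair = strtok(args, ";"); pair != NULL; pair = strtok(NULL, ";")) {
		key = pair;
		value = strchr(pair, '=');
		if (value == NULL || value[1] == '\0')
			continue;		/* wrong key or value */
		*value++ = '\0';
		printf("key=%s value=%s\n", key, value);
	}
	return 0;
}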
-
-enum dev_action {
- DEV_CREATE,
- DEV_ATTACH
-};
-
-static struct rte_vdev_driver pmd_xenvirt_drv;
-
-static int
-eth_dev_xenvirt_create(const char *name, const char *params,
- const unsigned numa_node,
- enum dev_action action)
-{
- struct rte_eth_dev_data *data = NULL;
- struct pmd_internals *internals = NULL;
- struct rte_eth_dev *eth_dev = NULL;
- struct xenvirt_dict dict;
-
- memset(&dict, 0, sizeof(struct xenvirt_dict));
-
- RTE_LOG(INFO, PMD, "Creating virtio rings backed ethdev on numa socket %u\n",
- numa_node);
- RTE_SET_USED(action);
-
- if (rte_eth_xenvirt_parse_args(&dict, name, params) < 0) {
- RTE_LOG(ERR, PMD, "%s: Failed to parse ethdev parameters\n", __func__);
- return -1;
- }
-
-	/* now do all data allocation - for the eth_dev structure
-	 * and the internal (private) data
- */
- data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
- if (data == NULL)
- goto err;
-
- internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
- if (internals == NULL)
- goto err;
-
- /* reserve an ethdev entry */
- eth_dev = rte_eth_dev_allocate(name);
- if (eth_dev == NULL)
- goto err;
-
- data->dev_private = internals;
- data->port_id = eth_dev->data->port_id;
- data->nb_rx_queues = (uint16_t)1;
- data->nb_tx_queues = (uint16_t)1;
- data->dev_link = pmd_link;
-	data->mac_addrs = rte_zmalloc("xen_virtio", ETHER_ADDR_LEN, 0);
-	if (data->mac_addrs == NULL)
-		goto err;
-
-	if (dict.addr_valid)
-		memcpy(&data->mac_addrs->addr_bytes, &dict.addr, sizeof(struct ether_addr));
-	else
-		eth_random_addr(&data->mac_addrs->addr_bytes[0]);
-
- eth_dev->data = data;
- eth_dev->dev_ops = &ops;
-
- eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;
- eth_dev->data->kdrv = RTE_KDRV_NONE;
- eth_dev->data->numa_node = numa_node;
-
- eth_dev->rx_pkt_burst = eth_xenvirt_rx;
- eth_dev->tx_pkt_burst = eth_xenvirt_tx;
-
- internals->virtio_idx = virtio_idx++;
- internals->port_id = eth_dev->data->port_id;
-
- return 0;
-
-err:
- rte_free(data);
- rte_free(internals);
-
- return -1;
-}
-
-
-static int
-eth_dev_xenvirt_free(const char *name, const unsigned numa_node)
-{
- struct rte_eth_dev *eth_dev = NULL;
-
- RTE_LOG(DEBUG, PMD,
- "Free virtio rings backed ethdev on numa socket %u\n",
- numa_node);
-
- /* find an ethdev entry */
- eth_dev = rte_eth_dev_allocated(name);
- if (eth_dev == NULL)
- return -1;
-
- if (eth_dev->data->dev_started == 1) {
- eth_dev_stop(eth_dev);
- eth_dev_close(eth_dev);
- }
-
- eth_dev->rx_pkt_burst = NULL;
- eth_dev->tx_pkt_burst = NULL;
- eth_dev->dev_ops = NULL;
-
-	rte_free(eth_dev->data->dev_private);
-	rte_free(eth_dev->data->mac_addrs);
-	rte_free(eth_dev->data);
-
- virtio_idx--;
-
- return 0;
-}
-
-/* TODO: support the multi-process model */
-static int
-rte_pmd_xenvirt_probe(struct rte_vdev_device *dev)
-{
- if (virtio_idx == 0) {
- if (xenstore_init() != 0) {
- RTE_LOG(ERR, PMD, "%s: xenstore init failed\n", __func__);
- return -1;
- }
- if (gntalloc_open() != 0) {
- RTE_LOG(ERR, PMD, "%s: grant init failed\n", __func__);
- return -1;
- }
- }
-	return eth_dev_xenvirt_create(rte_vdev_device_name(dev),
-		rte_vdev_device_args(dev), rte_socket_id(), DEV_CREATE);
-}
-
-static int
-rte_pmd_xenvirt_remove(struct rte_vdev_device *dev)
-{
- eth_dev_xenvirt_free(rte_vdev_device_name(dev), rte_socket_id());
-
- if (virtio_idx == 0) {
- if (xenstore_uninit() != 0)
- RTE_LOG(ERR, PMD, "%s: xenstore uninit failed\n", __func__);
-
- gntalloc_close();
- }
- return 0;
-}
-
-static struct rte_vdev_driver pmd_xenvirt_drv = {
- .probe = rte_pmd_xenvirt_probe,
- .remove = rte_pmd_xenvirt_remove,
-};
-
-RTE_PMD_REGISTER_VDEV(net_xenvirt, pmd_xenvirt_drv);
-RTE_PMD_REGISTER_ALIAS(net_xenvirt, eth_xenvirt);
-RTE_PMD_REGISTER_PARAM_STRING(net_xenvirt,
- "mac=<mac addr>");
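/*
 * A minimal usage sketch, not taken from the original sources: how an
 * application could have instantiated this PMD through an EAL vdev argument.
 * The device name "net_xenvirt0" and the MAC value are assumptions; "mac" is
 * the only parameter the registration above declares.
 */
#include <rte_eal.h>

int main(int argc, char **argv)
{
	char *eal_args[] = {
		argv[0],
		"--vdev", "net_xenvirt0,mac=00:11:22:33:44:55",
	};

	(void)argc;
	return rte_eal_init(3, eal_args) < 0 ? 1 : 0;
}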
diff --git a/drivers/net/xenvirt/rte_eth_xenvirt_version.map b/drivers/net/xenvirt/rte_eth_xenvirt_version.map
deleted file mode 100644
index dd636f72..00000000
--- a/drivers/net/xenvirt/rte_eth_xenvirt_version.map
+++ /dev/null
@@ -1,7 +0,0 @@
-DPDK_2.0 {
- global:
-
- rte_mempool_gntalloc_create;
-
- local: *;
-};
diff --git a/drivers/net/xenvirt/rte_mempool_gntalloc.c b/drivers/net/xenvirt/rte_mempool_gntalloc.c
deleted file mode 100644
index 73e82f80..00000000
--- a/drivers/net/xenvirt/rte_mempool_gntalloc.c
+++ /dev/null
@@ -1,295 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/ioctl.h>
-#include <string.h>
-#include <xen/sys/gntalloc.h>
-
-#include <rte_common.h>
-#include <rte_mempool.h>
-#include <rte_memory.h>
-#include <rte_errno.h>
-
-#include "rte_xen_lib.h"
-#include "rte_eth_xenvirt.h"
-
-struct _gntarr {
- uint32_t gref;
- phys_addr_t pa;
- uint64_t index;
- void *va;
-};
-
-struct _mempool_gntalloc_info {
- struct rte_mempool *mp;
- uint32_t pg_num;
- uint32_t *gref_arr;
- phys_addr_t *pa_arr;
- void *va;
- uint32_t mempool_idx;
- uint64_t start_index;
-};
-
-
-static rte_atomic32_t global_xenvirt_mempool_idx = RTE_ATOMIC32_INIT(-1);
-
-static int
-compare(const void *p1, const void *p2)
-{
- return ((const struct _gntarr *)p1)->pa - ((const struct _gntarr *)p2)->pa;
-}
-
-
-static struct _mempool_gntalloc_info
-_create_mempool(const char *name, unsigned elt_num, unsigned elt_size,
- unsigned cache_size, unsigned private_data_size,
- rte_mempool_ctor_t *mp_init, void *mp_init_arg,
- rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
- int socket_id, unsigned flags)
-{
- struct _mempool_gntalloc_info mgi;
- struct rte_mempool *mp = NULL;
- struct rte_mempool_objsz objsz;
- uint32_t pg_num, rpg_num, pg_shift, pg_sz;
- char *va, *orig_va, *uv; /* uv: from which, the pages could be freed */
- ssize_t sz, usz; /* usz: unused size */
- /*
-	 * for each page allocated through the xen_gntalloc driver:
-	 * gref_arr: stores grant references,
-	 * pa_arr:   stores physical addresses,
-	 * gnt_arr:  stores all metadata
- */
- uint32_t *gref_arr = NULL;
- phys_addr_t *pa_arr = NULL;
- struct _gntarr *gnt_arr = NULL;
-	/* start index of the grant references, used for dealloc */
- uint64_t start_index;
- uint32_t i, j;
- int rv = 0;
- struct ioctl_gntalloc_dealloc_gref arg;
-
- mgi.mp = NULL;
- va = orig_va = uv = NULL;
- pg_num = rpg_num = 0;
- sz = 0;
-
- pg_sz = getpagesize();
- if (rte_is_power_of_2(pg_sz) == 0) {
- goto out;
- }
- pg_shift = rte_bsf32(pg_sz);
-
- rte_mempool_calc_obj_size(elt_size, flags, &objsz);
- sz = rte_mempool_xmem_size(elt_num, objsz.total_size, pg_shift);
- pg_num = sz >> pg_shift;
-
- pa_arr = calloc(pg_num, sizeof(pa_arr[0]));
- gref_arr = calloc(pg_num, sizeof(gref_arr[0]));
- gnt_arr = calloc(pg_num, sizeof(gnt_arr[0]));
- if ((gnt_arr == NULL) || (gref_arr == NULL) || (pa_arr == NULL))
- goto out;
-
- /* grant index is continuous in ascending order */
- orig_va = gntalloc(sz, gref_arr, &start_index);
- if (orig_va == NULL)
- goto out;
-
- get_phys_map(orig_va, pa_arr, pg_num, pg_sz);
- for (i = 0; i < pg_num; i++) {
- gnt_arr[i].index = start_index + i * pg_sz;
- gnt_arr[i].gref = gref_arr[i];
- gnt_arr[i].pa = pa_arr[i];
- gnt_arr[i].va = RTE_PTR_ADD(orig_va, i * pg_sz);
- }
- qsort(gnt_arr, pg_num, sizeof(struct _gntarr), compare);
-
- va = get_xen_virtual(sz, pg_sz);
- if (va == NULL) {
- goto out;
- }
-
- /*
-	 * map one by one, as the indexes aren't contiguous any more.
-	 * This creates pg_num VMAs; doesn't Linux have a limit on these?
- */
- for (i = 0; i < pg_num; i++) {
- /* update gref_arr and pa_arr after sort */
- gref_arr[i] = gnt_arr[i].gref;
- pa_arr[i] = gnt_arr[i].pa;
- gnt_arr[i].va = mmap(va + i * pg_sz, pg_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_FIXED, gntalloc_fd, gnt_arr[i].index);
- if ((gnt_arr[i].va == MAP_FAILED) || (gnt_arr[i].va != (va + i * pg_sz))) {
- RTE_LOG(ERR, PMD, "failed to map %d pages\n", i);
- goto mmap_failed;
- }
- }
-
- /*
-	 * Check that the allocated size is big enough to hold elt_num
-	 * objects and calculate how many bytes are actually required.
- */
- usz = rte_mempool_xmem_usage(va, elt_num, objsz.total_size, pa_arr, pg_num, pg_shift);
- if (usz < 0) {
- mp = NULL;
- i = pg_num;
- goto mmap_failed;
- } else {
- /* unmap unused pages if any */
- uv = RTE_PTR_ADD(va, usz);
- if ((usz = va + sz - uv) > 0) {
-
- RTE_LOG(ERR, PMD,
- "%s(%s): unmap unused %zu of %zu "
- "mmaped bytes @%p orig:%p\n",
- __func__, name, usz, sz, uv, va);
- munmap(uv, usz);
- i = (sz - usz) / pg_sz;
- for (; i < pg_num; i++) {
- arg.count = 1;
- arg.index = gnt_arr[i].index;
- rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg);
- if (rv) {
- /* shouldn't fail here */
- RTE_LOG(ERR, PMD, "va=%p pa=%"PRIu64"x index=%"PRIu64" %s\n",
- gnt_arr[i].va,
- gnt_arr[i].pa,
- arg.index, strerror(errno));
- rte_panic("gntdealloc failed when freeing pages\n");
- }
- }
-
- rpg_num = (sz - usz) >> pg_shift;
- } else
- rpg_num = pg_num;
-
- mp = rte_mempool_xmem_create(name, elt_num, elt_size,
- cache_size, private_data_size,
- mp_init, mp_init_arg,
- obj_init, obj_init_arg,
- socket_id, flags, va, pa_arr, rpg_num, pg_shift);
-
- RTE_ASSERT(elt_num == mp->size);
- }
- mgi.mp = mp;
- mgi.pg_num = rpg_num;
- mgi.gref_arr = gref_arr;
- mgi.pa_arr = pa_arr;
- if (mp)
- mgi.mempool_idx = rte_atomic32_add_return(&global_xenvirt_mempool_idx, 1);
- mgi.start_index = start_index;
- mgi.va = va;
-
- if (mp == NULL) {
- i = pg_num;
- goto mmap_failed;
- }
-
-/*
- * Unmap only, without deallocating the grant references.
- * Unused pages have already been unmapped; unmapping them twice
- * would fail, but that is safe.
- */
-mmap_failed:
-	for (j = 0; j < i; j++) {
-		if (gnt_arr[j].va)
-			munmap(gnt_arr[j].va, pg_sz);
-	}
- }
-out:
- free(gnt_arr);
- if (orig_va)
- munmap(orig_va, sz);
- if (mp == NULL) {
- free(gref_arr);
- free(pa_arr);
-
-		/* some grefs have already been de-allocated from the list in the driver,
-		 * so dealloc them one by one; it is safe to deallocate twice
- */
- if (orig_va) {
- for (i = 0; i < pg_num; i++) {
- arg.index = start_index + i * pg_sz;
-				rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg);
- }
- }
- }
- return mgi;
-}
-
-struct rte_mempool *
-rte_mempool_gntalloc_create(const char *name, unsigned elt_num, unsigned elt_size,
- unsigned cache_size, unsigned private_data_size,
- rte_mempool_ctor_t *mp_init, void *mp_init_arg,
- rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
- int socket_id, unsigned flags)
-{
- int rv;
- uint32_t i;
- struct _mempool_gntalloc_info mgi;
- struct ioctl_gntalloc_dealloc_gref arg;
- int pg_sz = getpagesize();
-
- mgi = _create_mempool(name, elt_num, elt_size,
- cache_size, private_data_size,
- mp_init, mp_init_arg,
- obj_init, obj_init_arg,
- socket_id, flags);
- if (mgi.mp) {
- rv = grant_gntalloc_mbuf_pool(mgi.mp,
- mgi.pg_num,
- mgi.gref_arr,
- mgi.pa_arr,
- mgi.mempool_idx);
- free(mgi.gref_arr);
- free(mgi.pa_arr);
- if (rv == 0)
- return mgi.mp;
- /*
-		 * in _create_mempool, unused pages have already been unmapped and
-		 * deallocated; unmap and dealloc the remaining ones here.
- */
- munmap(mgi.va, pg_sz * mgi.pg_num);
- for (i = 0; i < mgi.pg_num; i++) {
- arg.index = mgi.start_index + i * pg_sz;
-			rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg);
- }
- return NULL;
- }
-	return NULL;
-}
diff --git a/drivers/net/xenvirt/rte_xen_lib.c b/drivers/net/xenvirt/rte_xen_lib.c
deleted file mode 100644
index 6c9a1d49..00000000
--- a/drivers/net/xenvirt/rte_xen_lib.c
+++ /dev/null
@@ -1,454 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <stdbool.h>
-#include <xen/xen-compat.h>
-#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
-#include <xs.h>
-#else
-#include <xenstore.h>
-#endif
-#include <xen/sys/gntalloc.h>
-
-#include <rte_common.h>
-#include <rte_string_fns.h>
-#include <rte_malloc.h>
-
-#include "rte_xen_lib.h"
-
-/*
- * The grant node format in xenstore for vring/mpool is:
- * 0_rx_vring_gref = "gref1#, gref2#, gref3#"
- * 0_mempool_gref = "gref1#, gref2#, gref3#"
- * each gref# is a grant reference for a shared page.
- * In each shared page, we store the grant_node_item items.
- */
-struct grant_node_item {
- uint32_t gref;
- uint32_t pfn;
-} __attribute__((packed));
-
-/* fd for xen_gntalloc driver, used to allocate grant pages*/
-int gntalloc_fd = -1;
-
-/* xenstore path for local domain, now it is '/local/domain/domid/' */
-static char *dompath = NULL;
-/* handle to xenstore read/write operations */
-static struct xs_handle *xs = NULL;
-/* flag to indicate if xenstore cleanup is required */
-static bool is_xenstore_cleaned_up;
-
-/*
- * Reserve a virtual address space.
- * On success, returns the pointer. On failure, returns NULL.
- */
-void *
-get_xen_virtual(size_t size, size_t page_sz)
-{
- void *addr;
- uintptr_t aligned_addr;
-
- addr = mmap(NULL, size + page_sz, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
- if (addr == MAP_FAILED) {
- RTE_LOG(ERR, PMD, "failed get a virtual area\n");
- return NULL;
- }
-
- aligned_addr = RTE_ALIGN_CEIL((uintptr_t)addr, page_sz);
- addr = (void *)(aligned_addr);
-
- return addr;
-}
-
-/*
- * Get the physical address for virtual memory starting at va.
- */
-int
-get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, uint32_t pg_sz)
-{
- int32_t fd, rc = 0;
- uint32_t i, nb;
- off_t ofs;
-
- ofs = (uintptr_t)va / pg_sz * sizeof(*pa);
- nb = pg_num * sizeof(*pa);
-
- if ((fd = open(PAGEMAP_FNAME, O_RDONLY)) < 0 ||
- (rc = pread(fd, pa, nb, ofs)) < 0 ||
- (rc -= nb) != 0) {
- RTE_LOG(ERR, PMD, "%s: failed read of %u bytes from \'%s\' "
- "at offset %lu, error code: %d\n",
- __func__, nb, PAGEMAP_FNAME, (unsigned long)ofs, errno);
- rc = ENOENT;
- }
-
- close(fd);
- for (i = 0; i != pg_num; i++)
- pa[i] = (pa[i] & PAGEMAP_PFN_MASK) * pg_sz;
-
- return rc;
-}
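/*
 * A minimal standalone sketch of the /proc/self/pagemap lookup that
 * get_phys_map() performs above: each entry is a uint64_t whose bits 0-54
 * hold the page frame number, so pa = (entry & PFN mask) * page_size and the
 * file offset for a virtual address is (va / page_size) * sizeof(uint64_t).
 * (On newer kernels the PFN reads back as 0 without CAP_SYS_ADMIN.)
 */
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	const uint64_t pfn_mask = (1ULL << 55) - 1;	/* bits 0-54 */
	long pg_sz = sysconf(_SC_PAGESIZE);
	int probe = 0;					/* any mapped object will do */
	uintptr_t va = (uintptr_t)&probe;
	off_t ofs = (off_t)(va / pg_sz) * sizeof(uint64_t);
	uint64_t entry;
	int fd = open("/proc/self/pagemap", O_RDONLY);

	if (fd < 0 || pread(fd, &entry, sizeof(entry), ofs) != sizeof(entry))
		return 1;
	printf("va %#" PRIxPTR " -> pa %#" PRIx64 "\n",
	       va, (entry & pfn_mask) * (uint64_t)pg_sz);
	close(fd);
	return 0;
}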
-
-int
-gntalloc_open(void)
-{
- gntalloc_fd = open(XEN_GNTALLOC_FNAME, O_RDWR);
- return (gntalloc_fd != -1) ? 0 : -1;
-}
-
-void
-gntalloc_close(void)
-{
- if (gntalloc_fd != -1)
- close(gntalloc_fd);
- gntalloc_fd = -1;
-}
-
-void *
-gntalloc(size_t size, uint32_t *gref, uint64_t *start_index)
-{
- int page_size = getpagesize();
- uint32_t i, pg_num;
- void *va;
- int rv;
- struct ioctl_gntalloc_alloc_gref *arg;
- struct ioctl_gntalloc_dealloc_gref arg_d;
-
- if (size % page_size) {
- RTE_LOG(ERR, PMD, "%s: %zu isn't multiple of page size\n",
- __func__, size);
- return NULL;
- }
-
- pg_num = size / page_size;
- arg = malloc(sizeof(*arg) + (pg_num - 1) * sizeof(uint32_t));
- if (arg == NULL)
- return NULL;
- arg->domid = DOM0_DOMID;
- arg->flags = GNTALLOC_FLAG_WRITABLE;
- arg->count = pg_num;
-
- rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_ALLOC_GREF, arg);
- if (rv) {
- RTE_LOG(ERR, PMD, "%s: ioctl error\n", __func__);
- free(arg);
- return NULL;
- }
-
- va = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gntalloc_fd, arg->index);
- if (va == MAP_FAILED) {
- RTE_LOG(ERR, PMD, "%s: mmap failed\n", __func__);
- arg_d.count = pg_num;
- arg_d.index = arg->index;
- ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg_d);
- free(arg);
- return NULL;
- }
-
- if (gref) {
- for (i = 0; i < pg_num; i++) {
- gref[i] = arg->gref_ids[i];
- }
- }
- if (start_index)
- *start_index = arg->index;
-
- free(arg);
-
- return va;
-}
-
-int
-grefwatch_from_alloc(uint32_t *gref, void **pptr)
-{
- int rv;
- void *ptr;
- int pg_size = getpagesize();
- struct ioctl_gntalloc_alloc_gref arg = {
- .domid = DOM0_DOMID,
- .flags = GNTALLOC_FLAG_WRITABLE,
- .count = 1
- };
- struct ioctl_gntalloc_dealloc_gref arg_d;
- struct ioctl_gntalloc_unmap_notify notify = {
- .action = UNMAP_NOTIFY_CLEAR_BYTE
- };
-
- rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_ALLOC_GREF, &arg);
- if (rv) {
- RTE_LOG(ERR, PMD, "%s: ioctl error\n", __func__);
- return -1;
- }
-
- ptr = (void *)mmap(NULL, pg_size, PROT_READ|PROT_WRITE, MAP_SHARED, gntalloc_fd, arg.index);
- arg_d.index = arg.index;
- arg_d.count = 1;
- if (ptr == MAP_FAILED) {
- RTE_LOG(ERR, PMD, "%s: mmap failed\n", __func__);
- ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d);
- return -1;
- }
- if (pptr)
- *pptr = ptr;
- if (gref)
- *gref = arg.gref_ids[0];
-
- notify.index = arg.index;
- rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_SET_UNMAP_NOTIFY, &notify);
- if (rv) {
- RTE_LOG(ERR, PMD, "%s: unmap notify failed\n", __func__);
- munmap(ptr, pg_size);
- ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d);
- return -1;
- }
-
- return 0;
-}
-
-void
-gntfree(void *va, size_t sz, uint64_t start_index)
-{
- struct ioctl_gntalloc_dealloc_gref arg_d;
-
- if (va && sz) {
- munmap(va, sz);
- arg_d.count = sz / getpagesize();
- arg_d.index = start_index;
- ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg_d);
- }
-}
-
-static int
-xenstore_cleanup(void)
-{
- char store_path[PATH_MAX] = {0};
-
- if (snprintf(store_path, sizeof(store_path),
- "%s%s", dompath, DPDK_XENSTORE_NODE) == -1)
- return -1;
-
- if (xs_rm(xs, XBT_NULL, store_path) == false) {
- RTE_LOG(ERR, PMD, "%s: failed cleanup node\n", __func__);
- return -1;
- }
-
- return 0;
-}
-
-int
-xenstore_init(void)
-{
- unsigned int len, domid;
- char *buf;
- char *end;
-
- xs = xs_domain_open();
- if (xs == NULL) {
- RTE_LOG(ERR, PMD,"%s: xs_domain_open failed\n", __func__);
- return -1;
- }
- buf = xs_read(xs, XBT_NULL, "domid", &len);
- if (buf == NULL) {
- RTE_LOG(ERR, PMD, "%s: failed read domid\n", __func__);
- return -1;
- }
- errno = 0;
- domid = strtoul(buf, &end, 0);
- if (errno != 0 || end == NULL || end == buf || domid == 0)
- return -1;
-
- RTE_LOG(INFO, PMD, "retrieved dom ID = %d\n", domid);
-
- dompath = xs_get_domain_path(xs, domid);
- if (dompath == NULL)
- return -1;
-
-	xs_transaction_start(xs); /* TODO: the returned transaction handle is discarded and never ended */
-
- if (is_xenstore_cleaned_up == 0) {
- if (xenstore_cleanup())
- return -1;
- is_xenstore_cleaned_up = 1;
- }
-
- return 0;
-}
-
-int
-xenstore_uninit(void)
-{
- xs_close(xs);
-
- if (is_xenstore_cleaned_up == 0) {
- if (xenstore_cleanup())
- return -1;
- is_xenstore_cleaned_up = 1;
- }
- free(dompath);
- dompath = NULL;
-
- return 0;
-}
-
-int
-xenstore_write(const char *key_str, const char *val_str)
-{
- char grant_path[PATH_MAX];
- int rv, len;
-
- if (xs == NULL) {
- RTE_LOG(ERR, PMD, "%s: xenstore init failed\n", __func__);
- return -1;
- }
- rv = snprintf(grant_path, sizeof(grant_path), "%s%s", dompath, key_str);
- if (rv == -1) {
- RTE_LOG(ERR, PMD, "%s: snprintf %s %s failed\n",
- __func__, dompath, key_str);
- return -1;
- }
- len = strnlen(val_str, PATH_MAX);
-
- if (xs_write(xs, XBT_NULL, grant_path, val_str, len) == false) {
- RTE_LOG(ERR, PMD, "%s: xs_write failed\n", __func__);
- return -1;
- }
-
- return 0;
-}
-
-int
-grant_node_create(uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, char *val_str, size_t str_size)
-{
- uint64_t start_index;
- int pg_size;
- uint32_t pg_shift;
- void *ptr = NULL;
- uint32_t count, entries_per_pg;
- uint32_t i, j = 0, k = 0;
- uint32_t *gref_tmp;
- int first = 1;
- char tmp_str[PATH_MAX] = {0};
- int rv = -1;
-
- pg_size = getpagesize();
- if (rte_is_power_of_2(pg_size) == 0) {
- return -1;
- }
- pg_shift = rte_bsf32(pg_size);
- if (pg_size % sizeof(struct grant_node_item)) {
- RTE_LOG(ERR, PMD, "pg_size isn't a multiple of grant node item\n");
- return -1;
- }
-
- entries_per_pg = pg_size / sizeof(struct grant_node_item);
- count = (pg_num + entries_per_pg - 1 ) / entries_per_pg;
- gref_tmp = malloc(count * sizeof(uint32_t));
- if (gref_tmp == NULL)
- return -1;
- ptr = gntalloc(pg_size * count, gref_tmp, &start_index);
- if (ptr == NULL) {
- RTE_LOG(ERR, PMD, "%s: gntalloc error of %d pages\n", __func__, count);
- free(gref_tmp);
- return -1;
- }
-
- while (j < pg_num) {
- if (first) {
- rv = snprintf(val_str, str_size, "%u", gref_tmp[k]);
- first = 0;
- } else {
- snprintf(tmp_str, PATH_MAX, "%s", val_str);
- rv = snprintf(val_str, str_size, "%s,%u", tmp_str, gref_tmp[k]);
- }
- k++;
- if (rv == -1)
- break;
-
-		for (i = 0; i < entries_per_pg && j < pg_num; i++) {
-			/* Index from the base so that ptr still points at the
-			 * start of the allocation when it is freed on error.
-			 */
-			((struct grant_node_item *)ptr)[j].gref = gref_arr[j];
-			((struct grant_node_item *)ptr)[j].pfn = pa_arr[j] >> pg_shift;
-			j++;
-		}
- }
- if (rv == -1) {
- gntfree(ptr, pg_size * count, start_index);
- } else
- rv = 0;
- free(gref_tmp);
- return rv;
-}
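/*
 * A minimal standalone sketch of the bookkeeping done by grant_node_create()
 * above: with 4 KiB pages and the 8-byte grant_node_item, each shared index
 * page holds 512 {gref, pfn} records, and the value written to xenstore is
 * the comma-separated gref list of those index pages (page count and gref
 * numbers below are hypothetical).
 */
#include <stdint.h>
#include <stdio.h>

struct grant_node_item { uint32_t gref; uint32_t pfn; } __attribute__((packed));

int main(void)
{
	const uint32_t pg_size = 4096;
	const uint32_t entries_per_pg = pg_size / sizeof(struct grant_node_item);
	const uint32_t pg_num = 1000;			/* hypothetical vring page count */
	const uint32_t count = (pg_num + entries_per_pg - 1) / entries_per_pg;
	const uint32_t gref_tmp[2] = { 1537, 1524 };	/* hypothetical grefs of the index pages */
	char val_str[64] = "";
	char tmp[64];
	uint32_t k;

	for (k = 0; k < count; k++) {
		snprintf(tmp, sizeof(tmp), "%s", val_str);
		if (k == 0)
			snprintf(val_str, sizeof(val_str), "%u", gref_tmp[k]);
		else
			snprintf(val_str, sizeof(val_str), "%s,%u", tmp, gref_tmp[k]);
	}
	printf("entries/page=%u index pages=%u xenstore value=\"%s\"\n",
	       entries_per_pg, count, val_str);
	return 0;
}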
-
-
-int
-grant_gntalloc_mbuf_pool(struct rte_mempool *mpool, uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, int mempool_idx)
-{
- char key_str[PATH_MAX] = {0};
- char val_str[PATH_MAX] = {0};
- void *mempool_obj_va;
-
- if (grant_node_create(pg_num, gref_arr, pa_arr, val_str, sizeof(val_str))) {
- return -1;
- }
-
- if (snprintf(key_str, sizeof(key_str),
- DPDK_XENSTORE_PATH"%d"MEMPOOL_XENSTORE_STR, mempool_idx) == -1)
- return -1;
- if (xenstore_write(key_str, val_str) == -1)
- return -1;
-
- if (snprintf(key_str, sizeof(key_str),
- DPDK_XENSTORE_PATH"%d"MEMPOOL_VA_XENSTORE_STR, mempool_idx) == -1)
- return -1;
- if (mpool->nb_mem_chunks != 1) {
- RTE_LOG(ERR, PMD,
- "mempool with more than 1 chunk is not supported\n");
- return -1;
- }
- mempool_obj_va = STAILQ_FIRST(&mpool->mem_list)->addr;
- if (snprintf(val_str, sizeof(val_str), "%"PRIxPTR,
- (uintptr_t)mempool_obj_va) == -1)
- return -1;
- if (xenstore_write(key_str, val_str) == -1)
- return -1;
-
- return 0;
-}
diff --git a/drivers/net/xenvirt/rte_xen_lib.h b/drivers/net/xenvirt/rte_xen_lib.h
deleted file mode 100644
index d973eacb..00000000
--- a/drivers/net/xenvirt/rte_xen_lib.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RTE_XEN_DUMMY_PMD_H
-#define _RTE_XEN_DUMMY_PMD_H
-
-#include <stdint.h>
-
-#include <rte_common.h>
-#include <rte_mempool.h>
-#include <rte_ether.h>
-
-#define PAGEMAP_FNAME "/proc/self/pagemap"
-#define XEN_GNTALLOC_FNAME "/dev/xen/gntalloc"
-#define DPDK_XENSTORE_PATH "/control/dpdk/"
-#define DPDK_XENSTORE_NODE "/control/dpdk"
-/*format 0_mempool_gref = "1537,1524,1533" */
-#define MEMPOOL_XENSTORE_STR "_mempool_gref"
-/*format 0_mempool_va = 0x80340000 */
-#define MEMPOOL_VA_XENSTORE_STR "_mempool_va"
-/*format 0_rx_vring_gref = "1537,1524,1533" */
-#define RXVRING_XENSTORE_STR "_rx_vring_gref"
-/*format 0_tx_vring_gref = "1537,1524,1533" */
-#define TXVRING_XENSTORE_STR "_tx_vring_gref"
-#define VRING_FLAG_STR "_vring_flag"
-/*format: event_type_start_0 = 1*/
-#define EVENT_TYPE_START_STR "event_type_start_"
-
-#define DOM0_DOMID 0
-/*
- * the pfn (page frame number) is stored in bits 0-54 (see pagemap.txt in the
- * Linux Documentation).
- */
-#define PAGEMAP_PFN_BITS 54
-#define PAGEMAP_PFN_MASK RTE_LEN2MASK(PAGEMAP_PFN_BITS, phys_addr_t)
-
-#define MAP_FLAG 0xA5
-
-#define RTE_ETH_XENVIRT_PAIRS_DELIM ';'
-#define RTE_ETH_XENVIRT_KEY_VALUE_DELIM '='
-#define RTE_ETH_XENVIRT_MAX_ARGS 1
-#define RTE_ETH_XENVIRT_MAC_PARAM "mac"
-struct xenvirt_dict {
- uint8_t addr_valid;
- struct ether_addr addr;
-};
-
-extern int gntalloc_fd;
-
-int
-gntalloc_open(void);
-
-void
-gntalloc_close(void);
-
-void *
-gntalloc(size_t sz, uint32_t *gref, uint64_t *start_index);
-
-void
-gntfree(void *va, size_t sz, uint64_t start_index);
-
-int
-xenstore_init(void);
-
-int
-xenstore_uninit(void);
-
-int
-xenstore_write(const char *key_str, const char *val_str);
-
-int
-get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, uint32_t pg_sz);
-
-void *
-get_xen_virtual(size_t size, size_t page_sz);
-
-int
-grefwatch_from_alloc(uint32_t *gref, void **pptr);
-
-
-int grant_node_create(uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, char *val_str, size_t str_size);
-
-int
-grant_gntalloc_mbuf_pool(struct rte_mempool *mpool, uint32_t pg_num, uint32_t *gref_arr, phys_addr_t *pa_arr, int mempool_idx);
-
-#endif
diff --git a/drivers/net/xenvirt/virtqueue.h b/drivers/net/xenvirt/virtqueue.h
deleted file mode 100644
index 1bb6877c..00000000
--- a/drivers/net/xenvirt/virtqueue.h
+++ /dev/null
@@ -1,273 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VIRTQUEUE_H_
-#define _VIRTQUEUE_H_
-
-#include <stdint.h>
-#include <linux/virtio_ring.h>
-#include <linux/virtio_net.h>
-
-#include <rte_atomic.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_mempool.h>
-
-#include "virtio_logs.h"
-
-struct rte_mbuf;
-
-/* The alignment to use between consumer and producer parts of vring. */
-#define VIRTIO_PCI_VRING_ALIGN 4096
-
-enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
-
-/**
- * The maximum virtqueue size is 2^15. Use that value as the end of
- * descriptor chain terminator since it will never be a valid index
- * in the descriptor table. This is used to verify we are correctly
- * handling vq_free_cnt.
- */
-#define VQ_RING_DESC_CHAIN_END 32768
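/*
 * A minimal standalone sketch of why 32768 (2^15, one past the largest legal
 * queue size) works as a terminator: free descriptors are kept as a singly
 * linked list through desc[].next, exactly as virtio_queue_setup() builds it,
 * and a walk stops when it reaches a value no real index can take.
 */
#include <stdint.h>
#include <stdio.h>

#define VQ_RING_DESC_CHAIN_END 32768

int main(void)
{
	uint16_t next[8], i, head = 0, n = 0;

	for (i = 0; i < 7; i++)			/* chain 0 -> 1 -> ... -> 7 */
		next[i] = i + 1;
	next[7] = VQ_RING_DESC_CHAIN_END;	/* terminator, never a valid index */

	for (i = head; i != VQ_RING_DESC_CHAIN_END; i = next[i])
		n++;
	printf("free descriptors on the chain: %u\n", n);	/* prints 8 */
	return 0;
}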
-
-#define VIRTQUEUE_MAX_NAME_SZ 32
-
-struct pmd_internals {
- struct rte_eth_stats eth_stats;
- int port_id;
- int virtio_idx;
-};
-
-
-struct virtqueue {
- char vq_name[VIRTQUEUE_MAX_NAME_SZ];
- struct rte_mempool *mpool; /**< mempool for mbuf allocation */
- uint16_t queue_id; /**< DPDK queue index. */
- uint16_t vq_queue_index; /**< PCI queue index */
- uint8_t port_id; /**< Device port identifier. */
-
- void *vq_ring_virt_mem; /**< virtual address of vring*/
- int vq_alignment;
- int vq_ring_size;
-
- struct vring vq_ring; /**< vring keeping desc, used and avail */
- struct pmd_internals *internals; /**< virtio device internal info. */
- uint16_t vq_nentries; /**< vring desc numbers */
- uint16_t vq_desc_head_idx;
- uint16_t vq_free_cnt; /**< num of desc available */
- uint16_t vq_used_cons_idx; /**< Last consumed desc in used table, trails vq_ring.used->idx*/
-
- struct vq_desc_extra {
- void *cookie;
- uint16_t ndescs;
- } vq_descx[0] __rte_cache_aligned;
-};
-
-
-#ifdef RTE_LIBRTE_XENVIRT_DEBUG_DUMP
-#define VIRTQUEUE_DUMP(vq) do { \
- uint16_t used_idx, nused; \
- used_idx = (vq)->vq_ring.used->idx; \
- nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
- PMD_INIT_LOG(DEBUG, \
- "VQ: %s - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
- " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
- " avail.flags=0x%x; used.flags=0x%x\n", \
- (vq)->vq_name, (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \
- (vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \
- (vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \
- (vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \
-} while (0)
-#else
-#define VIRTQUEUE_DUMP(vq) do { } while (0)
-#endif
-
-
-/**
- * Dump virtqueue internal structures, for debug purpose only.
- */
-void virtqueue_dump(struct virtqueue *vq);
-
-/**
- * Get all mbufs to be freed.
- */
-struct rte_mbuf * virtqueue_detatch_unused(struct virtqueue *vq);
-
-static __rte_always_inline int
-virtqueue_full(const struct virtqueue *vq)
-{
- return vq->vq_free_cnt == 0;
-}
-
-#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx))
-
-static __rte_always_inline void
-vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
-{
- uint16_t avail_idx;
- /*
- * Place the head of the descriptor chain into the next slot and make
- * it usable to the host. The chain is made available now rather than
- * deferring to virtqueue_notify() in the hopes that if the host is
- * currently running on another CPU, we can keep it processing the new
- * descriptor.
- */
- avail_idx = (uint16_t)(vq->vq_ring.avail->idx & (vq->vq_nentries - 1));
- vq->vq_ring.avail->ring[avail_idx] = desc_idx;
- rte_smp_wmb();
- vq->vq_ring.avail->idx++;
-}
-
-static __rte_always_inline void
-vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
-{
- struct vring_desc *dp;
- struct vq_desc_extra *dxp;
-
- dp = &vq->vq_ring.desc[desc_idx];
- dxp = &vq->vq_descx[desc_idx];
- vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
- while (dp->flags & VRING_DESC_F_NEXT) {
- dp = &vq->vq_ring.desc[dp->next];
- }
- dxp->ndescs = 0;
-
- /*
- * We must append the existing free chain, if any, to the end of
-	 * the newly freed chain. If the virtqueue was completely used, then
-	 * head would be VQ_RING_DESC_CHAIN_END.
- */
- dp->next = vq->vq_desc_head_idx;
- vq->vq_desc_head_idx = desc_idx;
-}
-
-static __rte_always_inline int
-virtqueue_enqueue_recv_refill(struct virtqueue *rxvq, struct rte_mbuf *cookie)
-{
- const uint16_t needed = 1;
- const uint16_t head_idx = rxvq->vq_desc_head_idx;
- struct vring_desc *start_dp = rxvq->vq_ring.desc;
- struct vq_desc_extra *dxp;
-
- if (unlikely(rxvq->vq_free_cnt == 0))
- return -ENOSPC;
- if (unlikely(rxvq->vq_free_cnt < needed))
- return -EMSGSIZE;
- if (unlikely(head_idx >= rxvq->vq_nentries))
- return -EFAULT;
-
- dxp = &rxvq->vq_descx[head_idx];
- dxp->cookie = (void *)cookie;
- dxp->ndescs = needed;
-
- start_dp[head_idx].addr =
- (uint64_t) ((uintptr_t)cookie->buf_addr + RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
- start_dp[head_idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
- start_dp[head_idx].flags = VRING_DESC_F_WRITE;
- rxvq->vq_desc_head_idx = start_dp[head_idx].next;
- rxvq->vq_free_cnt = (uint16_t)(rxvq->vq_free_cnt - needed);
- vq_ring_update_avail(rxvq, head_idx);
-
- return 0;
-}
-
-static __rte_always_inline int
-virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
-{
-
- const uint16_t needed = 2;
- struct vring_desc *start_dp = txvq->vq_ring.desc;
- uint16_t head_idx = txvq->vq_desc_head_idx;
- uint16_t idx = head_idx;
- struct vq_desc_extra *dxp;
-
- if (unlikely(txvq->vq_free_cnt == 0))
- return -ENOSPC;
- if (unlikely(txvq->vq_free_cnt < needed))
- return -EMSGSIZE;
- if (unlikely(head_idx >= txvq->vq_nentries))
- return -EFAULT;
-
- dxp = &txvq->vq_descx[idx];
- dxp->cookie = (void *)cookie;
- dxp->ndescs = needed;
-
-	/*
-	 * First descriptor is a dummy virtio_net_hdr slot (NULL address,
-	 * header length only).
-	 * TODO: save one desc here?
-	 */
-	start_dp[idx].addr = 0;
-	start_dp[idx].len = sizeof(struct virtio_net_hdr);
-	start_dp[idx].flags = VRING_DESC_F_NEXT;
- idx = start_dp[idx].next;
- start_dp[idx].addr = (uint64_t)rte_pktmbuf_mtod(cookie, uintptr_t);
- start_dp[idx].len = cookie->data_len;
- start_dp[idx].flags = 0;
- idx = start_dp[idx].next;
- txvq->vq_desc_head_idx = idx;
- txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
- vq_ring_update_avail(txvq, head_idx);
-
- return 0;
-}
-
-static __rte_always_inline uint16_t
-virtqueue_dequeue_burst(struct virtqueue *vq, struct rte_mbuf **rx_pkts, uint32_t *len, uint16_t num)
-{
- struct vring_used_elem *uep;
- struct rte_mbuf *cookie;
- uint16_t used_idx, desc_idx;
- uint16_t i;
- /* Caller does the check */
- for (i = 0; i < num ; i ++) {
- used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
- uep = &vq->vq_ring.used->ring[used_idx];
- desc_idx = (uint16_t) uep->id;
- cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
- if (unlikely(cookie == NULL)) {
- PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n",
- vq->vq_used_cons_idx);
- RTE_LOG(ERR, PMD, "%s: inconsistent (%u, %u)\n", __func__, used_idx , desc_idx);
- break;
- }
- len[i] = uep->len;
- rx_pkts[i] = cookie;
- vq->vq_used_cons_idx++;
- vq_ring_free_chain(vq, desc_idx);
- vq->vq_descx[desc_idx].cookie = NULL;
- }
- return i;
-}
-
-#endif /* _VIRTQUEUE_H_ */