89 files changed, 1565 insertions, 651 deletions
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 5e08a1b9..12750741 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -17805,10 +17805,7 @@ search_rx_offload(const char *name)
 		if (!strcasecmp(single_name, name)) {
 			found = 1;
 			break;
-		} else if (!strcasecmp(single_name, "UNKNOWN"))
-			break;
-		else if (single_name == NULL)
-			break;
+		}
 		single_offload <<= 1;
 	}
 
diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 91e2e350..23ea7cc8 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -3248,15 +3248,26 @@ parse_vc_action_rss(struct context *ctx, const struct token *token,
 			.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
 			.level = 0,
 			.types = rss_hf,
-			.key_len = 0,
+			.key_len = sizeof(action_rss_data->key),
 			.queue_num = RTE_MIN(nb_rxq, ACTION_RSS_QUEUE_NUM),
-			.key = NULL,
+			.key = action_rss_data->key,
 			.queue = action_rss_data->queue,
 		},
+		.key = "testpmd's default RSS hash key, "
+			"override it for better balancing",
 		.queue = { 0 },
 	};
 	for (i = 0; i < action_rss_data->conf.queue_num; ++i)
 		action_rss_data->queue[i] = i;
+	if (!port_id_is_invalid(ctx->port, DISABLED_WARN) &&
+	    ctx->port != (portid_t)RTE_PORT_ALL) {
+		struct rte_eth_dev_info info;
+
+		rte_eth_dev_info_get(ctx->port, &info);
+		action_rss_data->conf.key_len =
+			RTE_MIN(sizeof(action_rss_data->key),
+				info.hash_key_size);
+	}
 	action->conf = &action_rss_data->conf;
 	return ret;
 }
diff --git a/app/test-pmd/cmdline_mtr.c b/app/test-pmd/cmdline_mtr.c
index 63f32828..846de88d 100644
--- a/app/test-pmd/cmdline_mtr.c
+++ b/app/test-pmd/cmdline_mtr.c
@@ -74,7 +74,7 @@ parse_uint(uint64_t *value, const char *str)
 }
 
 static int
-parse_dscp_table_entries(char *str, enum rte_mtr_color *dscp_table)
+parse_dscp_table_entries(char *str, enum rte_mtr_color **dscp_table)
 {
 	char *token;
 	int i = 0;
@@ -84,23 +84,23 @@ parse_dscp_table_entries(char *str, enum rte_mtr_color *dscp_table)
 		return 0;
 
 	/* Allocate memory for dscp table */
-	dscp_table = (enum rte_mtr_color *)malloc(MAX_DSCP_TABLE_ENTRIES *
+	*dscp_table = (enum rte_mtr_color *)malloc(MAX_DSCP_TABLE_ENTRIES *
 		sizeof(enum rte_mtr_color));
-	if (dscp_table == NULL)
+	if (*dscp_table == NULL)
 		return -1;
 
 	while (1) {
 		if (strcmp(token, "G") == 0 ||
 			strcmp(token, "g") == 0)
-			dscp_table[i++] = RTE_MTR_GREEN;
+			(*dscp_table)[i++] = RTE_MTR_GREEN;
 		else if (strcmp(token, "Y") == 0 ||
 			strcmp(token, "y") == 0)
-			dscp_table[i++] = RTE_MTR_YELLOW;
+			(*dscp_table)[i++] = RTE_MTR_YELLOW;
 		else if (strcmp(token, "R") == 0 ||
 			strcmp(token, "r") == 0)
-			dscp_table[i++] = RTE_MTR_RED;
+			(*dscp_table)[i++] = RTE_MTR_RED;
 		else {
-			free(dscp_table);
+			free(*dscp_table);
 			return -1;
 		}
 		if (i == MAX_DSCP_TABLE_ENTRIES)
@@ -108,7 +108,7 @@ parse_dscp_table_entries(char *str, enum rte_mtr_color *dscp_table)
 
 		token = strtok_r(str, PARSE_DELIMITER, &str);
 		if (token == NULL) {
-			free(dscp_table);
+			free(*dscp_table);
 			return -1;
 		}
 	}
@@ -117,7 +117,7 @@ parse_dscp_table_entries(char *str, enum rte_mtr_color *dscp_table)
 
 static int
 parse_meter_color_str(char *c_str, uint32_t *use_prev_meter_color,
-	enum rte_mtr_color *dscp_table)
+	enum rte_mtr_color **dscp_table)
 {
 	char *token;
 	uint64_t previous_mtr_color = 0;
@@ -195,7 +195,7 @@ parse_policer_action_string(char *p_str, uint32_t action_mask,
 
 static int
 parse_multi_token_string(char *t_str, uint16_t *port_id,
-	uint32_t *mtr_id, enum rte_mtr_color *dscp_table)
+	uint32_t *mtr_id, enum rte_mtr_color **dscp_table)
 {
 	char *token;
 	uint64_t val;
@@ -794,7 +794,7 @@ static void cmd_create_port_meter_parsed(void *parsed_result,
 	params.meter_profile_id = res->profile_id;
 
 	/* Parse meter input color string params */
-	ret = parse_meter_color_str(c_str, &use_prev_meter_color, dscp_table);
+	ret = parse_meter_color_str(c_str, &use_prev_meter_color, &dscp_table);
 	if (ret) {
 		printf(" Meter input color params string parse error\n");
 		return;
@@ -1141,7 +1141,7 @@ static void cmd_set_port_meter_dscp_table_parsed(void *parsed_result,
 	int ret;
 
 	/* Parse string */
-	ret = parse_multi_token_string(t_str, &port_id, &mtr_id, dscp_table);
+	ret = parse_multi_token_string(t_str, &port_id, &mtr_id, &dscp_table);
 	if (ret) {
 		printf(" Multi token string parse error\n");
 		return;
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index dce4b9be..ffeee205 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -111,7 +111,9 @@ parse_ipv4(struct ipv4_hdr *ipv4_hdr, struct testpmd_offload_info *info)
 	if (info->l4_proto == IPPROTO_TCP) {
 		tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + info->l3_len);
 		info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
-	} else
+	} else if (info->l4_proto == IPPROTO_UDP)
+		info->l4_len = sizeof(struct udp_hdr);
+	else
 		info->l4_len = 0;
 }
 
@@ -128,7 +130,9 @@ parse_ipv6(struct ipv6_hdr *ipv6_hdr, struct testpmd_offload_info *info)
 	if (info->l4_proto == IPPROTO_TCP) {
 		tcp_hdr = (struct tcp_hdr *)((char *)ipv6_hdr + info->l3_len);
 		info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
-	} else
+	} else if (info->l4_proto == IPPROTO_UDP)
+		info->l4_len = sizeof(struct udp_hdr);
+	else
 		info->l4_len = 0;
 }
 
diff --git a/app/test-pmd/softnicfwd.c b/app/test-pmd/softnicfwd.c
index 7ff62280..94e6669d 100644
--- a/app/test-pmd/softnicfwd.c
+++ b/app/test-pmd/softnicfwd.c
@@ -458,6 +458,7 @@ softport_tm_tc_node_add(portid_t port_id,
 						error->message,
 						shaper_profile_id);
 
+					free(tnp.shared_shaper_id);
 					return -1;
 				}
 				tnp.shaper_profile_id = shaper_profile_id;
@@ -473,6 +474,7 @@ softport_tm_tc_node_add(portid_t port_id,
 						error->message,
 						h->tc_node_id[pos][k]);
 
+					free(tnp.shared_shaper_id);
 					return -1;
 				}
 				shaper_profile_id++;
diff --git a/buildtools/check-experimental-syms.sh b/buildtools/check-experimental-syms.sh
index d0915102..7d1f3a56 100755
--- a/buildtools/check-experimental-syms.sh
+++ b/buildtools/check-experimental-syms.sh
@@ -5,6 +5,12 @@
 MAPFILE=$1
 OBJFILE=$2
 
+# Nothing to check unless both inputs exist (e.g. "make -C test/" usage)
+if [ ! -e "$MAPFILE" ] || [ ! -f "$OBJFILE" ]
+then
+	exit 0
+fi
+
 if [ -d $MAPFILE ]
 then
 	exit 0
diff --git a/buildtools/gen-build-mk.sh b/buildtools/gen-build-mk.sh
index c18b205e..636920b6 100755
--- a/buildtools/gen-build-mk.sh
+++ b/buildtools/gen-build-mk.sh
@@ -5,7 +5,6 @@
 # Auto-generate a Makefile in build directory
 # Args:
 #   $1: path of project src root
-#   $2: path of build dir (can be relative to $1)
 
 echo "# Automatically generated by gen-build-mk.sh"
 echo
@@ -18,7 +17,7 @@ echo
 echo "MAKEFLAGS += --no-print-directory"
 echo
 echo "all:"
-echo "	@\$(MAKE) -C $1 O=$2"
+echo "	@\$(MAKE) -C $1 O=\$(CURDIR)"
 echo
 echo "%::"
-echo "	@\$(MAKE) -C $1 O=$2 \$@"
+echo "	@\$(MAKE) -C $1 O=\$(CURDIR) \$@"
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 7af5ead8..3610e008 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -137,6 +137,11 @@ Limitations
   enabled (``rxq_cqe_comp_en``) at the same time, RSS hash result is not fully
   supported. Some Rx packets may not have PKT_RX_RSS_HASH.
 
+- IPv6 multicast messages are not received on a VM when promiscuous mode
+  and allmulticast mode are both off.
+  To receive IPv6 multicast messages on a VM, explicitly add the relevant
+  MAC address using the ``rte_eth_dev_mac_addr_add()`` API.
+
 Statistics
 ----------
 
@@ -434,6 +439,7 @@ Run-time configuration
 
   A nonzero value enables the DV flow steering assuming it is supported
   by the driver.
+  DV flow steering is not supported in switchdev mode.
 
   Disabled by default.
 
diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c
index 9c66bdc7..2c03ca41 100644
--- a/drivers/bus/vdev/vdev.c
+++ b/drivers/bus/vdev/vdev.c
@@ -224,7 +224,6 @@ insert_vdev(const char *name, const char *args,
 	}
 
 	dev->device.bus = &rte_vdev_bus;
-	dev->device.devargs = devargs;
 	dev->device.numa_node = SOCKET_ID_ANY;
 	dev->device.name = devargs->name;
 
@@ -238,9 +237,10 @@ insert_vdev(const char *name, const char *args,
 		goto fail;
 	}
 
-	TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
 	if (init)
-		rte_devargs_insert(devargs);
+		rte_devargs_insert(&devargs);
+	dev->device.devargs = devargs;
+	TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
 
 	if (p_dev)
 		*p_dev = dev;
diff --git a/drivers/bus/vmbus/linux/vmbus_uio.c b/drivers/bus/vmbus/linux/vmbus_uio.c
index 856c6d66..12e97e3a 100644
--- a/drivers/bus/vmbus/linux/vmbus_uio.c
+++ b/drivers/bus/vmbus/linux/vmbus_uio.c
@@ -329,6 +329,7 @@ int vmbus_uio_get_subchan(struct vmbus_channel *primary,
 	char chan_path[PATH_MAX], subchan_path[PATH_MAX];
 	struct dirent *ent;
 	DIR *chan_dir;
+	int err;
 
 	snprintf(chan_path, sizeof(chan_path),
 		 "%s/%s/channels",
@@ -344,7 +345,6 @@ int vmbus_uio_get_subchan(struct vmbus_channel *primary,
 	while ((ent = readdir(chan_dir))) {
 		unsigned long relid, subid, monid;
 		char *endp;
-		int err;
 
 		if (ent->d_name[0] == '.')
 			continue;
@@ -364,8 +364,7 @@ int vmbus_uio_get_subchan(struct vmbus_channel *primary,
 		if (err) {
 			VMBUS_LOG(NOTICE, "invalid subchannel id %lu",
 				  subid);
-			closedir(chan_dir);
-			return err;
+			goto fail;
 		}
 
 		if (subid == 0)
@@ -382,17 +381,20 @@ int vmbus_uio_get_subchan(struct vmbus_channel *primary,
 		if (err) {
 			VMBUS_LOG(NOTICE, "invalid monitor id %lu",
 				  monid);
-			return err;
+			goto fail;
 		}
 
 		err = vmbus_chan_create(dev, relid, subid, monid, subchan);
 		if (err) {
 			VMBUS_LOG(NOTICE, "subchannel setup failed");
-			return err;
+			goto fail;
 		}
 		break;
 	}
 	closedir(chan_dir);
 
 	return (ent == NULL) ? -ENOENT : 0;
+fail:
+	closedir(chan_dir);
+	return err;
 }
diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c
index 43f6c26e..f3eff268 100644
--- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c
+++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c
@@ -535,14 +535,12 @@ aesni_mb_pmd_qp_set_unique_name(struct rte_cryptodev *dev,
 /** Create a ring to place processed operations on */
 static struct rte_ring *
 aesni_mb_pmd_qp_create_processed_ops_ring(struct aesni_mb_qp *qp,
-		const char *str, unsigned int ring_size, int socket_id)
+		unsigned int ring_size, int socket_id)
 {
 	struct rte_ring *r;
 	char ring_name[RTE_CRYPTODEV_NAME_MAX_LEN];
 
-	unsigned int n = snprintf(ring_name, sizeof(ring_name),
-				"%s_%s",
-				qp->name, str);
+	unsigned int n = snprintf(ring_name, sizeof(ring_name), "%s", qp->name);
 
 	if (n >= sizeof(ring_name))
 		return NULL;
@@ -600,7 +598,7 @@ aesni_mb_pmd_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 	qp->op_fns = &job_ops[internals->vector_mode];
 
 	qp->ingress_queue = aesni_mb_pmd_qp_create_processed_ops_ring(qp,
-			"ingress", qp_conf->nb_descriptors, socket_id);
+			qp_conf->nb_descriptors, socket_id);
 	if (qp->ingress_queue == NULL) {
 		ret = -1;
 		goto qp_setup_cleanup;
diff --git a/drivers/crypto/ccp/ccp_pci.c b/drivers/crypto/ccp/ccp_pci.c
index 59152ca5..1702a09c 100644
--- a/drivers/crypto/ccp/ccp_pci.c
+++ b/drivers/crypto/ccp/ccp_pci.c
@@ -31,12 +31,15 @@ ccp_check_pci_uio_module(void)
 	while (uio_module_names[i] != NULL) {
 		while (fgets(buf, sizeof(buf), fp) != NULL) {
 			if (!strncmp(buf, uio_module_names[i],
-				     strlen(uio_module_names[i])))
+				     strlen(uio_module_names[i]))) {
+				fclose(fp);
 				return i;
+			}
 		}
 		i++;
 		rewind(fp);
 	}
+	fclose(fp);
 	printf("Insert igb_uio or uio_pci_generic kernel module(s)");
 	return -1;/* uio not inserted */
 }
diff --git a/drivers/crypto/octeontx/otx_cryptodev.c b/drivers/crypto/octeontx/otx_cryptodev.c
index 269f0456..b201e0a1 100644
--- a/drivers/crypto/octeontx/otx_cryptodev.c
+++ b/drivers/crypto/octeontx/otx_cryptodev.c
@@ -100,8 +100,8 @@ otx_cpt_pci_remove(struct rte_pci_device *pci_dev)
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
 		rte_free(cryptodev->data->dev_private);
 
-	cryptodev->device = NULL;
 	cryptodev->device->driver = NULL;
+	cryptodev->device = NULL;
 	cryptodev->data = NULL;
 
 	/* free metapool memory */
diff --git a/drivers/crypto/octeontx/otx_cryptodev_hw_access.c b/drivers/crypto/octeontx/otx_cryptodev_hw_access.c
index 5e705a83..18f2e6b1 100644
--- a/drivers/crypto/octeontx/otx_cryptodev_hw_access.c
+++ b/drivers/crypto/octeontx/otx_cryptodev_hw_access.c
@@ -9,6 +9,7 @@
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_memzone.h>
+#include <rte_string_fns.h>
 
 #include "otx_cryptodev_hw_access.h"
 #include "otx_cryptodev_mbox.h"
@@ -366,7 +367,9 @@ otx_cpt_hw_init(struct cpt_vf *cptvf, void *pdev, void *reg_base, char *name)
 
 	/* Bar0 base address */
 	cptvf->reg_base = reg_base;
-	strncpy(cptvf->dev_name, name, 32);
+
+	/* Save device name */
+	strlcpy(cptvf->dev_name, name, (sizeof(cptvf->dev_name)));
 
 	cptvf->pdev = pdev;
 
diff --git a/drivers/crypto/octeontx/otx_cryptodev_ops.c b/drivers/crypto/octeontx/otx_cryptodev_ops.c
index 23f96591..90d0c14b 100644
--- a/drivers/crypto/octeontx/otx_cryptodev_ops.c
+++ b/drivers/crypto/octeontx/otx_cryptodev_ops.c
@@ -216,7 +216,7 @@ otx_cpt_que_pair_setup(struct rte_cryptodev *dev,
 	}
 
 	ret = otx_cpt_get_resource(cptvf, 0, &instance);
-	if (ret != 0) {
+	if (ret != 0 || instance == NULL) {
 		CPT_LOG_ERR("Error getting instance handle from device %s : "
 			    "ret = %d", dev->data->name, ret);
 		return ret;
diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c
index 27975936..a6d2687a 100644
--- a/drivers/net/bnx2x/bnx2x.c
+++ b/drivers/net/bnx2x/bnx2x.c
@@ -199,8 +199,12 @@ static int bnx2x_acquire_hw_lock(struct bnx2x_softc *sc, uint32_t resource)
 	uint32_t hw_lock_control_reg;
 	int cnt;
 
+#ifndef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
 	if (resource)
 		PMD_INIT_FUNC_TRACE(sc);
+#else
+	PMD_INIT_FUNC_TRACE(sc);
+#endif
 
 	/* validate the resource is within range */
 	if (resource > HW_LOCK_MAX_RESOURCE_VALUE) {
@@ -248,8 +252,12 @@ static int bnx2x_release_hw_lock(struct bnx2x_softc *sc, uint32_t resource)
 	int func = SC_FUNC(sc);
 	uint32_t hw_lock_control_reg;
 
+#ifndef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
 	if (resource)
 		PMD_INIT_FUNC_TRACE(sc);
+#else
+	PMD_INIT_FUNC_TRACE(sc);
+#endif
 
 	/* validate the resource is within range */
 	if (resource > HW_LOCK_MAX_RESOURCE_VALUE) {
@@ -7041,7 +7049,7 @@ void bnx2x_link_status_update(struct bnx2x_softc *sc)
 		}
 		bnx2x_link_report(sc);
 	} else {
-		bnx2x_link_report(sc);
+		bnx2x_link_report_locked(sc);
 		bnx2x_stats_handle(sc, STATS_EVENT_LINK_UP);
 	}
 }
@@ -9388,6 +9396,8 @@ static int bnx2x_prev_unload(struct bnx2x_softc *sc)
 	uint32_t fw, hw_lock_reg, hw_lock_val;
 	uint32_t rc = 0;
 
+	PMD_INIT_FUNC_TRACE(sc);
+
 	/*
 	 * Clear HW from errors which may have resulted from an interrupted
 	 * DMAE transaction.
@@ -9395,22 +9405,23 @@ static int bnx2x_prev_unload(struct bnx2x_softc *sc)
 	bnx2x_prev_interrupted_dmae(sc);
 
 	/* Release previously held locks */
-	if (SC_FUNC(sc) <= 5)
-		hw_lock_reg = (MISC_REG_DRIVER_CONTROL_1 + SC_FUNC(sc) * 8);
-	else
-		hw_lock_reg =
-		    (MISC_REG_DRIVER_CONTROL_7 + (SC_FUNC(sc) - 6) * 8);
+	hw_lock_reg = (SC_FUNC(sc) <= 5) ?
+			(MISC_REG_DRIVER_CONTROL_1 + SC_FUNC(sc) * 8) :
+			(MISC_REG_DRIVER_CONTROL_7 + (SC_FUNC(sc) - 6) * 8);
 
 	hw_lock_val = (REG_RD(sc, hw_lock_reg));
 	if (hw_lock_val) {
 		if (hw_lock_val & HW_LOCK_RESOURCE_NVRAM) {
+			PMD_DRV_LOG(DEBUG, sc, "Releasing previously held NVRAM lock");
 			REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB,
 			       (MCPR_NVM_SW_ARB_ARB_REQ_CLR1 << SC_PORT(sc)));
 		}
+		PMD_DRV_LOG(DEBUG, sc, "Releasing previously held HW lock");
 		REG_WR(sc, hw_lock_reg, 0xffffffff);
 	}
 
 	if (MCPR_ACCESS_LOCK_LOCK & REG_RD(sc, MCP_REG_MCPR_ACCESS_LOCK)) {
+		PMD_DRV_LOG(DEBUG, sc, "Releasing previously held ALR");
 		REG_WR(sc, MCP_REG_MCPR_ACCESS_LOCK, 0);
 	}
 
@@ -9740,6 +9751,8 @@ int bnx2x_attach(struct bnx2x_softc *sc)
 		sc->fw_seq =
 		    (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_mb_header) &
 		     DRV_MSG_SEQ_NUMBER_MASK);
+		PMD_DRV_LOG(DEBUG, sc, "prev unload fw_seq 0x%04x",
+			    sc->fw_seq);
 		bnx2x_prev_unload(sc);
 	}
 
diff --git a/drivers/net/bnx2x/bnx2x_logs.h b/drivers/net/bnx2x/bnx2x_logs.h
index 753bccdf..f0cf69c1 100644
--- a/drivers/net/bnx2x/bnx2x_logs.h
+++ b/drivers/net/bnx2x/bnx2x_logs.h
@@ -10,43 +10,40 @@
 
 extern int bnx2x_logtype_init;
 #define PMD_INIT_LOG(level, sc, fmt, args...) \
-	RTE_LOG(level, PMD, \
+	rte_log(RTE_LOG_ ## level, bnx2x_logtype_init, \
 	"[bnx2x_pmd: %s] %s() " fmt "\n", (sc)->devinfo.name, __func__, ##args)
 
 #define PMD_INIT_FUNC_TRACE(sc) PMD_INIT_LOG(DEBUG, sc, " >>")
 
+extern int bnx2x_logtype_driver;
+#define PMD_DRV_LOG_RAW(level, sc, fmt, args...) \
+	rte_log(RTE_LOG_ ## level, bnx2x_logtype_driver, \
+		"[%s:%d(%s)] " fmt,	__func__, __LINE__, \
+		(sc)->devinfo.name ? (sc)->devinfo.name : "", ## args)
+
+#define PMD_DRV_LOG(level, sc, fmt, args...) \
+	PMD_DRV_LOG_RAW(level, sc, fmt "\n", ## args)
+
 #ifdef RTE_LIBRTE_BNX2X_DEBUG_RX
 #define PMD_RX_LOG(level, fmt, args...) \
-	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+	rte_log(RTE_LOG_ ## level, bnx2x_logtype_driver, \
+	"%s(): " fmt "\n", __func__, ## args)
 #else
 #define PMD_RX_LOG(level, fmt, args...) do { } while(0)
 #endif
 
 #ifdef RTE_LIBRTE_BNX2X_DEBUG_TX
 #define PMD_TX_LOG(level, fmt, args...) \
-	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+	rte_log(RTE_LOG_ ## level, bnx2x_logtype_driver, \
+		"%s(): " fmt "\n", __func__, ## args)
 #else
 #define PMD_TX_LOG(level, fmt, args...) do { } while(0)
 #endif
 
-#ifdef RTE_LIBRTE_BNX2X_DEBUG_TX_FREE
-#define PMD_TX_FREE_LOG(level, fmt, args...) \
-	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
-#else
-#define PMD_TX_FREE_LOG(level, fmt, args...) do { } while(0)
-#endif
-
-extern int bnx2x_logtype_driver;
-#define PMD_DRV_LOG_RAW(level, sc, fmt, args...) \
-	RTE_LOG(level, PMD, "[%s:%d(%s)] " fmt,	__func__, __LINE__, \
-		(sc)->devinfo.name ? (sc)->devinfo.name : "", ## args)
-
-#define PMD_DRV_LOG(level, sc, fmt, args...) \
-	PMD_DRV_LOG_RAW(level, sc, fmt "\n", ## args)
-
 #ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
 #define PMD_DEBUG_PERIODIC_LOG(level, sc, fmt, args...) \
-	RTE_LOG(level, PMD, "%s(%s): " fmt "\n", __func__, \
+	rte_log(RTE_LOG_ ## level, bnx2x_logtype_driver, \
+		"%s(%s): " fmt "\n", __func__, \
 		(sc)->devinfo.name ? (sc)->devinfo.name : "", ## args)
 #else
 #define PMD_DEBUG_PERIODIC_LOG(level, sc, fmt, args...) do { } while (0)
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index 1a6d8e4d..2661620a 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -2181,9 +2181,14 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
 
 	internals->link_status_polling_enabled = 0;
 	for (i = 0; i < internals->slave_count; i++) {
-		internals->slaves[i].last_link_status = 0;
-		rte_eth_dev_stop(internals->slaves[i].port_id);
-		deactivate_slave(eth_dev, internals->slaves[i].port_id);
+		uint16_t slave_id = internals->slaves[i].port_id;
+		if (find_slave_by_id(internals->active_slaves,
+				internals->active_slave_count, slave_id) !=
+						internals->active_slave_count) {
+			internals->slaves[i].last_link_status = 0;
+			rte_eth_dev_stop(slave_id);
+			deactivate_slave(eth_dev, slave_id);
+		}
 	}
 }
 
diff --git a/drivers/net/cxgbe/cxgbe_filter.c b/drivers/net/cxgbe/cxgbe_filter.c
index ef1102be..3a7912e4 100644
--- a/drivers/net/cxgbe/cxgbe_filter.c
+++ b/drivers/net/cxgbe/cxgbe_filter.c
@@ -263,8 +263,8 @@ static u64 hash_filter_ntuple(const struct filter_entry *f)
 	u64 ntuple = 0;
 	u16 tcp_proto = IPPROTO_TCP; /* TCP Protocol Number */
 
-	if (tp->port_shift >= 0)
-		ntuple |= (u64)f->fs.mask.iport << tp->port_shift;
+	if (tp->port_shift >= 0 && f->fs.mask.iport)
+		ntuple |= (u64)f->fs.val.iport << tp->port_shift;
 
 	if (tp->protocol_shift >= 0) {
 		if (!f->fs.val.proto)
@@ -278,9 +278,6 @@ static u64 hash_filter_ntuple(const struct filter_entry *f)
 	if (tp->macmatch_shift >= 0 && f->fs.mask.macidx)
 		ntuple |= (u64)(f->fs.val.macidx) << tp->macmatch_shift;
 
-	if (ntuple != tp->hash_filter_mask)
-		return 0;
-
 	return ntuple;
 }
 
diff --git a/drivers/net/cxgbe/cxgbe_flow.c b/drivers/net/cxgbe/cxgbe_flow.c
index 54ec7e59..4deaff8f 100644
--- a/drivers/net/cxgbe/cxgbe_flow.c
+++ b/drivers/net/cxgbe/cxgbe_flow.c
@@ -7,14 +7,12 @@
 
 #define __CXGBE_FILL_FS(__v, __m, fs, elem, e) \
 do { \
-	if (!((fs)->val.elem || (fs)->mask.elem)) { \
-		(fs)->val.elem = (__v); \
-		(fs)->mask.elem = (__m); \
-	} else { \
+	if ((fs)->mask.elem && ((fs)->val.elem != (__v))) \
 		return rte_flow_error_set(e, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, \
-					  NULL, "a filter can be specified" \
-					  " only once"); \
-	} \
+					  NULL, "Redefined match item with" \
+					  " different values found"); \
+	(fs)->val.elem = (__v); \
+	(fs)->mask.elem = (__m); \
 } while (0)
 
 #define __CXGBE_FILL_FS_MEMCPY(__v, __m, fs, elem) \
@@ -799,7 +797,7 @@ static int __cxgbe_flow_create(struct rte_eth_dev *dev, struct rte_flow *flow)
 
 	/* Poll the FW for reply */
 	err = cxgbe_poll_for_completion(&adap->sge.fw_evtq,
-					CXGBE_FLOW_POLL_US,
+					CXGBE_FLOW_POLL_MS,
 					CXGBE_FLOW_POLL_CNT,
 					&ctx.completion);
 	if (err) {
@@ -885,7 +883,7 @@ static int __cxgbe_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 
 	/* Poll the FW for reply */
 	err = cxgbe_poll_for_completion(&adap->sge.fw_evtq,
-					CXGBE_FLOW_POLL_US,
+					CXGBE_FLOW_POLL_MS,
 					CXGBE_FLOW_POLL_CNT,
 					&ctx.completion);
 	if (err) {
diff --git a/drivers/net/cxgbe/cxgbe_flow.h b/drivers/net/cxgbe/cxgbe_flow.h
index 718bf3d0..ec8e47ae 100644
--- a/drivers/net/cxgbe/cxgbe_flow.h
+++ b/drivers/net/cxgbe/cxgbe_flow.h
@@ -10,8 +10,9 @@
 #include "mps_tcam.h"
 #include "cxgbe.h"
 
-#define CXGBE_FLOW_POLL_US  10
-#define CXGBE_FLOW_POLL_CNT 10
+/* Max poll time is 100 * 100msec = 10 sec */
+#define CXGBE_FLOW_POLL_MS  100 /* 100 milliseconds */
+#define CXGBE_FLOW_POLL_CNT 100 /* Max number of times to poll */
 
 struct chrte_fparse {
 	int (*fptr)(const void *mask, /* currently supported mask */
diff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c
index 88dc851f..ec080e5d 100644
--- a/drivers/net/cxgbe/cxgbe_main.c
+++ b/drivers/net/cxgbe/cxgbe_main.c
@@ -157,18 +157,18 @@ out:
 /**
  * cxgbe_poll_for_completion: Poll rxq for completion
  * @q: rxq to poll
- * @us: microseconds to delay
+ * @ms: milliseconds to delay
  * @cnt: number of times to poll
  * @c: completion to check for 'done' status
  *
  * Polls the rxq for reples until completion is done or the count
  * expires.
  */
-int cxgbe_poll_for_completion(struct sge_rspq *q, unsigned int us,
+int cxgbe_poll_for_completion(struct sge_rspq *q, unsigned int ms,
 			      unsigned int cnt, struct t4_completion *c)
 {
 	unsigned int i;
-	unsigned int work_done, budget = 4;
+	unsigned int work_done, budget = 32;
 
 	if (!c)
 		return -EINVAL;
@@ -181,7 +181,7 @@ int cxgbe_poll_for_completion(struct sge_rspq *q, unsigned int us,
 			return 0;
 		}
 		t4_os_unlock(&c->lock);
-		udelay(us);
+		rte_delay_ms(ms);
 	}
 	return -ETIMEDOUT;
 }
@@ -1339,18 +1339,22 @@ inline bool force_linkup(struct adapter *adap)
 int link_start(struct port_info *pi)
 {
 	struct adapter *adapter = pi->adapter;
-	int ret;
+	u64 conf_offloads;
 	unsigned int mtu;
+	int ret;
 
 	mtu = pi->eth_dev->data->dev_conf.rxmode.max_rx_pkt_len -
 	      (ETHER_HDR_LEN + ETHER_CRC_LEN);
 
+	conf_offloads = pi->eth_dev->data->dev_conf.rxmode.offloads;
+
 	/*
 	 * We do not set address filters and promiscuity here, the stack does
 	 * that step explicitly.
 	 */
-	ret = t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu, -1, -1,
-			    -1, 1, true);
+	ret = t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu, -1, -1, -1,
+			    !!(conf_offloads & DEV_RX_OFFLOAD_VLAN_STRIP),
+			    true);
 	if (ret == 0) {
 		ret = cxgbe_mpstcam_modify(pi, (int)pi->xact_addr_filt,
 				(u8 *)&pi->eth_dev->data->mac_addrs[0]);
diff --git a/drivers/net/cxgbe/cxgbevf_ethdev.c b/drivers/net/cxgbe/cxgbevf_ethdev.c
index 3b32ca9d..a6458d53 100644
--- a/drivers/net/cxgbe/cxgbevf_ethdev.c
+++ b/drivers/net/cxgbe/cxgbevf_ethdev.c
@@ -177,6 +177,16 @@ out_free_adapter:
 	return err;
 }
 
+/* Ethdev uninit hook handed to the generic PCI remove helper. */
+static int eth_cxgbevf_dev_uninit(struct rte_eth_dev *eth_dev)
+{
+	struct port_info *port = eth_dev->data->dev_private;
+
+	/* Release the remaining ports and every adapter resource */
+	cxgbe_close(port->adapter);
+	return 0;
+}
+
 static int eth_cxgbevf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 				 struct rte_pci_device *pci_dev)
 {
@@ -186,7 +196,7 @@ static int eth_cxgbevf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 
 static int eth_cxgbevf_pci_remove(struct rte_pci_device *pci_dev)
 {
-	return rte_eth_dev_pci_generic_remove(pci_dev, NULL);
+	return rte_eth_dev_pci_generic_remove(pci_dev, eth_cxgbevf_dev_uninit);
 }
 
 static struct rte_pci_driver rte_cxgbevf_pmd = {
diff --git a/drivers/net/cxgbe/cxgbevf_main.c b/drivers/net/cxgbe/cxgbevf_main.c
index 6223e125..61bd8519 100644
--- a/drivers/net/cxgbe/cxgbevf_main.c
+++ b/drivers/net/cxgbe/cxgbevf_main.c
@@ -11,6 +11,7 @@
 #include "t4_regs.h"
 #include "t4_msg.h"
 #include "cxgbe.h"
+#include "mps_tcam.h"
 
 /*
  * Figure out how many Ports and Queue Sets we can support.  This depends on
@@ -271,6 +272,11 @@ allocate_mac:
 	print_adapter_info(adapter);
 	print_port_info(adapter);
 
+	adapter->mpstcam = t4_init_mpstcam(adapter);
+	if (!adapter->mpstcam)
+		dev_warn(adapter,
+			 "VF could not allocate mps tcam table. Continuing\n");
+
 	err = init_rss(adapter);
 	if (err)
 		goto out_free;
diff --git a/drivers/net/cxgbe/mps_tcam.c b/drivers/net/cxgbe/mps_tcam.c
index 02ec69a9..71c8070b 100644
--- a/drivers/net/cxgbe/mps_tcam.c
+++ b/drivers/net/cxgbe/mps_tcam.c
@@ -236,8 +236,6 @@ struct mpstcam_table *t4_init_mpstcam(struct adapter *adap)
 
 void t4_cleanup_mpstcam(struct adapter *adap)
 {
-	if (adap->mpstcam) {
-		t4_os_free(adap->mpstcam->entry);
+	if (adap->mpstcam)
 		t4_os_free(adap->mpstcam);
-	}
 }
diff --git a/drivers/net/e1000/base/e1000_i210.c b/drivers/net/e1000/base/e1000_i210.c
index 277331c4..c2abb43f 100644
--- a/drivers/net/e1000/base/e1000_i210.c
+++ b/drivers/net/e1000/base/e1000_i210.c
@@ -941,6 +941,7 @@ STATIC s32 e1000_pll_workaround_i210(struct e1000_hw *hw)
 	if (ret_val != E1000_SUCCESS)
 		nvm_word = E1000_INVM_DEFAULT_AL;
 	tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL;
+	phy_word = E1000_PHY_PLL_UNCONF;
 	for (i = 0; i < E1000_MAX_PLL_TRIES; i++) {
 		/* check current state directly from internal PHY */
 		e1000_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE |
diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index abe1e7bd..05a4fbe0 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -530,11 +530,6 @@ static void ena_close(struct rte_eth_dev *dev)
 				     adapter);
 
 	/*
-	 * Pass the information to the rte_eth_dev_close() that it should also
-	 * release the private port resources.
-	 */
-	dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
-	/*
 	 * MAC is not allocated dynamically. Setting NULL should prevent from
 	 * release of the resource in the rte_eth_dev_release_port().
 	 */
@@ -1666,6 +1661,12 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
 	ether_addr_copy((struct ether_addr *)get_feat_ctx.dev_attr.mac_addr,
 			(struct ether_addr *)adapter->mac_addr);
 
+	/*
+	 * Pass the information to the rte_eth_dev_close() that it should also
+	 * release the private port resources.
+	 */
+	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
+
 	adapter->drv_stats = rte_zmalloc("adapter stats",
 					 sizeof(*adapter->drv_stats),
 					 RTE_CACHE_LINE_SIZE);
diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c
index 5189ee63..0aadd342 100644
--- a/drivers/net/enic/enic_rxtx.c
+++ b/drivers/net/enic/enic_rxtx.c
@@ -393,11 +393,22 @@ uint16_t enic_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	for (i = 0; i != nb_pkts; i++) {
 		m = tx_pkts[i];
-		if (unlikely(m->pkt_len > ENIC_TX_MAX_PKT_SIZE)) {
-			rte_errno = EINVAL;
-			return i;
-		}
 		ol_flags = m->ol_flags;
+		if (!(ol_flags & PKT_TX_TCP_SEG)) {
+			if (unlikely(m->pkt_len > ENIC_TX_MAX_PKT_SIZE)) {
+				rte_errno = EINVAL;
+				return i;
+			}
+		} else {
+			uint16_t header_len;
+
+			header_len = m->l2_len + m->l3_len + m->l4_len;
+			if (m->tso_segsz + header_len > ENIC_TX_MAX_PKT_SIZE) {
+				rte_errno = EINVAL;
+				return i;
+			}
+		}
+
 		if (ol_flags & wq->tx_offload_notsup_mask) {
 			rte_errno = ENOTSUP;
 			return i;
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index c852022d..85fb6c5c 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -464,11 +464,6 @@ fm10k_dev_configure(struct rte_eth_dev *dev)
 	return 0;
 }
 
-/* fls = find last set bit = 32 minus the number of leading zeros */
-#ifndef fls
-#define fls(x) (((x) == 0) ? 0 : (32 - __builtin_clz((x))))
-#endif
-
 static void
 fm10k_dev_vmdq_rx_configure(struct rte_eth_dev *dev)
 {
@@ -1030,8 +1025,8 @@ fm10k_dev_dglort_map_configure(struct rte_eth_dev *dev)
 
 	macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private);
 	nb_queue_pools = macvlan->nb_queue_pools;
-	pool_len = nb_queue_pools ? fls(nb_queue_pools - 1) : 0;
-	rss_len = fls(dev->data->nb_rx_queues - 1) - pool_len;
+	pool_len = nb_queue_pools ? rte_fls_u32(nb_queue_pools - 1) : 0;
+	rss_len = rte_fls_u32(dev->data->nb_rx_queues - 1) - pool_len;
 
 	/* GLORT 0x0-0x3F are used by PF and VMDQ,  0x40-0x7F used by FD */
 	dglortdec = (rss_len << FM10K_DGLORTDEC_RSSLENGTH_SHIFT) | pool_len;
@@ -1042,7 +1037,7 @@ fm10k_dev_dglort_map_configure(struct rte_eth_dev *dev)
 	FM10K_WRITE_REG(hw, FM10K_DGLORTDEC(0), dglortdec);
 
 	/* Flow Director configurations, only queue number is valid. */
-	dglortdec = fls(dev->data->nb_rx_queues - 1);
+	dglortdec = rte_fls_u32(dev->data->nb_rx_queues - 1);
 	dglortmask = (GLORT_FD_MASK << FM10K_DGLORTMAP_MASK_SHIFT) |
 			(hw->mac.dglort_map + GLORT_FD_Q_BASE);
 	FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(1), dglortmask);
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 1c779068..790ecc3c 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -12552,13 +12552,16 @@ i40e_rss_conf_init(struct i40e_rte_flow_rss_conf *out,
 	if (in->key_len > RTE_DIM(out->key) ||
 	    in->queue_num > RTE_DIM(out->queue))
 		return -EINVAL;
+	if (!in->key && in->key_len)
+		return -EINVAL;
 	out->conf = (struct rte_flow_action_rss){
 		.func = in->func,
 		.level = in->level,
 		.types = in->types,
 		.key_len = in->key_len,
 		.queue_num = in->queue_num,
-		.key = memcpy(out->key, in->key, in->key_len),
+		/* conf is assigned as a whole, so set .key here, not before */
+		.key = in->key ? memcpy(out->key, in->key, in->key_len) : NULL,
 		.queue = memcpy(out->queue, in->queue,
 				sizeof(*in->queue) * in->queue_num),
 	};
diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c
index c9e82d51..91ba6201 100644
--- a/drivers/net/ixgbe/ixgbe_ethdev.c
+++ b/drivers/net/ixgbe/ixgbe_ethdev.c
@@ -2549,6 +2549,9 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
 		return -EINVAL;
 	}
 
+	/* Stop the link setup handler before resetting the HW. */
+	rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
+
 	/* disable uio/vfio intr/eventfd mapping */
 	rte_intr_disable(intr_handle);
 
@@ -2731,8 +2734,6 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
 	if (err)
 		goto error;
 
-	ixgbe_dev_link_update(dev, 0);
-
 skip_link_setup:
 
 	if (rte_intr_allow_others(intr_handle)) {
@@ -2768,6 +2769,12 @@ skip_link_setup:
 			    "please call hierarchy_commit() "
 			    "before starting the port");
 
+	/*
+	 * Update link status right before return, because it may
+	 * start link configuration process in a separate thread.
+	 */
+	ixgbe_dev_link_update(dev, 0);
+
 	return 0;
 
 error:
@@ -3873,11 +3880,6 @@ static int
 ixgbevf_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
 		   int *link_up, int wait_to_complete)
 {
-	/**
-	 * for a quick link status checking, wait_to_compelet == 0,
-	 * skip PF link status checking
-	 */
-	bool no_pflink_check = wait_to_complete == 0;
 	struct ixgbe_mbx_info *mbx = &hw->mbx;
 	struct ixgbe_mac_info *mac = &hw->mac;
 	uint32_t links_reg, in_msg;
@@ -3938,14 +3940,6 @@ ixgbevf_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
 		*speed = IXGBE_LINK_SPEED_UNKNOWN;
 	}
 
-	if (no_pflink_check) {
-		if (*speed == IXGBE_LINK_SPEED_UNKNOWN)
-			mac->get_link_status = true;
-		else
-			mac->get_link_status = false;
-
-		goto out;
-	}
 	/* if the read failed it could just be a mailbox collision, best wait
 	 * until we are called again and don't report an error
 	 */
@@ -3955,7 +3949,7 @@ ixgbevf_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
 	if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
 		/* msg is not CTS and is NACK we must have lost CTS status */
 		if (in_msg & IXGBE_VT_MSGTYPE_NACK)
-			ret_val = -1;
+			mac->get_link_status = false;
 		goto out;
 	}
 
@@ -5061,6 +5055,9 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
 
 	PMD_INIT_FUNC_TRACE();
 
+	/* Stop the link setup handler before resetting the HW. */
+	rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev);
+
 	err = hw->mac.ops.reset_hw(hw);
 	if (err) {
 		PMD_INIT_LOG(ERR, "Unable to reset vf hardware (%d)", err);
@@ -5096,8 +5093,6 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
 
 	ixgbevf_dev_rxtx_start(dev);
 
-	ixgbevf_dev_link_update(dev, 0);
-
 	/* check and configure queue intr-vector mapping */
 	if (rte_intr_cap_multiple(intr_handle) &&
 	    dev->data->dev_conf.intr_conf.rxq) {
@@ -5135,6 +5130,12 @@ ixgbevf_dev_start(struct rte_eth_dev *dev)
 	/* Re-enable interrupt for VF */
 	ixgbevf_intr_enable(dev);
 
+	/*
+	 * Update link status right before return, because it may
+	 * start link configuration process in a separate thread.
+	 */
+	ixgbevf_dev_link_update(dev, 0);
+
 	return 0;
 }
 
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 7a50bccd..895cdfee 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -51,7 +51,7 @@ CFLAGS += -D_DEFAULT_SOURCE
 CFLAGS += -D_XOPEN_SOURCE=600
 CFLAGS += $(WERROR_FLAGS)
 CFLAGS += -Wno-strict-prototypes
-CFLAGS += $(shell pkg-config --cflags libmnl)
+CFLAGS += $(shell command -v pkg-config > /dev/null 2>&1 && pkg-config --cflags libmnl)
 ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
 CFLAGS += -DMLX5_GLUE='"$(LIB_GLUE)"'
 CFLAGS += -DMLX5_GLUE_VERSION='"$(LIB_GLUE_VERSION)"'
@@ -60,7 +60,7 @@ LDLIBS += -ldl
 else
 LDLIBS += -libverbs -lmlx5
 endif
-LDLIBS += $(shell pkg-config --libs libmnl)
+LDLIBS += $(shell command -v pkg-config > /dev/null 2>&1 && pkg-config --libs libmnl || echo "-lmnl")
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
 LDLIBS += -lrte_bus_pci
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index ed1fcfc7..9e5cab16 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -347,11 +347,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	memset(priv, 0, sizeof(*priv));
 	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
 	/*
-	 * flag to rte_eth_dev_close() that it should release the port resources
-	 * (calling rte_eth_dev_release_port()) in addition to closing it.
-	 */
-	dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
-	/*
 	 * Reset mac_addrs to NULL such that it is not freed as part of
 	 * rte_eth_dev_release_port(). mac_addrs is part of dev_private so
 	 * it is freed when dev_private is freed.
@@ -1114,6 +1109,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		err = ENOMEM;
 		goto error;
 	}
+	/* Flag to call rte_eth_dev_release_port() in rte_eth_dev_close(). */
+	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
 	if (priv->representor) {
 		eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
 		eth_dev->data->representor_id = priv->representor_id;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 3c2ac4b3..5ad3a11a 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1178,6 +1178,12 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
 					  "L3 cannot follow an L4 layer.");
 	if (!mask)
 		mask = &rte_flow_item_ipv4_mask;
+	else if (mask->hdr.next_proto_id != 0 &&
+		 mask->hdr.next_proto_id != 0xff)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+					  "partial mask is not supported"
+					  " for protocol");
 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 					(const uint8_t *)&nic_mask,
 					sizeof(struct rte_flow_item_ipv4),
@@ -1234,17 +1240,6 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
 		return rte_flow_error_set(error, EINVAL,
 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
 					  "L3 cannot follow an L4 layer.");
-	/*
-	 * IPv6 is not recognised by the NIC inside a GRE tunnel.
-	 * Such support has to be disabled as the rule will be
-	 * accepted.  Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
-	 * Mellanox OFED 4.4-1.0.0.0.
-	 */
-	if (tunnel && item_flags & MLX5_FLOW_LAYER_GRE)
-		return rte_flow_error_set(error, ENOTSUP,
-					  RTE_FLOW_ERROR_TYPE_ITEM, item,
-					  "IPv6 inside a GRE tunnel is"
-					  " not recognised.");
 	if (!mask)
 		mask = &rte_flow_item_ipv6_mask;
 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
@@ -2657,7 +2652,7 @@ flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2)
 	    FLOW_FDIR_CMP(f1, f2, l3_mask) ||
 	    FLOW_FDIR_CMP(f1, f2, l4) ||
 	    FLOW_FDIR_CMP(f1, f2, l4_mask) ||
-	    FLOW_FDIR_CMP(f1, f2, actions[0]))
+	    FLOW_FDIR_CMP(f1, f2, actions[0].type))
 		return 1;
 	if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE &&
 	    FLOW_FDIR_CMP(f1, f2, queue))
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 79096153..a2edd168 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -814,10 +814,17 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 			if (items->mask != NULL &&
 			    ((const struct rte_flow_item_ipv4 *)
-			     items->mask)->hdr.next_proto_id)
+			     items->mask)->hdr.next_proto_id) {
 				next_protocol =
 					((const struct rte_flow_item_ipv4 *)
 					 (items->spec))->hdr.next_proto_id;
+				next_protocol &=
+					((const struct rte_flow_item_ipv4 *)
+					 (items->mask))->hdr.next_proto_id;
+			} else {
+				/* Reset for inner layer. */
+				next_protocol = 0xff;
+			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6:
 			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
@@ -828,10 +835,17 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 			if (items->mask != NULL &&
 			    ((const struct rte_flow_item_ipv6 *)
-			     items->mask)->hdr.proto)
+			     items->mask)->hdr.proto) {
 				next_protocol =
 					((const struct rte_flow_item_ipv6 *)
 					 items->spec)->hdr.proto;
+				next_protocol &=
+					((const struct rte_flow_item_ipv6 *)
+					 items->mask)->hdr.proto;
+			} else {
+				/* Reset for inner layer. */
+				next_protocol = 0xff;
+			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_TCP:
 			ret = mlx5_flow_validate_item_tcp
@@ -1041,6 +1055,39 @@ flow_dv_prepare(const struct rte_flow_attr *attr __rte_unused,
 	return flow;
 }
 
+#ifndef NDEBUG
+/**
+ * Sanity check for match mask and value. Similar to check_valid_spec() in
+ * kernel driver. Fails if a bit is set in the value but not in the mask.
+ *
+ * @param match_mask
+ *   Pointer to match mask buffer, read as bytes.
+ * @param match_value
+ *   Pointer to match value buffer, read as bytes alongside the mask.
+ *
+ * @return
+ *   0 if valid, -EINVAL (after logging the offending index) otherwise.
+ */
+static int
+flow_dv_check_valid_spec(void *match_mask, void *match_value)
+{
+	uint8_t *m = match_mask;
+	uint8_t *v = match_value;
+	unsigned int i;
+
+	for (i = 0; i < MLX5_ST_SZ_DB(fte_match_param); ++i) {
+		if (v[i] & ~m[i]) {
+			DRV_LOG(ERR,
+				"match_value differs from match_criteria"
+				" %p[%u] != %p[%u]",
+				match_value, i, match_mask, i);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+#endif
+
 /**
  * Add Ethernet item to matcher and to the value.
  *
@@ -1750,114 +1797,6 @@ flow_dv_translate(struct rte_eth_dev *dev,
 
 	if (priority == MLX5_FLOW_PRIO_RSVD)
 		priority = priv->config.flow_prio - 1;
-	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
-		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
-		void *match_mask = matcher.mask.buf;
-		void *match_value = dev_flow->dv.value.buf;
-
-		switch (items->type) {
-		case RTE_FLOW_ITEM_TYPE_ETH:
-			flow_dv_translate_item_eth(match_mask, match_value,
-						   items, tunnel);
-			matcher.priority = MLX5_PRIORITY_MAP_L2;
-			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
-					       MLX5_FLOW_LAYER_OUTER_L2;
-			break;
-		case RTE_FLOW_ITEM_TYPE_VLAN:
-			flow_dv_translate_item_vlan(match_mask, match_value,
-						    items, tunnel);
-			matcher.priority = MLX5_PRIORITY_MAP_L2;
-			item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
-						MLX5_FLOW_LAYER_INNER_VLAN) :
-					       (MLX5_FLOW_LAYER_OUTER_L2 |
-						MLX5_FLOW_LAYER_OUTER_VLAN);
-			break;
-		case RTE_FLOW_ITEM_TYPE_IPV4:
-			flow_dv_translate_item_ipv4(match_mask, match_value,
-						    items, tunnel);
-			matcher.priority = MLX5_PRIORITY_MAP_L3;
-			dev_flow->dv.hash_fields |=
-				mlx5_flow_hashfields_adjust
-					(dev_flow, tunnel,
-					 MLX5_IPV4_LAYER_TYPES,
-					 MLX5_IPV4_IBV_RX_HASH);
-			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
-					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
-			break;
-		case RTE_FLOW_ITEM_TYPE_IPV6:
-			flow_dv_translate_item_ipv6(match_mask, match_value,
-						    items, tunnel);
-			matcher.priority = MLX5_PRIORITY_MAP_L3;
-			dev_flow->dv.hash_fields |=
-				mlx5_flow_hashfields_adjust
-					(dev_flow, tunnel,
-					 MLX5_IPV6_LAYER_TYPES,
-					 MLX5_IPV6_IBV_RX_HASH);
-			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
-					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
-			break;
-		case RTE_FLOW_ITEM_TYPE_TCP:
-			flow_dv_translate_item_tcp(match_mask, match_value,
-						   items, tunnel);
-			matcher.priority = MLX5_PRIORITY_MAP_L4;
-			dev_flow->dv.hash_fields |=
-				mlx5_flow_hashfields_adjust
-					(dev_flow, tunnel, ETH_RSS_TCP,
-					 IBV_RX_HASH_SRC_PORT_TCP |
-					 IBV_RX_HASH_DST_PORT_TCP);
-			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
-					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
-			break;
-		case RTE_FLOW_ITEM_TYPE_UDP:
-			flow_dv_translate_item_udp(match_mask, match_value,
-						   items, tunnel);
-			matcher.priority = MLX5_PRIORITY_MAP_L4;
-			dev_flow->verbs.hash_fields |=
-				mlx5_flow_hashfields_adjust
-					(dev_flow, tunnel, ETH_RSS_UDP,
-					 IBV_RX_HASH_SRC_PORT_UDP |
-					 IBV_RX_HASH_DST_PORT_UDP);
-			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
-					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
-			break;
-		case RTE_FLOW_ITEM_TYPE_GRE:
-			flow_dv_translate_item_gre(match_mask, match_value,
-						   items, tunnel);
-			item_flags |= MLX5_FLOW_LAYER_GRE;
-			break;
-		case RTE_FLOW_ITEM_TYPE_NVGRE:
-			flow_dv_translate_item_nvgre(match_mask, match_value,
-						     items, tunnel);
-			item_flags |= MLX5_FLOW_LAYER_GRE;
-			break;
-		case RTE_FLOW_ITEM_TYPE_VXLAN:
-			flow_dv_translate_item_vxlan(match_mask, match_value,
-						     items, tunnel);
-			item_flags |= MLX5_FLOW_LAYER_VXLAN;
-			break;
-		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
-			flow_dv_translate_item_vxlan(match_mask, match_value,
-						     items, tunnel);
-			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
-			break;
-		case RTE_FLOW_ITEM_TYPE_META:
-			flow_dv_translate_item_meta(match_mask, match_value,
-						    items);
-			item_flags |= MLX5_FLOW_ITEM_METADATA;
-			break;
-		default:
-			break;
-		}
-	}
-	dev_flow->layers = item_flags;
-	/* Register matcher. */
-	matcher.crc = rte_raw_cksum((const void *)matcher.mask.buf,
-				    matcher.mask.size);
-	matcher.priority = mlx5_flow_adjust_priority(dev, priority,
-						     matcher.priority);
-	matcher.egress = attr->egress;
-	if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
-		return -rte_errno;
 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
 		const struct rte_flow_action_queue *queue;
 		const struct rte_flow_action_rss *rss;
@@ -1991,6 +1930,116 @@ flow_dv_translate(struct rte_eth_dev *dev,
 	}
 	dev_flow->dv.actions_n = actions_n;
 	flow->actions = action_flags;
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+		int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+		void *match_mask = matcher.mask.buf;
+		void *match_value = dev_flow->dv.value.buf;
+
+		switch (items->type) {
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			flow_dv_translate_item_eth(match_mask, match_value,
+						   items, tunnel);
+			matcher.priority = MLX5_PRIORITY_MAP_L2;
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+					       MLX5_FLOW_LAYER_OUTER_L2;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VLAN:
+			flow_dv_translate_item_vlan(match_mask, match_value,
+						    items, tunnel);
+			matcher.priority = MLX5_PRIORITY_MAP_L2;
+			item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
+						MLX5_FLOW_LAYER_INNER_VLAN) :
+					       (MLX5_FLOW_LAYER_OUTER_L2 |
+						MLX5_FLOW_LAYER_OUTER_VLAN);
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			flow_dv_translate_item_ipv4(match_mask, match_value,
+						    items, tunnel);
+			matcher.priority = MLX5_PRIORITY_MAP_L3;
+			dev_flow->dv.hash_fields |=
+				mlx5_flow_hashfields_adjust
+					(dev_flow, tunnel,
+					 MLX5_IPV4_LAYER_TYPES,
+					 MLX5_IPV4_IBV_RX_HASH);
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			flow_dv_translate_item_ipv6(match_mask, match_value,
+						    items, tunnel);
+			matcher.priority = MLX5_PRIORITY_MAP_L3;
+			dev_flow->dv.hash_fields |=
+				mlx5_flow_hashfields_adjust
+					(dev_flow, tunnel,
+					 MLX5_IPV6_LAYER_TYPES,
+					 MLX5_IPV6_IBV_RX_HASH);
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			flow_dv_translate_item_tcp(match_mask, match_value,
+						   items, tunnel);
+			matcher.priority = MLX5_PRIORITY_MAP_L4;
+			dev_flow->dv.hash_fields |=
+				mlx5_flow_hashfields_adjust
+					(dev_flow, tunnel, ETH_RSS_TCP,
+					 IBV_RX_HASH_SRC_PORT_TCP |
+					 IBV_RX_HASH_DST_PORT_TCP);
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+					       MLX5_FLOW_LAYER_OUTER_L4_TCP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			flow_dv_translate_item_udp(match_mask, match_value,
+						   items, tunnel);
+			matcher.priority = MLX5_PRIORITY_MAP_L4;
+			dev_flow->dv.hash_fields |=
+				mlx5_flow_hashfields_adjust
+					(dev_flow, tunnel, ETH_RSS_UDP,
+					 IBV_RX_HASH_SRC_PORT_UDP |
+					 IBV_RX_HASH_DST_PORT_UDP);
+			item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+					       MLX5_FLOW_LAYER_OUTER_L4_UDP;
+			break;
+		case RTE_FLOW_ITEM_TYPE_GRE:
+			flow_dv_translate_item_gre(match_mask, match_value,
+						   items, tunnel);
+			item_flags |= MLX5_FLOW_LAYER_GRE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_NVGRE:
+			flow_dv_translate_item_nvgre(match_mask, match_value,
+						     items, tunnel);
+			item_flags |= MLX5_FLOW_LAYER_GRE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			flow_dv_translate_item_vxlan(match_mask, match_value,
+						     items, tunnel);
+			item_flags |= MLX5_FLOW_LAYER_VXLAN;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+			flow_dv_translate_item_vxlan(match_mask, match_value,
+						     items, tunnel);
+			item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+			break;
+		case RTE_FLOW_ITEM_TYPE_META:
+			flow_dv_translate_item_meta(match_mask, match_value,
+						    items);
+			item_flags |= MLX5_FLOW_ITEM_METADATA;
+			break;
+		default:
+			break;
+		}
+	}
+	assert(!flow_dv_check_valid_spec(matcher.mask.buf,
+					 dev_flow->dv.value.buf));
+	dev_flow->layers = item_flags;
+	/* Register matcher. */
+	matcher.crc = rte_raw_cksum((const void *)matcher.mask.buf,
+				    matcher.mask.size);
+	matcher.priority = mlx5_flow_adjust_priority(dev, priority,
+						     matcher.priority);
+	matcher.egress = attr->egress;
+	if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
+		return -rte_errno;
 	return 0;
 }
 
@@ -2034,6 +2083,7 @@ flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 		} else if (flow->actions &
 			   (MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)) {
 			struct mlx5_hrxq *hrxq;
+
 			hrxq = mlx5_hrxq_get(dev, flow->key,
 					     MLX5_RSS_HASH_KEY_LEN,
 					     dv->hash_fields,
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
index fb817b23..97d2a54c 100644
--- a/drivers/net/mlx5/mlx5_flow_tcf.c
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -3847,30 +3847,6 @@ flow_tcf_alloc_nlcmd(struct tcf_nlcb_context *ctx, uint32_t size)
 }
 
 /**
- * Set NLM_F_ACK flags in the last netlink command in buffer.
- * Only last command in the buffer will be acked by system.
- *
- * @param[in, out] buf
- *   Pointer to buffer with netlink commands.
- */
-static void
-flow_tcf_setack_nlcmd(struct tcf_nlcb_buf *buf)
-{
-	struct nlmsghdr *nlh;
-	uint32_t size = 0;
-
-	assert(buf->size);
-	do {
-		nlh = (struct nlmsghdr *)&buf->msg[size];
-		size += NLMSG_ALIGN(nlh->nlmsg_len);
-		if (size >= buf->size) {
-			nlh->nlmsg_flags |= NLM_F_ACK;
-			break;
-		}
-	} while (true);
-}
-
-/**
  * Send the buffers with prepared netlink commands. Scans the list and
  * sends all found buffers. Buffers are sent and freed anyway in order
  * to prevent memory leakage if some every message in received packet.
@@ -3888,21 +3864,35 @@ static int
 flow_tcf_send_nlcmd(struct mlx5_flow_tcf_context *tcf,
 		    struct tcf_nlcb_context *ctx)
 {
-	struct tcf_nlcb_buf *bc, *bn;
-	struct nlmsghdr *nlh;
+	struct tcf_nlcb_buf *bc = LIST_FIRST(&ctx->nlbuf);
 	int ret = 0;
 
-	bc = LIST_FIRST(&ctx->nlbuf);
 	while (bc) {
+		struct tcf_nlcb_buf *bn = LIST_NEXT(bc, next);
+		struct nlmsghdr *nlh;
+		uint32_t msg = 0;
 		int rc;
 
-		bn = LIST_NEXT(bc, next);
-		if (bc->size) {
-			flow_tcf_setack_nlcmd(bc);
-			nlh = (struct nlmsghdr *)&bc->msg;
-			rc = flow_tcf_nl_ack(tcf, nlh, bc->size, NULL, NULL);
-			if (rc && !ret)
-				ret = rc;
+		while (msg < bc->size) {
+			/*
+			 * Send the Netlink commands from the buffer one at a
+			 * time. If several rule deletion commands were sent in
+			 * one Netlink message and an error occurred, it could
+			 * cause multiple ACK error messages and break Netlink
+			 * sequence numbering, because only a single ACK reply
+			 * is expected.
+			 */
+			assert((bc->size - msg) >= sizeof(struct nlmsghdr));
+			nlh = (struct nlmsghdr *)&bc->msg[msg];
+			assert((bc->size - msg) >= nlh->nlmsg_len);
+			msg += nlh->nlmsg_len;
+			rc = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL);
+			if (rc) {
+				DRV_LOG(WARNING,
+					"netlink: cleanup error %d", rc);
+				if (!ret)
+					ret = rc;
+			}
 		}
 		rte_free(bc);
 		bc = bn;
@@ -3935,6 +3925,7 @@ flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg)
 	struct nlattr *na_local = NULL;
 	struct nlattr *na_peer = NULL;
 	unsigned char family;
+	uint32_t size;
 
 	if (nlh->nlmsg_type != RTM_NEWADDR) {
 		rte_errno = EINVAL;
@@ -3962,11 +3953,11 @@ flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg)
 	if (!na_local || !na_peer)
 		return 1;
 	/* Local rule found with scope link, permanent and assigned peer. */
-	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
-					MNL_ALIGN(sizeof(struct ifaddrmsg)) +
-					(family == AF_INET6
-					? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
-					: 2 * SZ_NLATTR_TYPE_OF(uint32_t)));
+	size = MNL_ALIGN(sizeof(struct nlmsghdr)) +
+	       MNL_ALIGN(sizeof(struct ifaddrmsg)) +
+	       (family == AF_INET6 ? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
+				   : 2 * SZ_NLATTR_TYPE_OF(uint32_t));
+	cmd = flow_tcf_alloc_nlcmd(ctx, size);
 	if (!cmd) {
 		rte_errno = ENOMEM;
 		return -rte_errno;
@@ -3991,6 +3982,7 @@ flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg)
 		mnl_attr_put(cmd, IFA_ADDRESS, IPV6_ADDR_LEN,
 			mnl_attr_get_payload(na_peer));
 	}
+	assert(size == cmd->nlmsg_len);
 	return 1;
 }
 
@@ -4059,6 +4051,7 @@ flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg)
 	struct nlattr *na_ip = NULL;
 	struct nlattr *na_mac = NULL;
 	unsigned char family;
+	uint32_t size;
 
 	if (nlh->nlmsg_type != RTM_NEWNEIGH) {
 		rte_errno = EINVAL;
@@ -4085,12 +4078,12 @@ flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg)
 	if (!na_mac || !na_ip)
 		return 1;
 	/* Neigh rule with permenent attribute found. */
-	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
-					MNL_ALIGN(sizeof(struct ndmsg)) +
-					SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) +
-					(family == AF_INET6
-					? SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
-					: SZ_NLATTR_TYPE_OF(uint32_t)));
+	size = MNL_ALIGN(sizeof(struct nlmsghdr)) +
+	       MNL_ALIGN(sizeof(struct ndmsg)) +
+	       SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) +
+	       (family == AF_INET6 ? SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN)
+				   : SZ_NLATTR_TYPE_OF(uint32_t));
+	cmd = flow_tcf_alloc_nlcmd(ctx, size);
 	if (!cmd) {
 		rte_errno = ENOMEM;
 		return -rte_errno;
@@ -4113,6 +4106,7 @@ flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg)
 	}
 	mnl_attr_put(cmd, NDA_LLADDR, ETHER_ADDR_LEN,
 		     mnl_attr_get_payload(na_mac));
+	assert(size == cmd->nlmsg_len);
 	return 1;
 }
 
@@ -4179,6 +4173,7 @@ flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg)
 	struct nlattr *na_vxlan = NULL;
 	bool found = false;
 	unsigned int vxindex;
+	uint32_t size;
 
 	if (nlh->nlmsg_type != RTM_NEWLINK) {
 		rte_errno = EINVAL;
@@ -4224,9 +4219,10 @@ flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg)
 		return 1;
 	/* Attached VXLAN device found, store the command to delete. */
 	vxindex = ifm->ifi_index;
-	cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) +
-					MNL_ALIGN(sizeof(struct ifinfomsg)));
-	if (!nlh) {
+	size = MNL_ALIGN(sizeof(struct nlmsghdr)) +
+	       MNL_ALIGN(sizeof(struct ifinfomsg));
+	cmd = flow_tcf_alloc_nlcmd(ctx, size);
+	if (!cmd) {
 		rte_errno = ENOMEM;
 		return -rte_errno;
 	}
@@ -4236,6 +4232,7 @@ flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg)
 	ifm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifm));
 	ifm->ifi_family = AF_UNSPEC;
 	ifm->ifi_index = vxindex;
+	assert(size == cmd->nlmsg_len);
 	return 1;
 }
 
@@ -5127,6 +5124,13 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 		dev_flow->tcf.applied = 1;
 		return 0;
 	}
+	if (dev_flow->tcf.tunnel) {
+		/* Rollback the VTEP configuration if rule apply failed. */
+		assert(dev_flow->tcf.tunnel->vtep);
+		flow_tcf_vtep_release(ctx, dev_flow->tcf.tunnel->vtep,
+				      dev_flow);
+		dev_flow->tcf.tunnel->vtep = NULL;
+	}
 	return rte_flow_error_set(error, rte_errno,
 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
 				  "netlink: failed to create TC flow rule");
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 699cc88c..d6d95db5 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1058,10 +1058,17 @@ flow_verbs_validate(struct rte_eth_dev *dev,
 					       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
 			if (items->mask != NULL &&
 			    ((const struct rte_flow_item_ipv4 *)
-			     items->mask)->hdr.next_proto_id)
+			     items->mask)->hdr.next_proto_id) {
 				next_protocol =
 					((const struct rte_flow_item_ipv4 *)
 					 (items->spec))->hdr.next_proto_id;
+				next_protocol &=
+					((const struct rte_flow_item_ipv4 *)
+					 (items->mask))->hdr.next_proto_id;
+			} else {
+				/* Reset for inner layer. */
+				next_protocol = 0xff;
+			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_IPV6:
 			ret = mlx5_flow_validate_item_ipv6(items, item_flags,
@@ -1072,10 +1079,17 @@ flow_verbs_validate(struct rte_eth_dev *dev,
 					       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
 			if (items->mask != NULL &&
 			    ((const struct rte_flow_item_ipv6 *)
-			     items->mask)->hdr.proto)
+			     items->mask)->hdr.proto) {
 				next_protocol =
 					((const struct rte_flow_item_ipv6 *)
 					 items->spec)->hdr.proto;
+				next_protocol &=
+					((const struct rte_flow_item_ipv6 *)
+					 items->mask)->hdr.proto;
+			} else {
+				/* Reset for inner layer. */
+				next_protocol = 0xff;
+			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_UDP:
 			ret = mlx5_flow_validate_item_udp(items, item_flags,
@@ -1125,13 +1139,6 @@ flow_verbs_validate(struct rte_eth_dev *dev,
 							   error);
 			if (ret < 0)
 				return ret;
-			if (next_protocol != 0xff &&
-			    next_protocol != IPPROTO_MPLS)
-				return rte_flow_error_set
-					(error, EINVAL,
-					 RTE_FLOW_ERROR_TYPE_ITEM, items,
-					 "protocol filtering not compatible"
-					 " with MPLS layer");
 			item_flags |= MLX5_FLOW_LAYER_MPLS;
 			break;
 		default:
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index 886f60e6..97092c74 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -15,6 +15,16 @@
 
 #include "mlx5_defs.h"
 
+/*
+ * Compilation workaround for PPC64 when AltiVec is fully enabled, e.g. std=c11.
+ * Otherwise there would be a type conflict between stdbool and altivec.
+ */
+#if defined(__PPC64__) && !defined(__APPLE_ALTIVEC__)
+#undef bool
+/* redefine as in stdbool.h */
+#define bool _Bool
+#endif
+
 /* Bit-field manipulation. */
 #define BITFIELD_DECLARE(bf, type, size) \
 	type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \
diff --git a/drivers/net/octeontx/base/octeontx_pki_var.h b/drivers/net/octeontx/base/octeontx_pki_var.h
index c793b655..f4661d24 100644
--- a/drivers/net/octeontx/base/octeontx_pki_var.h
+++ b/drivers/net/octeontx/base/octeontx_pki_var.h
@@ -7,8 +7,17 @@
 
 #include <rte_byteorder.h>
 
-#define OCTTX_PACKET_WQE_SKIP		128
-#define OCTTX_PACKET_FIRST_SKIP		240
+#define OCTTX_PACKET_WQE_SKIP			128
+#define OCTTX_PACKET_FIRST_SKIP_MAXREGVAL	496
+#define OCTTX_PACKET_FIRST_SKIP_MAXLEN		512
+#define OCTTX_PACKET_FIRST_SKIP_ADJUST(x)				\
+		(RTE_MIN(x, OCTTX_PACKET_FIRST_SKIP_MAXREGVAL))
+#define OCTTX_PACKET_FIRST_SKIP_SUM(p)					\
+				(OCTTX_PACKET_WQE_SKIP			\
+				+ rte_pktmbuf_priv_size(p)		\
+				+ RTE_PKTMBUF_HEADROOM)
+#define OCTTX_PACKET_FIRST_SKIP(p)					\
+	OCTTX_PACKET_FIRST_SKIP_ADJUST(OCTTX_PACKET_FIRST_SKIP_SUM(p))
 #define OCTTX_PACKET_LATER_SKIP		128
 
 /* WQE descriptor */
diff --git a/drivers/net/octeontx/octeontx_ethdev.c b/drivers/net/octeontx/octeontx_ethdev.c
index 06814862..a3063be4 100644
--- a/drivers/net/octeontx/octeontx_ethdev.c
+++ b/drivers/net/octeontx/octeontx_ethdev.c
@@ -844,10 +844,11 @@ octeontx_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx,
 		pktbuf_conf.mmask.f_cache_mode = 1;
 
 		pktbuf_conf.wqe_skip = OCTTX_PACKET_WQE_SKIP;
-		pktbuf_conf.first_skip = OCTTX_PACKET_FIRST_SKIP;
+		pktbuf_conf.first_skip = OCTTX_PACKET_FIRST_SKIP(mb_pool);
 		pktbuf_conf.later_skip = OCTTX_PACKET_LATER_SKIP;
 		pktbuf_conf.mbuff_size = (mb_pool->elt_size -
 					RTE_PKTMBUF_HEADROOM -
+					rte_pktmbuf_priv_size(mb_pool) -
 					sizeof(struct rte_mbuf));
 
 		pktbuf_conf.cache_mode = PKI_OPC_MODE_STF2_STT;
diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index 8a4772f4..0e33be1a 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -235,12 +235,13 @@ static void qede_rx_queue_release_mbufs(struct qede_rx_queue *rxq)
 void qede_rx_queue_release(void *rx_queue)
 {
 	struct qede_rx_queue *rxq = rx_queue;
-	struct qede_dev *qdev = rxq->qdev;
-	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-
-	PMD_INIT_FUNC_TRACE(edev);
+	struct qede_dev *qdev;
+	struct ecore_dev *edev;
 
 	if (rxq) {
+		qdev = rxq->qdev;
+		edev = QEDE_INIT_EDEV(qdev);
+		PMD_INIT_FUNC_TRACE(edev);
 		qede_rx_queue_release_mbufs(rxq);
 		qdev->ops->common->chain_free(edev, &rxq->rx_bd_ring);
 		qdev->ops->common->chain_free(edev, &rxq->rx_comp_ring);
@@ -399,12 +400,13 @@ static void qede_tx_queue_release_mbufs(struct qede_tx_queue *txq)
 void qede_tx_queue_release(void *tx_queue)
 {
 	struct qede_tx_queue *txq = tx_queue;
-	struct qede_dev *qdev = txq->qdev;
-	struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-
-	PMD_INIT_FUNC_TRACE(edev);
+	struct qede_dev *qdev;
+	struct ecore_dev *edev;
 
 	if (txq) {
+		qdev = txq->qdev;
+		edev = QEDE_INIT_EDEV(qdev);
+		PMD_INIT_FUNC_TRACE(edev);
 		qede_tx_queue_release_mbufs(txq);
 		qdev->ops->common->chain_free(edev, &txq->tx_pbl);
 		rte_free(txq->sw_tx_ring);
@@ -1759,6 +1761,18 @@ qede_xmit_prep_pkts(__rte_unused void *p_txq, struct rte_mbuf **tx_pkts,
 			}
 		}
 		if (ol_flags & QEDE_TX_OFFLOAD_NOTSUP_MASK) {
+			/* We support only limited tunnel protocols */
+			if (ol_flags & PKT_TX_TUNNEL_MASK) {
+				uint64_t temp;
+
+				temp = ol_flags & PKT_TX_TUNNEL_MASK;
+				if (temp == PKT_TX_TUNNEL_VXLAN ||
+				    temp == PKT_TX_TUNNEL_GENEVE ||
+				    temp == PKT_TX_TUNNEL_MPLSINUDP ||
+				    temp == PKT_TX_TUNNEL_GRE)
+					break;
+			}
+
 			rte_errno = -ENOTSUP;
 			break;
 		}
diff --git a/drivers/net/qede/qede_rxtx.h b/drivers/net/qede/qede_rxtx.h
index d3a41e92..0afadd8d 100644
--- a/drivers/net/qede/qede_rxtx.h
+++ b/drivers/net/qede/qede_rxtx.h
@@ -153,10 +153,7 @@
 
 #define QEDE_TX_OFFLOAD_MASK (QEDE_TX_CSUM_OFFLOAD_MASK | \
 			      PKT_TX_VLAN_PKT		| \
-			      PKT_TX_TUNNEL_VXLAN	| \
-			      PKT_TX_TUNNEL_GENEVE	| \
-			      PKT_TX_TUNNEL_MPLSINUDP   | \
-			      PKT_TX_TUNNEL_GRE)
+			      PKT_TX_TUNNEL_MASK)
 
 #define QEDE_TX_OFFLOAD_NOTSUP_MASK \
 	(PKT_TX_OFFLOAD_MASK ^ QEDE_TX_OFFLOAD_MASK)
diff --git a/drivers/net/sfc/base/efx.h b/drivers/net/sfc/base/efx.h
index 8e10e893..2e847b6c 100644
--- a/drivers/net/sfc/base/efx.h
+++ b/drivers/net/sfc/base/efx.h
@@ -2878,6 +2878,8 @@ typedef struct efx_filter_spec_s {
 	efx_filter_flags_t		efs_flags;
 	uint16_t			efs_dmaq_id;
 	uint32_t			efs_rss_context;
+	uint32_t			efs_mark;
+	/* Fields below here are hashed for software filter lookup */
 	uint16_t			efs_outer_vid;
 	uint16_t			efs_inner_vid;
 	uint8_t				efs_loc_mac[EFX_MAC_ADDR_LEN];
@@ -2891,7 +2893,6 @@ typedef struct efx_filter_spec_s {
 	efx_oword_t			efs_loc_host;
 	uint8_t				efs_vni_or_vsid[EFX_VNI_OR_VSID_LEN];
 	uint8_t				efs_ifrm_loc_mac[EFX_MAC_ADDR_LEN];
-	uint32_t			efs_mark;
 } efx_filter_spec_t;
 
 
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index e7817e89..49afd38d 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -248,7 +248,7 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive)
 	return fd;
 
 error:
-	if (fd > 0)
+	if (fd >= 0)
 		close(fd);
 	return -1;
 }
@@ -1848,6 +1848,7 @@ disable_rte_flow:
 		TAP_LOG(ERR, "Remote feature requires flow support.");
 		goto error_exit;
 	}
+	rte_eth_dev_probing_finish(dev);
 	return 0;
 
 error_remote:
diff --git a/drivers/net/tap/tap_netlink.c b/drivers/net/tap/tap_netlink.c
index 6cb51009..14bbbec7 100644
--- a/drivers/net/tap/tap_netlink.c
+++ b/drivers/net/tap/tap_netlink.c
@@ -51,14 +51,17 @@ tap_nl_init(uint32_t nl_groups)
 	}
 	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) {
 		TAP_LOG(ERR, "Unable to set socket buffer send size");
+		close(fd);
 		return -1;
 	}
 	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) {
 		TAP_LOG(ERR, "Unable to set socket buffer receive size");
+		close(fd);
 		return -1;
 	}
 	if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
 		TAP_LOG(ERR, "Unable to bind to the netlink socket");
+		close(fd);
 		return -1;
 	}
 	return fd;
diff --git a/drivers/net/thunderx/nicvf_rxtx.c b/drivers/net/thunderx/nicvf_rxtx.c
index 247c3568..1c428743 100644
--- a/drivers/net/thunderx/nicvf_rxtx.c
+++ b/drivers/net/thunderx/nicvf_rxtx.c
@@ -61,6 +61,14 @@ fill_sq_desc_header(union sq_entry_t *entry, struct rte_mbuf *pkt)
 	entry->buff[0] = sqe.buff[0];
 }
 
+static inline void __hot
+fill_sq_desc_header_zero_w1(union sq_entry_t *entry,
+				struct rte_mbuf *pkt)
+{
+	fill_sq_desc_header(entry, pkt);
+	entry->buff[1] = 0ULL;
+}
+
 void __hot
 nicvf_single_pool_free_xmited_buffers(struct nicvf_txq *sq)
 {
@@ -204,7 +212,7 @@ nicvf_xmit_pkts_multiseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 		used_bufs += nb_segs;
 
 		txbuffs[tail] = NULL;
-		fill_sq_desc_header(desc_ptr + tail, pkt);
+		fill_sq_desc_header_zero_w1(desc_ptr + tail, pkt);
 		tail = (tail + 1) & qlen_mask;
 
 		txbuffs[tail] = pkt;
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 21110cd6..c8883c32 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -614,9 +614,15 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
 			hw->common_cfg = get_cfg_addr(dev, &cap);
 			break;
 		case VIRTIO_PCI_CAP_NOTIFY_CFG:
-			rte_pci_read_config(dev, &hw->notify_off_multiplier,
+			ret = rte_pci_read_config(dev,
+					&hw->notify_off_multiplier,
 					4, pos + sizeof(cap));
-			hw->notify_base = get_cfg_addr(dev, &cap);
+			if (ret != 4)
+				PMD_INIT_LOG(DEBUG,
+					"failed to read notify_off_multiplier, ret %d",
+					ret);
+			else
+				hw->notify_base = get_cfg_addr(dev, &cap);
 			break;
 		case VIRTIO_PCI_CAP_DEVICE_CFG:
 			hw->dev_cfg = get_cfg_addr(dev, &cap);
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 61b7c0a3..f8791391 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -484,7 +484,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
 		}
 	} else {
 		PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user",
-			  VIRTIO_USER_ARG_QUEUE_SIZE);
+			     VIRTIO_USER_ARG_PATH);
 		goto end;
 	}
 
diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
index 84acd9db..93e5de9a 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
@@ -318,6 +318,9 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
 		     hw->perm_addr[0], hw->perm_addr[1], hw->perm_addr[2],
 		     hw->perm_addr[3], hw->perm_addr[4], hw->perm_addr[5]);
 
+	/* Flag to call rte_eth_dev_release_port() in rte_eth_dev_close(). */
+	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
+
 	/* Put device in Quiesce Mode */
 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV);
 
@@ -876,12 +879,6 @@ vmxnet3_dev_close(struct rte_eth_dev *dev)
 
 	vmxnet3_dev_stop(dev);
 	vmxnet3_free_queues(dev);
-
-	/*
-	 * flag to rte_eth_dev_close() that it should release the port resources
-	 * (calling rte_eth_dev_release_port()) in addition to closing it.
-	 */
-	dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
 }
 
 static void
diff --git a/examples/fips_validation/fips_validation_tdes.c b/examples/fips_validation/fips_validation_tdes.c
index 5064ff3b..15ee434e 100644
--- a/examples/fips_validation/fips_validation_tdes.c
+++ b/examples/fips_validation/fips_validation_tdes.c
@@ -202,7 +202,7 @@ parse_test_tdes_writeback(struct fips_val *val)
 static int
 writeback_tdes_hex_str(const char *key, char *dst, struct fips_val *val)
 {
-	struct fips_val tmp_val;
+	struct fips_val tmp_val = {0};
 
 	tmp_val.len = 8;
 
diff --git a/examples/fips_validation/main.c b/examples/fips_validation/main.c
index 85f54cbf..e7559c63 100644
--- a/examples/fips_validation/main.c
+++ b/examples/fips_validation/main.c
@@ -887,9 +887,9 @@ fips_mct_tdes_test(void)
 #define TDES_EXTERN_ITER	400
 #define TDES_INTERN_ITER	10000
 	struct fips_val val, val_key;
-	uint8_t prev_out[TDES_BLOCK_SIZE];
-	uint8_t prev_prev_out[TDES_BLOCK_SIZE];
-	uint8_t prev_in[TDES_BLOCK_SIZE];
+	uint8_t prev_out[TDES_BLOCK_SIZE] = {0};
+	uint8_t prev_prev_out[TDES_BLOCK_SIZE] = {0};
+	uint8_t prev_in[TDES_BLOCK_SIZE] = {0};
 	uint32_t i, j, k;
 	int ret;
 
diff --git a/examples/flow_filtering/main.c b/examples/flow_filtering/main.c
index 27e287ae..a582ac07 100644
--- a/examples/flow_filtering/main.c
+++ b/examples/flow_filtering/main.c
@@ -137,7 +137,7 @@ init_port(void)
 	struct rte_eth_dev_info dev_info;
 
 	rte_eth_dev_info_get(port_id, &dev_info);
-	port_conf.txmode.offloads &= dev_info.rx_offload_capa;
+	port_conf.txmode.offloads &= dev_info.tx_offload_capa;
 	printf(":: initializing port: %d\n", port_id);
 	ret = rte_eth_dev_configure(port_id,
 				nr_queues, nr_queues, &port_conf);
diff --git a/examples/ip_pipeline/cli.c b/examples/ip_pipeline/cli.c
index 3de62068..91038628 100644
--- a/examples/ip_pipeline/cli.c
+++ b/examples/ip_pipeline/cli.c
@@ -6841,20 +6841,26 @@ cli_rule_file_process(const char *file_name,
 	return 0;
 
 cli_rule_file_process_free:
-	*rule_list = NULL;
-	*n_rules = rule_id;
-	*line_number = line_id;
+	if (rule_list != NULL)
+		*rule_list = NULL;
 
-	for ( ; ; ) {
-		struct table_rule *rule;
+	if (n_rules != NULL)
+		*n_rules = rule_id;
 
-		rule = TAILQ_FIRST(list);
-		if (rule == NULL)
-			break;
+	if (line_number != NULL)
+		*line_number = line_id;
 
-		TAILQ_REMOVE(list, rule, node);
-		free(rule);
-	}
+	if (list != NULL)
+		for ( ; ; ) {
+			struct table_rule *rule;
+
+			rule = TAILQ_FIRST(list);
+			if (rule == NULL)
+				break;
+
+			TAILQ_REMOVE(list, rule, node);
+			free(rule);
+		}
 
 	if (f)
 		fclose(f);
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index 0b3f8fe6..9c7b3156 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -1957,7 +1957,7 @@ main(int argc, char **argv)
 		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
 
 	if (init_power_library())
-		rte_exit(EXIT_FAILURE, "init_power_library failed\n");
+		RTE_LOG(ERR, L3FWD_POWER, "init_power_library failed\n");
 
 	if (update_lcore_params() < 0)
 		rte_exit(EXIT_FAILURE, "update_lcore_params failed\n");
diff --git a/lib/librte_bpf/bpf_jit_x86.c b/lib/librte_bpf/bpf_jit_x86.c
index 68ea389f..f70cd6be 100644
--- a/lib/librte_bpf/bpf_jit_x86.c
+++ b/lib/librte_bpf/bpf_jit_x86.c
@@ -209,6 +209,19 @@ emit_sib(struct bpf_jit_state *st, uint32_t scale, uint32_t idx, uint32_t base)
 }
 
 /*
+ * emit OPCODE+REGIDX byte
+ */
+static void
+emit_opcode(struct bpf_jit_state *st, uint8_t ops, uint32_t reg)
+{
+	uint8_t v;
+
+	v = ops | (reg & 7);
+	emit_bytes(st, &v, sizeof(v));
+}
+
+
+/*
  * emit xchg %<sreg>, %<dreg>
  */
 static void
@@ -472,19 +485,18 @@ static void
 emit_ld_imm64(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm0,
 	uint32_t imm1)
 {
+	uint32_t op;
+
 	const uint8_t ops = 0xB8;
 
-	if (imm1 == 0) {
-		emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, dreg, imm0);
-		return;
-	}
+	op = (imm1 == 0) ? BPF_ALU : EBPF_ALU64;
 
-	emit_rex(st, EBPF_ALU64, 0, dreg);
-	emit_bytes(st, &ops, sizeof(ops));
-	emit_modregrm(st, MOD_DIRECT, 0, dreg);
+	emit_rex(st, op, 0, dreg);
+	emit_opcode(st, ops, dreg);
 
 	emit_imm(st, imm0, sizeof(imm0));
-	emit_imm(st, imm1, sizeof(imm1));
+	if (imm1 != 0)
+		emit_imm(st, imm1, sizeof(imm1));
 }
 
 /*
diff --git a/lib/librte_eal/common/arch/x86/rte_memcpy.c b/lib/librte_eal/common/arch/x86/rte_memcpy.c
deleted file mode 100644
index 648c8f68..00000000
--- a/lib/librte_eal/common/arch/x86/rte_memcpy.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2017 Intel Corporation
- */
-
-#include <rte_memcpy.h>
-#include <rte_cpuflags.h>
-#include <rte_log.h>
-
-void *(*rte_memcpy_ptr)(void *dst, const void *src, size_t n) = NULL;
-
-RTE_INIT(rte_memcpy_init)
-{
-#ifdef CC_SUPPORT_AVX512F
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) {
-		rte_memcpy_ptr = rte_memcpy_avx512f;
-		RTE_LOG(DEBUG, EAL, "AVX512 memcpy is using!\n");
-		return;
-	}
-#endif
-#ifdef CC_SUPPORT_AVX2
-	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {
-		rte_memcpy_ptr = rte_memcpy_avx2;
-		RTE_LOG(DEBUG, EAL, "AVX2 memcpy is using!\n");
-		return;
-	}
-#endif
-	rte_memcpy_ptr = rte_memcpy_sse;
-	RTE_LOG(DEBUG, EAL, "Default SSE/AVX memcpy is using!\n");
-}
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 5759ec2d..1fdc9ab1 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -150,10 +150,11 @@ local_dev_probe(const char *devargs, struct rte_device **new_dev)
 		goto err_devarg;
 	}
 
-	ret = rte_devargs_insert(da);
+	ret = rte_devargs_insert(&da);
 	if (ret)
 		goto err_devarg;
 
+	/* the rte_devargs will be referenced in the matching rte_device */
 	ret = da->bus->scan();
 	if (ret)
 		goto err_devarg;
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index b7b9cb69..1ccf12dc 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -263,14 +263,38 @@ rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
 }
 
 int __rte_experimental
-rte_devargs_insert(struct rte_devargs *da)
+rte_devargs_insert(struct rte_devargs **da)
 {
-	int ret;
+	struct rte_devargs *listed_da;
+	void *tmp;
+
+	if (*da == NULL || (*da)->bus == NULL)
+		return -1;
 
-	ret = rte_devargs_remove(da);
-	if (ret < 0)
-		return ret;
-	TAILQ_INSERT_TAIL(&devargs_list, da, next);
+	TAILQ_FOREACH_SAFE(listed_da, &devargs_list, next, tmp) {
+		if (listed_da == *da)
+			/* devargs already in the list */
+			return 0;
+		if (strcmp(listed_da->bus->name, (*da)->bus->name) == 0 &&
+				strcmp(listed_da->name, (*da)->name) == 0) {
+			/* device already in devargs list, must be updated */
+			listed_da->type = (*da)->type;
+			listed_da->policy = (*da)->policy;
+			free(listed_da->args);
+			listed_da->args = (*da)->args;
+			listed_da->bus = (*da)->bus;
+			listed_da->cls = (*da)->cls;
+			listed_da->bus_str = (*da)->bus_str;
+			listed_da->cls_str = (*da)->cls_str;
+			listed_da->data = (*da)->data;
+			/* replace provided devargs with found one */
+			free(*da);
+			*da = listed_da;
+			return 0;
+		}
+	}
+	/* new device in the list */
+	TAILQ_INSERT_TAIL(&devargs_list, *da, next);
 	return 0;
 }
 
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 87fd9921..d47ea493 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -439,11 +439,7 @@ check_iova(const struct rte_memseg_list *msl __rte_unused,
 	return 1;
 }
 
-#if defined(RTE_ARCH_64)
 #define MAX_DMA_MASK_BITS 63
-#else
-#define MAX_DMA_MASK_BITS 31
-#endif
 
 /* check memseg iovas are within the required range based on dma mask */
 static int __rte_experimental
@@ -453,7 +449,8 @@ check_dma_mask(uint8_t maskbits, bool thread_unsafe)
 	uint64_t mask;
 	int ret;
 
-	/* sanity check */
+	/* Sanity check. We only check that the width can be managed with
+	 * 64-bit variables. Indeed, any higher value is likely wrong. */
 	if (maskbits > MAX_DMA_MASK_BITS) {
 		RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n",
 				   maskbits, MAX_DMA_MASK_BITS);
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 97663d3b..f65ef56c 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -800,7 +800,7 @@ mp_request_async(const char *dst, struct rte_mp_msg *req,
 {
 	struct rte_mp_msg *reply_msg;
 	struct pending_request *pending_req, *exist;
-	int ret;
+	int ret = -1;
 
 	pending_req = calloc(1, sizeof(*pending_req));
 	reply_msg = calloc(1, sizeof(*reply_msg));
@@ -827,6 +827,28 @@ mp_request_async(const char *dst, struct rte_mp_msg *req,
 		goto fail;
 	}
 
+	/*
+	 * Set the alarm before sending the message. There are two possible
+	 * error scenarios to consider here:
+	 *
+	 * - if the alarm set fails, we free the memory right there
+	 * - if the alarm set succeeds but sending message fails, then the alarm
+	 *   will trigger and clean up the memory
+	 *
+	 * Even if the alarm triggers too early (i.e. immediately), we're still
+	 * holding the lock to the pending requests queue, so the interrupt
+	 * thread will just spin until we release the lock, and then either
+	 * releases the memory, or doesn't find any pending requests in the
+	 * queue because we never added any due to send message failure.
+	 */
+	if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000,
+			      async_reply_handle, pending_req) < 0) {
+		RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n",
+			dst, req->name);
+		ret = -1;
+		goto fail;
+	}
+
 	ret = send_msg(dst, req, MP_REQ);
 	if (ret < 0) {
 		RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n",
@@ -841,13 +863,6 @@ mp_request_async(const char *dst, struct rte_mp_msg *req,
 
 	param->user_reply.nb_sent++;
 
-	if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000,
-			      async_reply_handle, pending_req) < 0) {
-		RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n",
-			dst, req->name);
-		rte_panic("Fix the above shit to properly free all memory\n");
-	}
-
 	return 0;
 fail:
 	free(pending_req);
diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
index ab099952..eb0f8e81 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_rtm.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
@@ -1,21 +1,10 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2012,2013 Intel Corporation
+ */
+
 #ifndef _RTE_RTM_H_
 #define _RTE_RTM_H_ 1
 
-/*
- * Copyright (c) 2012,2013 Intel Corporation
- * Author: Andi Kleen
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that: (1) source code distributions
- * retain the above copyright notice and this paragraph in its entirety, (2)
- * distributions including binary code include the above copyright notice and
- * this paragraph in its entirety in the documentation or other materials
- * provided with the distribution
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- */
 
 /* Official RTM intrinsics interface matching gcc/icc, but works
    on older gcc compatible compilers and binutils. */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
index 60321da0..e2e2b264 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
@@ -15,8 +15,9 @@ extern "C" {
 #include "rte_branch_prediction.h"
 #include "rte_common.h"
 #include "rte_pause.h"
+#include "rte_cycles.h"
 
-#define RTE_RTM_MAX_RETRIES (10)
+#define RTE_RTM_MAX_RETRIES (20)
 #define RTE_XABORT_LOCK_BUSY (0xff)
 
 #ifndef RTE_FORCE_INTRINSICS
@@ -76,7 +77,7 @@ static inline int rte_tm_supported(void)
 static inline int
 rte_try_tm(volatile int *lock)
 {
-	int retries;
+	int i, retries;
 
 	if (!rte_rtm_supported)
 		return 0;
@@ -96,9 +97,21 @@ rte_try_tm(volatile int *lock)
 		while (*lock)
 			rte_pause();
 
-		if ((status & RTE_XABORT_EXPLICIT) &&
-			(RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY))
+		if ((status & RTE_XABORT_CONFLICT) ||
+		   ((status & RTE_XABORT_EXPLICIT) &&
+		    (RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY))) {
+			/* Add a small delay before retrying, basing the
+			 * delay on the number of times we've already tried,
+			 * to give a back-off type of behaviour. We randomize
+			 * the pause count by taking bits from the TSC count.
+			 */
+			int try_count = RTE_RTM_MAX_RETRIES - retries;
+			int pause_count = (rte_rdtsc() & 0x7) | 1;
+			pause_count <<= try_count;
+			for (i = 0; i < pause_count; i++)
+				rte_pause();
 			continue;
+		}
 
 		if ((status & RTE_XABORT_RETRY) == 0) /* do not retry */
 			break;
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index cba7bbc1..87f0f630 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -473,6 +473,25 @@ rte_log2_u32(uint32_t v)
 	return rte_bsf32(v);
 }
 
+
+/**
+ * Return the last (most-significant) bit set.
+ *
+ * @note The last (most significant) bit is at position 32.
+ * @note rte_fls_u32(0) = 0, rte_fls_u32(1) = 1, rte_fls_u32(0x80000000) = 32
+ *
+ * @param x
+ *     The input parameter.
+ * @return
+ *     The last (most-significant) bit set, or 0 if the input is 0.
+ */
+static inline int
+rte_fls_u32(uint32_t x)
+{
+	return (x == 0) ? 0 : 32 - __builtin_clz(x);
+}
+
+
 #ifndef offsetof
 /** Return the offset of a field in a structure. */
 #define offsetof(TYPE, MEMBER)  __builtin_offsetof (TYPE, MEMBER)
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index b1f121f8..29b3fb7c 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -146,6 +146,8 @@ __attribute__((format(printf, 2, 0)));
  *
  * @param da
  *  The devargs structure to insert.
+ *  If a devargs for the same device is already inserted,
+ *  it will be updated and returned. This means the *da pointer can change.
  *
  * @return
  *   - 0 on success
@@ -153,7 +155,7 @@ __attribute__((format(printf, 2, 0)));
  */
 __rte_experimental
 int
-rte_devargs_insert(struct rte_devargs *da);
+rte_devargs_insert(struct rte_devargs **da);
 
 /**
  * Add a device to the user device list
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 80c516d3..fc26e97a 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -49,7 +49,7 @@ extern "C" {
  *   0-15 = release candidates
  *   16   = release
  */
-#define RTE_VER_RELEASE 2
+#define RTE_VER_RELEASE 3
 
 /**
  * Macro to compute a version number usable for comparisons
diff --git a/lib/librte_eal/common/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c
index d81b11db..f017d0c2 100644
--- a/lib/librte_eal/common/rte_reciprocal.c
+++ b/lib/librte_eal/common/rte_reciprocal.c
@@ -41,28 +41,13 @@
 
 #include "rte_reciprocal.h"
 
-/* find largest set bit.
- * portable and slow but does not matter for this usage.
- */
-static inline int fls(uint32_t x)
-{
-	int b;
-
-	for (b = 31; b >= 0; --b) {
-		if (x & (1u << b))
-			return b + 1;
-	}
-
-	return 0;
-}
-
 struct rte_reciprocal rte_reciprocal_value(uint32_t d)
 {
 	struct rte_reciprocal R;
 	uint64_t m;
 	int l;
 
-	l = fls(d - 1);
+	l = rte_fls_u32(d - 1);
 	m = ((1ULL << 32) * ((1ULL << l) - d));
 	m /= d;
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
index 391d2a65..840ede78 100644
--- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -30,7 +30,9 @@
 #define NS_PER_US 1000
 #define US_PER_MS 1000
 #define MS_PER_S 1000
+#ifndef US_PER_S
 #define US_PER_S (US_PER_MS * MS_PER_S)
+#endif
 
 #ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
 #define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index c1b5e079..48b23ce1 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -1617,6 +1617,7 @@ eal_legacy_hugepage_init(void)
 	tmp_hp = NULL;
 
 	munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
+	hugepage = NULL;
 
 	/* we're not going to allocate more pages, so release VA space for
 	 * unused memseg lists
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index 8eaa5fcc..5f858174 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -1092,8 +1092,9 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 {
 	struct rte_eth_dev *dev;
 	struct rte_eth_dev_info dev_info;
-	struct rte_eth_conf local_conf = *dev_conf;
+	struct rte_eth_conf orig_conf;
 	int diag;
+	int ret;
 
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
 
@@ -1102,6 +1103,22 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
 	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP);
 
+	if (dev->data->dev_started) {
+		RTE_ETHDEV_LOG(ERR,
+			"Port %u must be stopped to allow configuration\n",
+			port_id);
+		return -EBUSY;
+	}
+
+	/* Store original config, as rollback is required on failure */
+	memcpy(&orig_conf, &dev->data->dev_conf, sizeof(dev->data->dev_conf));
+
+	/*
+	 * Copy the dev_conf parameter into the dev structure.
+	 * rte_eth_dev_info_get() requires dev_conf, copy it before dev_info get
+	 */
+	memcpy(&dev->data->dev_conf, dev_conf, sizeof(dev->data->dev_conf));
+
 	rte_eth_dev_info_get(port_id, &dev_info);
 
 	/* If number of queues specified by application for both Rx and Tx is
@@ -1123,26 +1140,18 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		RTE_ETHDEV_LOG(ERR,
 			"Number of RX queues requested (%u) is greater than max supported(%d)\n",
 			nb_rx_q, RTE_MAX_QUEUES_PER_PORT);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto rollback;
 	}
 
 	if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) {
 		RTE_ETHDEV_LOG(ERR,
 			"Number of TX queues requested (%u) is greater than max supported(%d)\n",
 			nb_tx_q, RTE_MAX_QUEUES_PER_PORT);
-		return -EINVAL;
-	}
-
-	if (dev->data->dev_started) {
-		RTE_ETHDEV_LOG(ERR,
-			"Port %u must be stopped to allow configuration\n",
-			port_id);
-		return -EBUSY;
+		ret = -EINVAL;
+		goto rollback;
 	}
 
-	/* Copy the dev_conf parameter into the dev structure */
-	memcpy(&dev->data->dev_conf, &local_conf, sizeof(dev->data->dev_conf));
-
 	/*
 	 * Check that the numbers of RX and TX queues are not greater
 	 * than the maximum number of RX and TX queues supported by the
@@ -1151,13 +1160,15 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 	if (nb_rx_q > dev_info.max_rx_queues) {
 		RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_rx_queues=%u > %u\n",
 			port_id, nb_rx_q, dev_info.max_rx_queues);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto rollback;
 	}
 
 	if (nb_tx_q > dev_info.max_tx_queues) {
 		RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_tx_queues=%u > %u\n",
 			port_id, nb_tx_q, dev_info.max_tx_queues);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto rollback;
 	}
 
 	/* Check that the device supports requested interrupts */
@@ -1165,32 +1176,36 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 			(!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
 		RTE_ETHDEV_LOG(ERR, "Driver %s does not support lsc\n",
 			dev->device->driver->name);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto rollback;
 	}
 	if ((dev_conf->intr_conf.rmv == 1) &&
 			(!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) {
 		RTE_ETHDEV_LOG(ERR, "Driver %s does not support rmv\n",
 			dev->device->driver->name);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto rollback;
 	}
 
 	/*
 	 * If jumbo frames are enabled, check that the maximum RX packet
 	 * length is supported by the configured device.
 	 */
-	if (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
+	if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
 		if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) {
 			RTE_ETHDEV_LOG(ERR,
 				"Ethdev port_id=%u max_rx_pkt_len %u > max valid value %u\n",
 				port_id, dev_conf->rxmode.max_rx_pkt_len,
 				dev_info.max_rx_pktlen);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto rollback;
 		} else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) {
 			RTE_ETHDEV_LOG(ERR,
 				"Ethdev port_id=%u max_rx_pkt_len %u < min valid value %u\n",
 				port_id, dev_conf->rxmode.max_rx_pkt_len,
 				(unsigned)ETHER_MIN_LEN);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto rollback;
 		}
 	} else {
 		if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN ||
@@ -1201,25 +1216,27 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 	}
 
 	/* Any requested offloading must be within its device capabilities */
-	if ((local_conf.rxmode.offloads & dev_info.rx_offload_capa) !=
-	     local_conf.rxmode.offloads) {
+	if ((dev_conf->rxmode.offloads & dev_info.rx_offload_capa) !=
+	     dev_conf->rxmode.offloads) {
 		RTE_ETHDEV_LOG(ERR,
 			"Ethdev port_id=%u requested Rx offloads 0x%"PRIx64" doesn't match Rx offloads "
 			"capabilities 0x%"PRIx64" in %s()\n",
-			port_id, local_conf.rxmode.offloads,
+			port_id, dev_conf->rxmode.offloads,
 			dev_info.rx_offload_capa,
 			__func__);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto rollback;
 	}
-	if ((local_conf.txmode.offloads & dev_info.tx_offload_capa) !=
-	     local_conf.txmode.offloads) {
+	if ((dev_conf->txmode.offloads & dev_info.tx_offload_capa) !=
+	     dev_conf->txmode.offloads) {
 		RTE_ETHDEV_LOG(ERR,
 			"Ethdev port_id=%u requested Tx offloads 0x%"PRIx64" doesn't match Tx offloads "
 			"capabilities 0x%"PRIx64" in %s()\n",
-			port_id, local_conf.txmode.offloads,
+			port_id, dev_conf->txmode.offloads,
 			dev_info.tx_offload_capa,
 			__func__);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto rollback;
 	}
 
 	/* Check that device supports requested rss hash functions. */
@@ -1230,7 +1247,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 			"Ethdev port_id=%u invalid rss_hf: 0x%"PRIx64", valid value: 0x%"PRIx64"\n",
 			port_id, dev_conf->rx_adv_conf.rss_conf.rss_hf,
 			dev_info.flow_type_rss_offloads);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto rollback;
 	}
 
 	/*
@@ -1241,7 +1259,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 		RTE_ETHDEV_LOG(ERR,
 			"Port%u rte_eth_dev_rx_queue_config = %d\n",
 			port_id, diag);
-		return diag;
+		ret = diag;
+		goto rollback;
 	}
 
 	diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q);
@@ -1250,7 +1269,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 			"Port%u rte_eth_dev_tx_queue_config = %d\n",
 			port_id, diag);
 		rte_eth_dev_rx_queue_config(dev, 0);
-		return diag;
+		ret = diag;
+		goto rollback;
 	}
 
 	diag = (*dev->dev_ops->dev_configure)(dev);
@@ -1259,7 +1279,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 			port_id, diag);
 		rte_eth_dev_rx_queue_config(dev, 0);
 		rte_eth_dev_tx_queue_config(dev, 0);
-		return eth_err(port_id, diag);
+		ret = eth_err(port_id, diag);
+		goto rollback;
 	}
 
 	/* Initialize Rx profiling if enabled at compilation time. */
@@ -1269,10 +1290,16 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 			port_id, diag);
 		rte_eth_dev_rx_queue_config(dev, 0);
 		rte_eth_dev_tx_queue_config(dev, 0);
-		return eth_err(port_id, diag);
+		ret = eth_err(port_id, diag);
+		goto rollback;
 	}
 
 	return 0;
+
+rollback:
+	memcpy(&dev->data->dev_conf, &orig_conf, sizeof(dev->data->dev_conf));
+
+	return ret;
 }
 
 void
diff --git a/lib/librte_hash/rte_cmp_x86.h b/lib/librte_hash/rte_cmp_x86.h
index e82b4c08..13a58363 100644
--- a/lib/librte_hash/rte_cmp_x86.h
+++ b/lib/librte_hash/rte_cmp_x86.h
@@ -2,6 +2,8 @@
  * Copyright(c) 2015 Intel Corporation
  */
 
+#include <rte_vect.h>
+
 /* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
 static int
 rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index 5ddcccd8..c55a4f26 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -13,7 +13,6 @@
 #include <rte_common.h>
 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
 #include <rte_log.h>
-#include <rte_memcpy.h>
 #include <rte_prefetch.h>
 #include <rte_branch_prediction.h>
 #include <rte_malloc.h>
@@ -982,7 +981,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 	new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
 	new_idx = (uint32_t)((uintptr_t) slot_id);
 	/* Copy key */
-	rte_memcpy(new_k->key, key, h->key_len);
+	memcpy(new_k->key, key, h->key_len);
 	/* Key can be of arbitrary length, so it is not possible to store
 	 * it atomically. Hence the new key element's memory stores
 	 * (key as well as data) should be complete before it is referenced.
@@ -1129,9 +1128,38 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
 		return ret;
 }
 
+/* Search one bucket to find the match key - uses rw lock */
+static inline int32_t
+search_one_bucket_l(const struct rte_hash *h, const void *key,
+		uint16_t sig, void **data,
+		const struct rte_hash_bucket *bkt)
+{
+	int i;
+	struct rte_hash_key *k, *keys = h->key_store;
+
+	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+		if (bkt->sig_current[i] == sig &&
+				bkt->key_idx[i] != EMPTY_SLOT) {
+			k = (struct rte_hash_key *) ((char *)keys +
+					bkt->key_idx[i] * h->key_entry_size);
+
+			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
+				if (data != NULL)
+					*data = k->pdata;
+				/*
+				 * Return index where key is stored,
+				 * subtracting the first dummy index
+				 */
+				return bkt->key_idx[i] - 1;
+			}
+		}
+	}
+	return -1;
+}
+
 /* Search one bucket to find the match key */
 static inline int32_t
-search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig,
+search_one_bucket_lf(const struct rte_hash *h, const void *key, uint16_t sig,
 			void **data, const struct rte_hash_bucket *bkt)
 {
 	int i;
@@ -1163,12 +1191,11 @@ search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig,
 }
 
 static inline int32_t
-__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
-					hash_sig_t sig, void **data)
+__rte_hash_lookup_with_hash_l(const struct rte_hash *h, const void *key,
+				hash_sig_t sig, void **data)
 {
 	uint32_t prim_bucket_idx, sec_bucket_idx;
 	struct rte_hash_bucket *bkt, *cur_bkt;
-	uint32_t cnt_b, cnt_a;
 	int ret;
 	uint16_t short_sig;
 
@@ -1176,8 +1203,48 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 	prim_bucket_idx = get_prim_bucket_index(h, sig);
 	sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
 
+	bkt = &h->buckets[prim_bucket_idx];
+
 	__hash_rw_reader_lock(h);
 
+	/* Check if key is in primary location */
+	ret = search_one_bucket_l(h, key, short_sig, data, bkt);
+	if (ret != -1) {
+		__hash_rw_reader_unlock(h);
+		return ret;
+	}
+	/* Calculate secondary hash */
+	bkt = &h->buckets[sec_bucket_idx];
+
+	/* Check if key is in secondary location */
+	FOR_EACH_BUCKET(cur_bkt, bkt) {
+		ret = search_one_bucket_l(h, key, short_sig,
+					data, cur_bkt);
+		if (ret != -1) {
+			__hash_rw_reader_unlock(h);
+			return ret;
+		}
+	}
+
+	__hash_rw_reader_unlock(h);
+
+	return -ENOENT;
+}
+
+static inline int32_t
+__rte_hash_lookup_with_hash_lf(const struct rte_hash *h, const void *key,
+					hash_sig_t sig, void **data)
+{
+	uint32_t prim_bucket_idx, sec_bucket_idx;
+	struct rte_hash_bucket *bkt, *cur_bkt;
+	uint32_t cnt_b, cnt_a;
+	int ret;
+	uint16_t short_sig;
+
+	short_sig = get_short_sig(sig);
+	prim_bucket_idx = get_prim_bucket_index(h, sig);
+	sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
+
 	do {
 		/* Load the table change counter before the lookup
 		 * starts. Acquire semantics will make sure that
@@ -1188,7 +1255,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 
 		/* Check if key is in primary location */
 		bkt = &h->buckets[prim_bucket_idx];
-		ret = search_one_bucket(h, key, short_sig, data, bkt);
+		ret = search_one_bucket_lf(h, key, short_sig, data, bkt);
 		if (ret != -1) {
 			__hash_rw_reader_unlock(h);
 			return ret;
@@ -1198,7 +1265,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 
 		/* Check if key is in secondary location */
 		FOR_EACH_BUCKET(cur_bkt, bkt) {
-			ret = search_one_bucket(h, key, short_sig,
+			ret = search_one_bucket_lf(h, key, short_sig,
 						data, cur_bkt);
 			if (ret != -1) {
 				__hash_rw_reader_unlock(h);
@@ -1222,11 +1289,19 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 					__ATOMIC_ACQUIRE);
 	} while (cnt_b != cnt_a);
 
-	__hash_rw_reader_unlock(h);
-
 	return -ENOENT;
 }
 
+static inline int32_t
+__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
+					hash_sig_t sig, void **data)
+{
+	/* Pick the lookup variant matching the table's concurrency mode. */
+	return h->readwrite_concur_lf_support ?
+		__rte_hash_lookup_with_hash_lf(h, key, sig, data) :
+		__rte_hash_lookup_with_hash_l(h, key, sig, data);
+}
+
 int32_t
 rte_hash_lookup_with_hash(const struct rte_hash *h,
 			const void *key, hash_sig_t sig)
@@ -1528,7 +1603,197 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
 
 #define PREFETCH_OFFSET 4
 static inline void
-__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
+__rte_hash_lookup_bulk_l(const struct rte_hash *h, const void **keys,
+			int32_t num_keys, int32_t *positions,
+			uint64_t *hit_mask, void *data[])
+{
+	uint64_t hits = 0;
+	int32_t i;
+	int32_t ret;
+	uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
+	uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
+	uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
+	uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
+	const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+	const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+	uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
+	uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
+	struct rte_hash_bucket *cur_bkt, *next_bkt;
+
+	/* Prefetch first keys */
+	for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
+		rte_prefetch0(keys[i]);
+
+	/*
+	 * Prefetch rest of the keys, calculate primary and
+	 * secondary bucket and prefetch them
+	 */
+	for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
+		rte_prefetch0(keys[i + PREFETCH_OFFSET]);
+
+		prim_hash[i] = rte_hash_hash(h, keys[i]);
+
+		sig[i] = get_short_sig(prim_hash[i]);
+		prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
+		sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
+
+		primary_bkt[i] = &h->buckets[prim_index[i]];
+		secondary_bkt[i] = &h->buckets[sec_index[i]];
+
+		rte_prefetch0(primary_bkt[i]);
+		rte_prefetch0(secondary_bkt[i]);
+	}
+
+	/* Calculate and prefetch rest of the buckets */
+	for (; i < num_keys; i++) {
+		prim_hash[i] = rte_hash_hash(h, keys[i]);
+
+		sig[i] = get_short_sig(prim_hash[i]);
+		prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
+		sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
+
+		primary_bkt[i] = &h->buckets[prim_index[i]];
+		secondary_bkt[i] = &h->buckets[sec_index[i]];
+
+		rte_prefetch0(primary_bkt[i]);
+		rte_prefetch0(secondary_bkt[i]);
+	}
+
+	__hash_rw_reader_lock(h);
+
+	/* Compare signatures and prefetch key slot of first hit */
+	for (i = 0; i < num_keys; i++) {
+		compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
+			primary_bkt[i], secondary_bkt[i],
+			sig[i], h->sig_cmp_fn);
+
+		if (prim_hitmask[i]) {
+			uint32_t first_hit =
+					__builtin_ctzl(prim_hitmask[i])
+					>> 1;
+			uint32_t key_idx =
+				primary_bkt[i]->key_idx[first_hit];
+			const struct rte_hash_key *key_slot =
+				(const struct rte_hash_key *)(
+				(const char *)h->key_store +
+				key_idx * h->key_entry_size);
+			rte_prefetch0(key_slot);
+			continue;
+		}
+
+		if (sec_hitmask[i]) {
+			uint32_t first_hit =
+					__builtin_ctzl(sec_hitmask[i])
+					>> 1;
+			uint32_t key_idx =
+				secondary_bkt[i]->key_idx[first_hit];
+			const struct rte_hash_key *key_slot =
+				(const struct rte_hash_key *)(
+				(const char *)h->key_store +
+				key_idx * h->key_entry_size);
+			rte_prefetch0(key_slot);
+		}
+	}
+
+	/* Compare keys, first hits in primary first */
+	for (i = 0; i < num_keys; i++) {
+		positions[i] = -ENOENT;
+		while (prim_hitmask[i]) {
+			uint32_t hit_index =
+					__builtin_ctzl(prim_hitmask[i])
+					>> 1;
+			uint32_t key_idx =
+				primary_bkt[i]->key_idx[hit_index];
+			const struct rte_hash_key *key_slot =
+				(const struct rte_hash_key *)(
+				(const char *)h->key_store +
+				key_idx * h->key_entry_size);
+
+			/*
+			 * If key index is 0, do not compare key,
+			 * as it is checking the dummy slot
+			 */
+			if (!!key_idx &
+				!rte_hash_cmp_eq(
+					key_slot->key, keys[i], h)) {
+				if (data != NULL)
+					data[i] = key_slot->pdata;
+
+				hits |= 1ULL << i;
+				positions[i] = key_idx - 1;
+				goto next_key;
+			}
+			prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
+		}
+
+		while (sec_hitmask[i]) {
+			uint32_t hit_index =
+					__builtin_ctzl(sec_hitmask[i])
+					>> 1;
+			uint32_t key_idx =
+				secondary_bkt[i]->key_idx[hit_index];
+			const struct rte_hash_key *key_slot =
+				(const struct rte_hash_key *)(
+				(const char *)h->key_store +
+				key_idx * h->key_entry_size);
+
+			/*
+			 * If key index is 0, do not compare key,
+			 * as it is checking the dummy slot
+			 */
+
+			if (!!key_idx &
+				!rte_hash_cmp_eq(
+					key_slot->key, keys[i], h)) {
+				if (data != NULL)
+					data[i] = key_slot->pdata;
+
+				hits |= 1ULL << i;
+				positions[i] = key_idx - 1;
+				goto next_key;
+			}
+			sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
+		}
+next_key:
+		continue;
+	}
+
+	/* all found, do not need to go through ext bkt */
+	if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
+		if (hit_mask != NULL)
+			*hit_mask = hits;
+		__hash_rw_reader_unlock(h);
+		return;
+	}
+
+	/* need to check ext buckets for match */
+	for (i = 0; i < num_keys; i++) {
+		if ((hits & (1ULL << i)) != 0)
+			continue;
+		next_bkt = secondary_bkt[i]->next;
+		FOR_EACH_BUCKET(cur_bkt, next_bkt) {
+			if (data != NULL)
+				ret = search_one_bucket_l(h, keys[i],
+						sig[i], &data[i], cur_bkt);
+			else
+				ret = search_one_bucket_l(h, keys[i],
+						sig[i], NULL, cur_bkt);
+			if (ret != -1) {
+				positions[i] = ret;
+				hits |= 1ULL << i;
+				break;
+			}
+		}
+	}
+
+	__hash_rw_reader_unlock(h);
+
+	if (hit_mask != NULL)
+		*hit_mask = hits;
+}
+
+static inline void
+__rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
 			int32_t num_keys, int32_t *positions,
 			uint64_t *hit_mask, void *data[])
 {
@@ -1586,7 +1851,6 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 		rte_prefetch0(secondary_bkt[i]);
 	}
 
-	__hash_rw_reader_lock(h);
 	do {
 		/* Load the table change counter before the lookup
 		 * starts. Acquire semantics will make sure that
@@ -1735,10 +1999,10 @@ next_key:
 		next_bkt = secondary_bkt[i]->next;
 		FOR_EACH_BUCKET(cur_bkt, next_bkt) {
 			if (data != NULL)
-				ret = search_one_bucket(h, keys[i],
+				ret = search_one_bucket_lf(h, keys[i],
 						sig[i], &data[i], cur_bkt);
 			else
-				ret = search_one_bucket(h, keys[i],
+				ret = search_one_bucket_lf(h, keys[i],
 						sig[i], NULL, cur_bkt);
 			if (ret != -1) {
 				positions[i] = ret;
@@ -1748,12 +2012,23 @@ next_key:
 		}
 	}
 
-	__hash_rw_reader_unlock(h);
-
 	if (hit_mask != NULL)
 		*hit_mask = hits;
 }
 
+static inline void
+__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
+			int32_t num_keys, int32_t *positions,
+			uint64_t *hit_mask, void *data[])
+{
+	if (h->readwrite_concur_lf_support) /* void helpers: call, no return */
+		__rte_hash_lookup_bulk_lf(h, keys, num_keys,
+						positions, hit_mask, data);
+	else /* this variant takes the reader lock itself */
+		__rte_hash_lookup_bulk_l(h, keys, num_keys,
+						positions, hit_mask, data);
+}
+
 int
 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 		      uint32_t num_keys, int32_t *positions)
diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c
index 530738db..f400178b 100644
--- a/lib/librte_pci/rte_pci.c
+++ b/lib/librte_pci/rte_pci.c
@@ -30,6 +30,10 @@ get_u8_pciaddr_field(const char *in, void *_u8, char dlm)
 	uint8_t *u8 = _u8;
 	char *end;
 
+	/* empty string is an error though strtoul() returns 0 */
+	if (*in == '\0')
+		return NULL;
+
 	errno = 0;
 	val = strtoul(in, &end, 16);
 	if (errno != 0 || end[0] != dlm || val > UINT8_MAX) {
diff --git a/lib/librte_pipeline/rte_table_action.c b/lib/librte_pipeline/rte_table_action.c
index 537e6593..7c7c8dd8 100644
--- a/lib/librte_pipeline/rte_table_action.c
+++ b/lib/librte_pipeline/rte_table_action.c
@@ -1694,10 +1694,9 @@ get_block_size(const struct rte_crypto_sym_xform *xform, uint8_t cdev_id)
 
 	rte_cryptodev_info_get(cdev_id, &dev_info);
 
-	for (i = 0;; i++) {
+	for (i = 0; dev_info.capabilities[i].op != RTE_CRYPTO_OP_TYPE_UNDEFINED;
+			i++) {
 		cap = &dev_info.capabilities[i];
-		if (!cap)
-			break;
 
 		if (cap->sym.xform_type != xform->type)
 			continue;
diff --git a/lib/librte_ring/rte_ring_c11_mem.h b/lib/librte_ring/rte_ring_c11_mem.h
index 7bc74a4c..0fb73a33 100644
--- a/lib/librte_ring/rte_ring_c11_mem.h
+++ b/lib/librte_ring/rte_ring_c11_mem.h
@@ -61,11 +61,14 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp,
 	unsigned int max = n;
 	int success;
 
-	*old_head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE);
+	*old_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED);
 	do {
 		/* Reset n to the initial burst count */
 		n = max;
 
+		/* Ensure the head is read before tail */
+		__atomic_thread_fence(__ATOMIC_ACQUIRE);
+
 		/* load-acquire synchronize with store-release of ht->tail
 		 * in update_tail.
 		 */
@@ -94,7 +97,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp,
 			/* on failure, *old_head is updated */
 			success = __atomic_compare_exchange_n(&r->prod.head,
 					old_head, *new_head,
-					0, __ATOMIC_ACQUIRE,
+					0, __ATOMIC_RELAXED,
 					__ATOMIC_RELAXED);
 	} while (unlikely(success == 0));
 	return n;
@@ -134,11 +137,14 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
 	int success;
 
 	/* move cons.head atomically */
-	*old_head = __atomic_load_n(&r->cons.head, __ATOMIC_ACQUIRE);
+	*old_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED);
 	do {
 		/* Restore n as it may change every loop */
 		n = max;
 
+		/* Ensure the head is read before tail */
+		__atomic_thread_fence(__ATOMIC_ACQUIRE);
+
 		/* this load-acquire synchronize with store-release of ht->tail
 		 * in update_tail.
 		 */
@@ -166,7 +172,7 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
 			/* on failure, *old_head will be updated */
 			success = __atomic_compare_exchange_n(&r->cons.head,
 							old_head, *new_head,
-							0, __ATOMIC_ACQUIRE,
+							0, __ATOMIC_RELAXED,
 							__ATOMIC_RELAXED);
 	} while (unlikely(success == 0));
 	return n;
diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c
index 5472bead..dd01afc0 100644
--- a/lib/librte_vhost/vhost_crypto.c
+++ b/lib/librte_vhost/vhost_crypto.c
@@ -198,6 +198,7 @@ struct vhost_crypto {
 	struct rte_hash *session_map;
 	struct rte_mempool *mbuf_pool;
 	struct rte_mempool *sess_pool;
+	struct rte_mempool *wb_pool;
 
 	/** DPDK cryptodev ID */
 	uint8_t cid;
@@ -215,13 +216,20 @@ struct vhost_crypto {
 	uint8_t option;
 } __rte_cache_aligned;
 
+struct vhost_crypto_writeback_data {
+	uint8_t *src;
+	uint8_t *dst;
+	uint64_t len;
+	struct vhost_crypto_writeback_data *next;
+};
+
 struct vhost_crypto_data_req {
 	struct vring_desc *head;
 	struct virtio_net *dev;
 	struct virtio_crypto_inhdr *inhdr;
 	struct vhost_virtqueue *vq;
-	struct vring_desc *wb_desc;
-	uint16_t wb_len;
+	struct vhost_crypto_writeback_data *wb;
+	struct rte_mempool *wb_pool;
 	uint16_t desc_idx;
 	uint16_t len;
 	uint16_t zero_copy;
@@ -506,15 +514,29 @@ move_desc(struct vring_desc *head, struct vring_desc **cur_desc,
 		left -= desc->len;
 	}
 
-	if (unlikely(left > 0)) {
-		VC_LOG_ERR("Incorrect virtio descriptor");
+	if (unlikely(left > 0))
 		return -1;
-	}
 
 	*cur_desc = &head[desc->next];
 	return 0;
 }
 
+static __rte_always_inline void *
+get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc *cur_desc,
+		uint8_t perm)
+{
+	void *data;
+	uint64_t dlen = cur_desc->len;
+
+	data = IOVA_TO_VVA(void *, vc_req, cur_desc->addr, &dlen, perm);
+	if (unlikely(!data || dlen != cur_desc->len)) {
+		VC_LOG_ERR("Failed to map object");
+		return NULL;
+	}
+
+	return data;
+}
+
 static int
 copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
 		struct vring_desc **cur_desc, uint32_t size)
@@ -531,10 +553,8 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
 	dlen = to_copy;
 	src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen,
 			VHOST_ACCESS_RO);
-	if (unlikely(!src || !dlen)) {
-		VC_LOG_ERR("Failed to map descriptor");
+	if (unlikely(!src || !dlen))
 		return -1;
-	}
 
 	rte_memcpy((uint8_t *)data, src, dlen);
 	data += dlen;
@@ -609,73 +629,158 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
 	return 0;
 }
 
-static __rte_always_inline void *
-get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc **cur_desc,
-		uint32_t size, uint8_t perm)
+static void
+write_back_data(struct vhost_crypto_data_req *vc_req)
 {
-	void *data;
-	uint64_t dlen = (*cur_desc)->len;
-
-	data = IOVA_TO_VVA(void *, vc_req, (*cur_desc)->addr, &dlen, perm);
-	if (unlikely(!data || dlen != (*cur_desc)->len)) {
-		VC_LOG_ERR("Failed to map object");
-		return NULL;
+	struct vhost_crypto_writeback_data *wb_data = vc_req->wb, *wb_last;
+
+	while (wb_data) {
+		rte_prefetch0(wb_data->next);
+		rte_memcpy(wb_data->dst, wb_data->src, wb_data->len);
+		wb_last = wb_data;
+		wb_data = wb_data->next;
+		rte_mempool_put(vc_req->wb_pool, wb_last);
 	}
+}
 
-	if (unlikely(move_desc(vc_req->head, cur_desc, size) < 0))
-		return NULL;
+static void
+free_wb_data(struct vhost_crypto_writeback_data *wb_data,
+		struct rte_mempool *mp)
+{
+	if (wb_data->next != NULL) /* release the tail first, exactly once */
+		free_wb_data(wb_data->next, mp);
 
-	return data;
+	rte_mempool_put(mp, wb_data);
 }
 
-static int
-write_back_data(struct rte_crypto_op *op, struct vhost_crypto_data_req *vc_req)
+/**
+ * Build the linked list of write-back segments (source pointer, destination
+ * pointer, length) that write_back_data() copies to the mapped descriptor
+ * buffers after the operation is dequeued from the Cryptodev PMD queues.
+ *
+ * @param vc_req
+ *   The vhost crypto data request; supplies the descriptor table and wb_pool.
+ * @param cur_desc
+ *   In: the first destination descriptor. Out: the descriptor following the
+ *   last one consumed. It is only updated on success.
+ * @param end_wb_data
+ *   Out: the last element of the returned list, so the caller can chain a
+ *   further list behind it (used by the cipher and hash chain operation).
+ * @param src
+ *   The source data pointer
+ * @param offset
+ *   The offset to both source and destination data. For source data the offset
+ *   is the number of bytes between src and start point of cipher operation. For
+ *   destination data the offset is the number of bytes from *cur_desc->addr
+ *   to the point where the src will be written to.
+ * @param write_back_len
+ *   The number of bytes to write back.
+ * @return
+ *   The head of the list, or NULL on failure, in which case the nodes obtained
+ *   so far are handed to free_wb_data().
+ */
+static struct vhost_crypto_writeback_data *
+prepare_write_back_data(struct vhost_crypto_data_req *vc_req,
+		struct vring_desc **cur_desc,
+		struct vhost_crypto_writeback_data **end_wb_data,
+		uint8_t *src,
+		uint32_t offset,
+		uint64_t write_back_len)
 {
-	struct rte_mbuf *mbuf = op->sym->m_dst;
-	struct vring_desc *head = vc_req->head;
-	struct vring_desc *desc = vc_req->wb_desc;
-	int left = vc_req->wb_len;
-	uint32_t to_write;
-	uint8_t *src_data = mbuf->buf_addr, *dst;
+	struct vhost_crypto_writeback_data *wb_data, *head;
+	struct vring_desc *desc = *cur_desc;
 	uint64_t dlen;
+	uint8_t *dst;
+	int ret;
 
-	rte_prefetch0(&head[desc->next]);
-	to_write = RTE_MIN(desc->len, (uint32_t)left);
-	dlen = desc->len;
-	dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen,
-			VHOST_ACCESS_RW);
-	if (unlikely(!dst || dlen != desc->len)) {
-		VC_LOG_ERR("Failed to map descriptor");
-		return -1;
+	ret = rte_mempool_get(vc_req->wb_pool, (void **)&head);
+	if (unlikely(ret < 0)) {
+		VC_LOG_ERR("no memory");
+		return NULL; /* nothing allocated yet, nothing to unwind */
 	}
 
-	rte_memcpy(dst, src_data, to_write);
-	left -= to_write;
-	src_data += to_write;
+	wb_data = head;
 
-	while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) {
-		desc = &head[desc->next];
-		rte_prefetch0(&head[desc->next]);
-		to_write = RTE_MIN(desc->len, (uint32_t)left);
+	if (likely(desc->len > offset)) {
+		wb_data->src = src + offset;
 		dlen = desc->len;
-		dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen,
-				VHOST_ACCESS_RW);
+		/* validate the mapping before applying offset */
+		dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr,
+			&dlen, VHOST_ACCESS_RW);
 		if (unlikely(!dst || dlen != desc->len)) {
 			VC_LOG_ERR("Failed to map descriptor");
-			return -1;
+			goto error_exit;
 		}
 
-		rte_memcpy(dst, src_data, to_write);
-		left -= to_write;
-		src_data += to_write;
-	}
+		wb_data->dst = dst + offset;
+		wb_data->len = RTE_MIN(desc->len - offset, write_back_len);
+		write_back_len -= wb_data->len;
+		src += offset + wb_data->len;
+		offset = 0;
+
+		if (unlikely(write_back_len)) {
+			ret = rte_mempool_get(vc_req->wb_pool,
+					(void **)&(wb_data->next));
+			if (unlikely(ret < 0)) {
+				VC_LOG_ERR("no memory");
+				goto error_exit;
+			}
 
-	if (unlikely(left < 0)) {
-		VC_LOG_ERR("Incorrect virtio descriptor");
-		return -1;
+			wb_data = wb_data->next;
+		} else
+			wb_data->next = NULL;
+	} else
+		offset -= desc->len;
+
+	while (write_back_len) {
+		desc = &vc_req->head[desc->next];
+		if (unlikely(!(desc->flags & VRING_DESC_F_WRITE))) {
+			VC_LOG_ERR("incorrect descriptor");
+			goto error_exit;
+		}
+
+		if (desc->len <= offset) {
+			offset -= desc->len;
+			continue;
+		}
+
+		dlen = desc->len;
+		dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen,
+				VHOST_ACCESS_RW);
+		if (unlikely(dst == NULL || dlen != desc->len)) {
+			VC_LOG_ERR("Failed to map descriptor");
+			goto error_exit;
+		}
+
+		wb_data->src = src;
+		wb_data->dst = dst + offset;
+		wb_data->len = RTE_MIN(desc->len - offset, write_back_len);
+		write_back_len -= wb_data->len;
+		src += wb_data->len;
+		offset = 0;
+
+		if (write_back_len) {
+			ret = rte_mempool_get(vc_req->wb_pool,
+					(void **)&(wb_data->next));
+			if (unlikely(ret < 0)) {
+				VC_LOG_ERR("no memory");
+				goto error_exit;
+			}
+
+			wb_data = wb_data->next;
+		} else
+			wb_data->next = NULL;
 	}
 
-	return 0;
+	*cur_desc = &vc_req->head[desc->next];
+	*end_wb_data = wb_data;
+	return head;
+
+error_exit:
+	/* wb_data is the last node obtained: terminate the list first */
+	wb_data->next = NULL;
+	free_wb_data(head, vc_req->wb_pool);
+	return NULL;
 }
 
 static uint8_t
@@ -685,6 +790,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 		struct vring_desc *cur_desc)
 {
 	struct vring_desc *desc = cur_desc;
+	struct vhost_crypto_writeback_data *ewb = NULL;
 	struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst;
 	uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET);
 	uint8_t ret = 0;
@@ -703,16 +809,25 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
 		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
 				cipher->para.src_data_len);
-		m_src->buf_addr = get_data_ptr(vc_req, &desc,
-				cipher->para.src_data_len, VHOST_ACCESS_RO);
+		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
 		if (unlikely(m_src->buf_iova == 0 ||
 				m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
 		}
+
+		if (unlikely(move_desc(vc_req->head, &desc,
+				cipher->para.src_data_len) < 0)) {
+			VC_LOG_ERR("Incorrect descriptor");
+			ret = VIRTIO_CRYPTO_ERR;
+			goto error_exit;
+		}
+
 		break;
 	case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
+		vc_req->wb_pool = vcrypto->wb_pool;
+
 		if (unlikely(cipher->para.src_data_len >
 				RTE_MBUF_DEFAULT_BUF_SIZE)) {
 			VC_LOG_ERR("Not enough space to do data copy");
@@ -743,24 +858,31 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
 		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
 				desc->addr, cipher->para.dst_data_len);
-		m_dst->buf_addr = get_data_ptr(vc_req, &desc,
-				cipher->para.dst_data_len, VHOST_ACCESS_RW);
+		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
 		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
 		}
 
+		if (unlikely(move_desc(vc_req->head, &desc,
+				cipher->para.dst_data_len) < 0)) {
+			VC_LOG_ERR("Incorrect descriptor");
+			ret = VIRTIO_CRYPTO_ERR;
+			goto error_exit;
+		}
+
 		m_dst->data_len = cipher->para.dst_data_len;
 		break;
 	case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
-		vc_req->wb_desc = desc;
-		vc_req->wb_len = cipher->para.dst_data_len;
-		if (unlikely(move_desc(vc_req->head, &desc,
-				vc_req->wb_len) < 0)) {
+		vc_req->wb = prepare_write_back_data(vc_req, &desc, &ewb,
+				rte_pktmbuf_mtod(m_src, uint8_t *), 0,
+				cipher->para.dst_data_len);
+		if (unlikely(vc_req->wb == NULL)) {
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
 		}
+
 		break;
 	default:
 		ret = VIRTIO_CRYPTO_BADMSG;
@@ -774,7 +896,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	op->sym->cipher.data.offset = 0;
 	op->sym->cipher.data.length = cipher->para.src_data_len;
 
-	vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO);
+	vc_req->inhdr = get_data_ptr(vc_req, desc, VHOST_ACCESS_WO);
 	if (unlikely(vc_req->inhdr == NULL)) {
 		ret = VIRTIO_CRYPTO_BADMSG;
 		goto error_exit;
@@ -786,6 +908,9 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	return 0;
 
 error_exit:
+	if (vc_req->wb)
+		free_wb_data(vc_req->wb, vc_req->wb_pool);
+
 	vc_req->len = INHDR_LEN;
 	return ret;
 }
@@ -796,7 +921,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 		struct virtio_crypto_alg_chain_data_req *chain,
 		struct vring_desc *cur_desc)
 {
-	struct vring_desc *desc = cur_desc;
+	struct vring_desc *desc = cur_desc, *digest_desc;
+	struct vhost_crypto_writeback_data *ewb = NULL, *ewb2 = NULL;
 	struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst;
 	uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET);
 	uint32_t digest_offset;
@@ -812,21 +938,30 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	}
 
 	m_src->data_len = chain->para.src_data_len;
-	m_dst->data_len = chain->para.dst_data_len;
 
 	switch (vcrypto->option) {
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
+		m_dst->data_len = chain->para.dst_data_len;
+
 		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
 				chain->para.src_data_len);
-		m_src->buf_addr = get_data_ptr(vc_req, &desc,
-				chain->para.src_data_len, VHOST_ACCESS_RO);
+		m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
 		if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
 		}
+
+		if (unlikely(move_desc(vc_req->head, &desc,
+				chain->para.src_data_len) < 0)) {
+			VC_LOG_ERR("Incorrect descriptor");
+			ret = VIRTIO_CRYPTO_ERR;
+			goto error_exit;
+		}
 		break;
 	case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
+		vc_req->wb_pool = vcrypto->wb_pool;
+
 		if (unlikely(chain->para.src_data_len >
 				RTE_MBUF_DEFAULT_BUF_SIZE)) {
 			VC_LOG_ERR("Not enough space to do data copy");
@@ -838,6 +973,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 			ret = VIRTIO_CRYPTO_BADMSG;
 			goto error_exit;
 		}
+
 		break;
 	default:
 		ret = VIRTIO_CRYPTO_BADMSG;
@@ -856,46 +992,70 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
 		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
 				desc->addr, chain->para.dst_data_len);
-		m_dst->buf_addr = get_data_ptr(vc_req, &desc,
-				chain->para.dst_data_len, VHOST_ACCESS_RW);
+		m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW);
 		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
 		}
 
+		if (unlikely(move_desc(vc_req->head, &desc,
+				chain->para.dst_data_len) < 0)) {
+			VC_LOG_ERR("Incorrect descriptor");
+			ret = VIRTIO_CRYPTO_ERR;
+			goto error_exit;
+		}
+
 		op->sym->auth.digest.phys_addr = gpa_to_hpa(vcrypto->dev,
 				desc->addr, chain->para.hash_result_len);
-		op->sym->auth.digest.data = get_data_ptr(vc_req, &desc,
-				chain->para.hash_result_len, VHOST_ACCESS_RW);
+		op->sym->auth.digest.data = get_data_ptr(vc_req, desc,
+				VHOST_ACCESS_RW);
 		if (unlikely(op->sym->auth.digest.phys_addr == 0)) {
 			VC_LOG_ERR("zero_copy may fail due to cross page data");
 			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
 		}
+
+		if (unlikely(move_desc(vc_req->head, &desc,
+				chain->para.hash_result_len) < 0)) {
+			VC_LOG_ERR("Incorrect descriptor");
+			ret = VIRTIO_CRYPTO_ERR;
+			goto error_exit;
+		}
+
 		break;
 	case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
-		digest_offset = m_dst->data_len;
-		digest_addr = rte_pktmbuf_mtod_offset(m_dst, void *,
-				digest_offset);
+		vc_req->wb = prepare_write_back_data(vc_req, &desc, &ewb,
+				rte_pktmbuf_mtod(m_src, uint8_t *),
+				chain->para.cipher_start_src_offset,
+				chain->para.dst_data_len -
+				chain->para.cipher_start_src_offset);
+		if (unlikely(vc_req->wb == NULL)) {
+			ret = VIRTIO_CRYPTO_ERR;
+			goto error_exit;
+		}
 
-		vc_req->wb_desc = desc;
-		vc_req->wb_len = m_dst->data_len + chain->para.hash_result_len;
+		digest_offset = m_src->data_len;
+		digest_addr = rte_pktmbuf_mtod_offset(m_src, void *,
+				digest_offset);
+		digest_desc = desc;
 
-		if (unlikely(move_desc(vc_req->head, &desc,
-				chain->para.dst_data_len) < 0)) {
-			ret = VIRTIO_CRYPTO_BADMSG;
+		/** create a wb_data for digest */
+		ewb->next = prepare_write_back_data(vc_req, &desc, &ewb2,
+				digest_addr, 0, chain->para.hash_result_len);
+		if (unlikely(ewb->next == NULL)) {
+			ret = VIRTIO_CRYPTO_ERR;
 			goto error_exit;
 		}
 
-		if (unlikely(copy_data(digest_addr, vc_req, &desc,
+		if (unlikely(copy_data(digest_addr, vc_req, &digest_desc,
 				chain->para.hash_result_len)) < 0) {
 			ret = VIRTIO_CRYPTO_BADMSG;
 			goto error_exit;
 		}
 
 		op->sym->auth.digest.data = digest_addr;
-		op->sym->auth.digest.phys_addr = rte_pktmbuf_iova_offset(m_dst,
+		op->sym->auth.digest.phys_addr = rte_pktmbuf_iova_offset(m_src,
 				digest_offset);
 		break;
 	default:
@@ -904,7 +1064,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	}
 
 	/* record inhdr */
-	vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO);
+	vc_req->inhdr = get_data_ptr(vc_req, desc, VHOST_ACCESS_WO);
 	if (unlikely(vc_req->inhdr == NULL)) {
 		ret = VIRTIO_CRYPTO_BADMSG;
 		goto error_exit;
@@ -927,6 +1087,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 	return 0;
 
 error_exit:
+	if (vc_req->wb)
+		free_wb_data(vc_req->wb, vc_req->wb_pool);
 	vc_req->len = INHDR_LEN;
 	return ret;
 }
@@ -967,7 +1129,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
 	vc_req->head = head;
 	vc_req->zero_copy = vcrypto->option;
 
-	req = get_data_ptr(vc_req, &desc, sizeof(*req), VHOST_ACCESS_RO);
+	req = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO);
 	if (unlikely(req == NULL)) {
 		switch (vcrypto->option) {
 		case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
@@ -988,6 +1150,12 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
 			VC_LOG_ERR("Invalid option");
 			goto error_exit;
 		}
+	} else {
+		if (unlikely(move_desc(vc_req->head, &desc,
+				sizeof(*req)) < 0)) {
+			VC_LOG_ERR("Incorrect descriptor");
+			goto error_exit;
+		}
 	}
 
 	switch (req->header.opcode) {
@@ -1062,7 +1230,6 @@ vhost_crypto_finalize_one_request(struct rte_crypto_op *op,
 	struct rte_mbuf *m_dst = op->sym->m_dst;
 	struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(m_src);
 	uint16_t desc_idx;
-	int ret = 0;
 
 	if (unlikely(!vc_req)) {
 		VC_LOG_ERR("Failed to retrieve vc_req");
@@ -1077,19 +1244,18 @@ vhost_crypto_finalize_one_request(struct rte_crypto_op *op,
 	if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS))
 		vc_req->inhdr->status = VIRTIO_CRYPTO_ERR;
 	else {
-		if (vc_req->zero_copy == 0) {
-			ret = write_back_data(op, vc_req);
-			if (unlikely(ret != 0))
-				vc_req->inhdr->status = VIRTIO_CRYPTO_ERR;
-		}
+		if (vc_req->zero_copy == 0)
+			write_back_data(vc_req);
 	}
 
 	vc_req->vq->used->ring[desc_idx].id = desc_idx;
 	vc_req->vq->used->ring[desc_idx].len = vc_req->len;
 
-	rte_mempool_put(m_dst->pool, (void *)m_dst);
 	rte_mempool_put(m_src->pool, (void *)m_src);
 
+	if (m_dst)
+		rte_mempool_put(m_dst->pool, (void *)m_dst);
+
 	return vc_req->vq;
 }
 
@@ -1186,6 +1352,18 @@ rte_vhost_crypto_create(int vid, uint8_t cryptodev_id,
 		goto error_exit;
 	}
 
+	snprintf(name, 127, "WB_POOL_VM_%u", (uint32_t)vid);
+	vcrypto->wb_pool = rte_mempool_create(name,
+			VHOST_CRYPTO_MBUF_POOL_SIZE,
+			sizeof(struct vhost_crypto_writeback_data),
+			128, 0, NULL, NULL, NULL, NULL,
+			rte_socket_id(), 0);
+	if (!vcrypto->wb_pool) {
+		VC_LOG_ERR("Failed to creath mempool");
+		ret = -ENOMEM;
+		goto error_exit;
+	}
+
 	dev->extern_data = vcrypto;
 	dev->extern_ops.pre_msg_handle = NULL;
 	dev->extern_ops.post_msg_handle = vhost_crypto_msg_post_handler;
@@ -1222,6 +1400,7 @@ rte_vhost_crypto_free(int vid)
 
 	rte_hash_free(vcrypto->session_map);
 	rte_mempool_free(vcrypto->mbuf_pool);
+	rte_mempool_free(vcrypto->wb_pool);
 	rte_free(vcrypto);
 
 	dev->extern_data = NULL;
@@ -1257,11 +1436,30 @@ rte_vhost_crypto_set_zero_copy(int vid, enum rte_vhost_crypto_zero_copy option)
 	if (vcrypto->option == (uint8_t)option)
 		return 0;
 
-	if (!(rte_mempool_full(vcrypto->mbuf_pool))) {
+	if (!rte_mempool_full(vcrypto->mbuf_pool) || (vcrypto->wb_pool &&
+			!rte_mempool_full(vcrypto->wb_pool))) { /* may be NULL */
 		VC_LOG_ERR("Cannot update zero copy as mempool is not full");
 		return -EINVAL;
 	}
 
+	if (option == RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE) {
+		char name[128];
+
+		snprintf(name, 127, "WB_POOL_VM_%u", (uint32_t)vid);
+		vcrypto->wb_pool = rte_mempool_create(name,
+				VHOST_CRYPTO_MBUF_POOL_SIZE,
+				sizeof(struct vhost_crypto_writeback_data),
+				128, 0, NULL, NULL, NULL, NULL,
+				rte_socket_id(), 0);
+		if (!vcrypto->wb_pool) {
+			VC_LOG_ERR("Failed to creath mbuf pool");
+			return -ENOMEM;
+		}
+	} else {
+		rte_mempool_free(vcrypto->wb_pool);
+		vcrypto->wb_pool = NULL;
+	}
+
 	vcrypto->option = (uint8_t)option;
 
 	return 0;
@@ -1277,9 +1475,8 @@ rte_vhost_crypto_fetch_requests(int vid, uint32_t qid,
 	struct vhost_virtqueue *vq;
 	uint16_t avail_idx;
 	uint16_t start_idx;
-	uint16_t required;
 	uint16_t count;
-	uint16_t i;
+	uint16_t i = 0;
 
 	if (unlikely(dev == NULL)) {
 		VC_LOG_ERR("Invalid vid %i", vid);
@@ -1311,27 +1508,66 @@ rte_vhost_crypto_fetch_requests(int vid, uint32_t qid,
 	/* for zero copy, we need 2 empty mbufs for src and dst, otherwise
 	 * we need only 1 mbuf as src and dst
 	 */
-	required = count * 2;
-	if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool, (void **)mbufs,
-			required) < 0)) {
-		VC_LOG_ERR("Insufficient memory");
-		return -ENOMEM;
-	}
+	switch (vcrypto->option) {
+	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
+		if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool,
+				(void **)mbufs, count * 2) < 0)) {
+			VC_LOG_ERR("Insufficient memory");
+			return -ENOMEM;
+		}
 
-	for (i = 0; i < count; i++) {
-		uint16_t used_idx = (start_idx + i) & (vq->size - 1);
-		uint16_t desc_idx = vq->avail->ring[used_idx];
-		struct vring_desc *head = &vq->desc[desc_idx];
-		struct rte_crypto_op *op = ops[i];
+		for (i = 0; i < count; i++) {
+			uint16_t used_idx = (start_idx + i) & (vq->size - 1);
+			uint16_t desc_idx = vq->avail->ring[used_idx];
+			struct vring_desc *head = &vq->desc[desc_idx];
+			struct rte_crypto_op *op = ops[i];
 
-		op->sym->m_src = mbufs[i * 2];
-		op->sym->m_dst = mbufs[i * 2 + 1];
-		op->sym->m_src->data_off = 0;
-		op->sym->m_dst->data_off = 0;
+			op->sym->m_src = mbufs[i * 2];
+			op->sym->m_dst = mbufs[i * 2 + 1];
+			op->sym->m_src->data_off = 0;
+			op->sym->m_dst->data_off = 0;
+
+			if (unlikely(vhost_crypto_process_one_req(vcrypto, vq,
+					op, head, desc_idx) < 0))
+				break; /* request failed: stop fetching */
+		}
+
+		if (unlikely(i < count))
+			rte_mempool_put_bulk(vcrypto->mbuf_pool,
+					(void **)&mbufs[i * 2],
+					(count - i) * 2);
+
+		break;
+
+	case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
+		if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool,
+				(void **)mbufs, count) < 0)) {
+			VC_LOG_ERR("Insufficient memory");
+			return -ENOMEM;
+		}
+
+		for (i = 0; i < count; i++) {
+			uint16_t used_idx = (start_idx + i) & (vq->size - 1);
+			uint16_t desc_idx = vq->avail->ring[used_idx];
+			struct vring_desc *head = &vq->desc[desc_idx];
+			struct rte_crypto_op *op = ops[i];
+
+			op->sym->m_src = mbufs[i];
+			op->sym->m_dst = NULL;
+			op->sym->m_src->data_off = 0;
+
+			if (unlikely(vhost_crypto_process_one_req(vcrypto, vq,
+					op, head, desc_idx) < 0))
+				break; /* request failed: stop fetching */
+		}
+
+		if (unlikely(i < count))
+			rte_mempool_put_bulk(vcrypto->mbuf_pool,
+					(void **)&mbufs[i],
+					count - i);
+
+		break;
 
-		if (unlikely(vhost_crypto_process_one_req(vcrypto, vq, op, head,
-				desc_idx)) < 0)
-			break;
 	}
 
 	vq->last_used_idx += i;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index cc154f31..3ea64eba 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1732,7 +1732,7 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg)
 	if (ret <= 0)
 		return ret;
 
-	if (msg && msg->size) {
+	if (msg->size) {
 		if (msg->size > sizeof(msg->payload)) {
 			RTE_LOG(ERR, VHOST_CONFIG,
 				"invalid msg size: %d\n", msg->size);
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 8ad30c94..5e1a1a72 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -598,7 +598,7 @@ reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 						avail_idx, &desc_count,
 						buf_vec, &vec_idx,
 						&buf_id, &len,
-						VHOST_ACCESS_RO) < 0))
+						VHOST_ACCESS_RW) < 0))
 			return -1;
 
 		len = RTE_MIN(len, size);
@@ -1503,7 +1503,7 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 						vq->last_avail_idx, &desc_count,
 						buf_vec, &nr_vec,
 						&buf_id, &dummy_len,
-						VHOST_ACCESS_RW) < 0))
+						VHOST_ACCESS_RO) < 0))
 			break;
 
 		if (likely(dev->dequeue_zero_copy == 0))
diff --git a/meson.build b/meson.build
index 6d25b90f..a72237e1 100644
--- a/meson.build
+++ b/meson.build
@@ -2,7 +2,7 @@
 # Copyright(c) 2017 Intel Corporation
 
 project('DPDK', 'C',
-	version: '18.11.0-rc2',
+	version: '18.11.0-rc3',
 	license: 'BSD',
 	default_options: ['buildtype=release', 'default_library=static'],
 	meson_version: '>= 0.41'
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 3ebc4e64..5699d979 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -50,9 +50,11 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_LPM)            += -lrte_lpm
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL)            += --whole-archive
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL)            += -lrte_acl
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL)            += --no-whole-archive
+_LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY)      += --no-as-needed
 _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY)      += --whole-archive
 _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY)      += -lrte_telemetry -ljansson
 _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY)      += --no-whole-archive
+_LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY)      += --as-needed
 _LDLIBS-$(CONFIG_RTE_LIBRTE_JOBSTATS)       += -lrte_jobstats
 _LDLIBS-$(CONFIG_RTE_LIBRTE_METRICS)        += -lrte_metrics
 _LDLIBS-$(CONFIG_RTE_LIBRTE_BITRATE)        += -lrte_bitratestats
diff --git a/mk/rte.sdkconfig.mk b/mk/rte.sdkconfig.mk
index d90d62cc..fa77331c 100644
--- a/mk/rte.sdkconfig.mk
+++ b/mk/rte.sdkconfig.mk
@@ -114,8 +114,7 @@ SDK_RELPATH=$(shell $(RTE_SDK)/buildtools/relpath.sh $(abspath $(RTE_SRCDIR)) \
 OUTPUT_RELPATH=$(shell $(RTE_SDK)/buildtools/relpath.sh $(abspath $(RTE_OUTPUT)) \
 				$(abspath $(RTE_SRCDIR)))
 $(RTE_OUTPUT)/Makefile: | $(RTE_OUTPUT)
-	$(Q)$(RTE_SDK)/buildtools/gen-build-mk.sh $(SDK_RELPATH) $(OUTPUT_RELPATH) \
-		> $(RTE_OUTPUT)/Makefile
+	$(Q)$(RTE_SDK)/buildtools/gen-build-mk.sh $(SDK_RELPATH) > $@
 
 # clean installed files, and generate a new config header file
 # if NODOTCONF variable is defined, don't try to rebuild .config
diff --git a/test/bpf/t1.c b/test/bpf/t1.c
index 60f9434a..3364b4f1 100644
--- a/test/bpf/t1.c
+++ b/test/bpf/t1.c
@@ -20,32 +20,36 @@
  * (011) ret      #1
  * (012) ret      #0
  *
- * To compile:
- * clang -O2 -target bpf -c t1.c
+ * To compile on x86:
+ * clang -O2 -U __GNUC__ -target bpf -c t1.c
+ *
+ * To compile on ARM:
+ * clang -O2 -I/usr/include/aarch64-linux-gnu/ -target bpf -c t1.c
  */
 
 #include <stdint.h>
 #include <net/ethernet.h>
 #include <netinet/ip.h>
 #include <netinet/udp.h>
+#include <arpa/inet.h>
 
 uint64_t
 entry(void *pkt)
 {
 	struct ether_header *ether_header = (void *)pkt;
 
-	if (ether_header->ether_type != __builtin_bswap16(0x0800))
+	if (ether_header->ether_type != htons(0x0800))
 		return 0;
 
 	struct iphdr *iphdr = (void *)(ether_header + 1);
 	if (iphdr->protocol != 17 || (iphdr->frag_off & 0x1ffff) != 0 ||
-			iphdr->daddr != __builtin_bswap32(0x1020304))
+			iphdr->daddr != htonl(0x1020304))
 		return 0;
 
 	int hlen = iphdr->ihl * 4;
 	struct udphdr *udphdr = (void *)iphdr + hlen;
 
-	if (udphdr->dest !=  __builtin_bswap16(5000))
+	if (udphdr->dest != htons(5000))
 		return 0;
 
 	return 1;
diff --git a/test/bpf/t3.c b/test/bpf/t3.c
index 531b9cb8..9ba34638 100644
--- a/test/bpf/t3.c
+++ b/test/bpf/t3.c
@@ -6,9 +6,15 @@
  * eBPF program sample.
  * Accepts pointer to struct rte_mbuf as an input parameter.
  * Dump the mbuf into stdout if it is an ARP packet (aka tcpdump 'arp').
- * To compile:
- * clang -O2 -I${RTE_SDK}/${RTE_TARGET}/include \
+ *
+ * To compile on x86:
+ * clang -O2 -U __GNUC__ -I${RTE_SDK}/${RTE_TARGET}/include \
  * -target bpf -Wno-int-to-void-pointer-cast -c t3.c
+ *
+ * To compile on ARM:
+ * clang -O2 -I/usr/include/aarch64-linux-gnu \
+ * -I${RTE_SDK}/${RTE_TARGET}/include -target bpf \
+ * -Wno-int-to-void-pointer-cast -c t3.c
  */
 
 #include <stdint.h>
@@ -17,6 +23,7 @@
 #include <net/ethernet.h>
 #include <rte_config.h>
 #include "mbuf.h"
+#include <arpa/inet.h>
 
 extern void rte_pktmbuf_dump(FILE *, const struct rte_mbuf *, unsigned int);
 
@@ -29,7 +36,7 @@ entry(const void *pkt)
 	mb = pkt;
 	eth = rte_pktmbuf_mtod(mb, const struct ether_header *);
 
-	if (eth->ether_type == __builtin_bswap16(ETHERTYPE_ARP))
+	if (eth->ether_type == htons(ETHERTYPE_ARP))
 		rte_pktmbuf_dump(stdout, mb, 64);
 
 	return 1;
diff --git a/test/test/test.c b/test/test/test.c
index 24df6299..12fabd0b 100644
--- a/test/test/test.c
+++ b/test/test/test.c
@@ -102,8 +102,10 @@ main(int argc, char **argv)
 		/* merge argc/argv and the environment args */
 		all_argc = argc + eargc;
 		all_argv = malloc(sizeof(*all_argv) * (all_argc + 1));
-		if (all_argv == NULL)
-			return -1;
+		if (all_argv == NULL) {
+			ret = -1;
+			goto out;
+		}
 
 		for (i = 0; i < argc; i++)
 			all_argv[i] = argv[i];
diff --git a/test/test/test_bpf.c b/test/test/test_bpf.c
index fa17c4f7..1d50401a 100644
--- a/test/test/test_bpf.c
+++ b/test/test/test_bpf.c
@@ -48,6 +48,12 @@ struct dummy_vect8 {
 #define TEST_JCC_3	5678
 #define TEST_JCC_4	TEST_FILL_1
 
+#define TEST_IMM_1	UINT64_MAX
+#define TEST_IMM_2	((uint64_t)INT64_MIN)
+#define TEST_IMM_3	((uint64_t)INT64_MAX + INT32_MAX)
+#define TEST_IMM_4	((uint64_t)UINT32_MAX)
+#define TEST_IMM_5	((uint64_t)UINT32_MAX + 1)
+
 struct bpf_test {
 	const char *name;
 	size_t arg_sz;
@@ -268,6 +274,94 @@ test_load1_check(uint64_t rc, const void *arg)
 	return cmp_res(__func__, v, rc, dft, dft, sizeof(*dft));
 }
 
+/* load immediate test-cases */
+static const struct ebpf_insn test_ldimm1_prog[] = {
+
+	{
+		.code = (BPF_LD | BPF_IMM | EBPF_DW),
+		.dst_reg = EBPF_REG_0,
+		.imm = (uint32_t)TEST_IMM_1,
+	},
+	{
+		.imm = TEST_IMM_1 >> 32,
+	},
+	{
+		.code = (BPF_LD | BPF_IMM | EBPF_DW),
+		.dst_reg = EBPF_REG_3,
+		.imm = (uint32_t)TEST_IMM_2,
+	},
+	{
+		.imm = TEST_IMM_2 >> 32,
+	},
+	{
+		.code = (BPF_LD | BPF_IMM | EBPF_DW),
+		.dst_reg = EBPF_REG_5,
+		.imm = (uint32_t)TEST_IMM_3,
+	},
+	{
+		.imm = TEST_IMM_3 >> 32,
+	},
+	{
+		.code = (BPF_LD | BPF_IMM | EBPF_DW),
+		.dst_reg = EBPF_REG_7,
+		.imm = (uint32_t)TEST_IMM_4,
+	},
+	{
+		.imm = TEST_IMM_4 >> 32,
+	},
+	{
+		.code = (BPF_LD | BPF_IMM | EBPF_DW),
+		.dst_reg = EBPF_REG_9,
+		.imm = (uint32_t)TEST_IMM_5,
+	},
+	{
+		.imm = TEST_IMM_5 >> 32,
+	},
+	/* return sum */
+	{
+		.code = (EBPF_ALU64 | BPF_ADD | BPF_X),
+		.dst_reg = EBPF_REG_0,
+		.src_reg = EBPF_REG_3,
+	},
+	{
+		.code = (EBPF_ALU64 | BPF_ADD | BPF_X),
+		.dst_reg = EBPF_REG_0,
+		.src_reg = EBPF_REG_5,
+	},
+	{
+		.code = (EBPF_ALU64 | BPF_ADD | BPF_X),
+		.dst_reg = EBPF_REG_0,
+		.src_reg = EBPF_REG_7,
+	},
+	{
+		.code = (EBPF_ALU64 | BPF_ADD | BPF_X),
+		.dst_reg = EBPF_REG_0,
+		.src_reg = EBPF_REG_9,
+	},
+	{
+		.code = (BPF_JMP | EBPF_EXIT),
+	},
+};
+
+static int
+test_ldimm1_check(uint64_t rc, const void *arg)
+{
+	uint64_t v1, v2;
+
+	v1 = TEST_IMM_1;
+	v2 = TEST_IMM_2;
+	v1 += v2;
+	v2 = TEST_IMM_3;
+	v1 += v2;
+	v2 = TEST_IMM_4;
+	v1 += v2;
+	v2 = TEST_IMM_5;
+	v1 += v2;
+
+	return cmp_res(__func__, v1, rc, arg, arg, 0);
+}
+
+
 /* alu mul test-cases */
 static const struct ebpf_insn test_mul1_prog[] = {
 
@@ -1727,6 +1821,20 @@ static const struct bpf_test tests[] = {
 		.check_result = test_load1_check,
 	},
 	{
+		.name = "test_ldimm1",
+		.arg_sz = sizeof(struct dummy_offset),
+		.prm = {
+			.ins = test_ldimm1_prog,
+			.nb_ins = RTE_DIM(test_ldimm1_prog),
+			.prog_arg = {
+				.type = RTE_BPF_ARG_PTR,
+				.size = sizeof(struct dummy_offset),
+			},
+		},
+		.prepare = test_store1_prepare,
+		.check_result = test_ldimm1_check,
+	},
+	{
 		.name = "test_mul1",
 		.arg_sz = sizeof(struct dummy_vect8),
 		.prm = {
diff --git a/test/test/test_common.c b/test/test/test_common.c
index 7a67e458..c6d17baa 100644
--- a/test/test/test_common.c
+++ b/test/test/test_common.c
@@ -189,6 +189,37 @@ test_log2(void)
 }
 
 static int
+test_fls(void)
+{
+	struct fls_test_vector {
+		uint32_t arg;
+		int rc;
+	};
+	int expected, rc;
+	uint32_t i, arg;
+
+	const struct fls_test_vector test[] = {
+		{0x0, 0},
+		{0x1, 1},
+		{0x4000, 15},
+		{0x80000000, 32},
+	};
+
+	for (i = 0; i < RTE_DIM(test); i++) {
+		arg = test[i].arg;
+		rc = rte_fls_u32(arg);
+		expected = test[i].rc;
+		if (rc != expected) {
+			printf("Wrong rte_fls_u32(0x%x) rc=%d, expected=%d\n",
+				arg, rc, expected);
+			return TEST_FAILED;
+		}
+	}
+
+	return 0;
+}
+
+static int
 test_common(void)
 {
 	int ret = 0;
@@ -196,6 +227,7 @@ test_common(void)
 	ret |= test_macros(0);
 	ret |= test_misc();
 	ret |= test_log2();
+	ret |= test_fls();
 
 	return ret;
 }
diff --git a/test/test/test_hash_readwrite.c b/test/test/test_hash_readwrite.c
index 01f986cf..6b695ce6 100644
--- a/test/test/test_hash_readwrite.c
+++ b/test/test/test_hash_readwrite.c
@@ -678,24 +678,26 @@ test_hash_readwrite_main(void)
 							reader_faster) < 0)
 		return -1;
 
+	printf("================\n");
 	printf("Results summary:\n");
+	printf("================\n");
 
 	printf("single read: %u\n", htm_results.single_read);
 	printf("single write: %u\n", htm_results.single_write);
 	for (i = 0; i < NUM_TEST; i++) {
-		printf("core_cnt: %u\n", core_cnt[i]);
+		printf("+++ core_cnt: %u +++\n", core_cnt[i]);
 		printf("HTM:\n");
-		printf("read only: %u\n", htm_results.read_only[i]);
-		printf("write only: %u\n", htm_results.write_only[i]);
-		printf("read-write read: %u\n", htm_results.read_write_r[i]);
-		printf("read-write write: %u\n", htm_results.read_write_w[i]);
+		printf("  read only: %u\n", htm_results.read_only[i]);
+		printf("  write only: %u\n", htm_results.write_only[i]);
+		printf("  read-write read: %u\n", htm_results.read_write_r[i]);
+		printf("  read-write write: %u\n", htm_results.read_write_w[i]);
 
 		printf("non HTM:\n");
-		printf("read only: %u\n", non_htm_results.read_only[i]);
-		printf("write only: %u\n", non_htm_results.write_only[i]);
-		printf("read-write read: %u\n",
+		printf("  read only: %u\n", non_htm_results.read_only[i]);
+		printf("  write only: %u\n", non_htm_results.write_only[i]);
+		printf("  read-write read: %u\n",
 			non_htm_results.read_write_r[i]);
-		printf("read-write write: %u\n",
+		printf("  read-write write: %u\n",
 			non_htm_results.read_write_w[i]);
 	}
 
diff --git a/test/test/test_kni.c b/test/test/test_kni.c
index f3c19b5a..c92c0905 100644
--- a/test/test/test_kni.c
+++ b/test/test/test_kni.c
@@ -549,7 +549,7 @@ test_kni(void)
 	if (!dir) {
 		if (errno == ENOENT) {
 			printf("Cannot run UT due to missing rte_kni module\n");
-			return -1;
+			return TEST_SKIPPED;
 		}
 		printf("opendir: %s", strerror(errno));
 		return -1;
diff --git a/test/test/test_power_acpi_cpufreq.c b/test/test/test_power_acpi_cpufreq.c
index 22e541d6..6d637cc7 100644
--- a/test/test/test_power_acpi_cpufreq.c
+++ b/test/test/test_power_acpi_cpufreq.c
@@ -441,7 +441,7 @@ test_power_acpi_cpufreq(void)
 				"correctly(APCI cpufreq) or operating in another valid "
 				"Power management environment\n", TEST_POWER_LCORE_ID);
 		rte_power_unset_env();
-		return -1;
+		return TEST_SKIPPED;
 	}
 
 	/**
diff --git a/test/test/test_reorder.c b/test/test/test_reorder.c
index ccee4d08..58fa9c71 100644
--- a/test/test/test_reorder.c
+++ b/test/test/test_reorder.c
@@ -269,7 +269,7 @@ test_reorder_drain(void)
 		goto exit;
 	}
 	if (robufs[0] != NULL)
-		rte_pktmbuf_free(robufs[i]);
+		rte_pktmbuf_free(robufs[0]);
 
 	/* Insert more packets
 	 * RB[] = {NULL, NULL, NULL, NULL}