Diffstat (limited to 'lib')
94 files changed, 667 insertions(+), 374 deletions(-)
diff --git a/lib/librte_acl/acl_vect.h b/lib/librte_acl/acl_vect.h index 194fca90..951e5828 100644 --- a/lib/librte_acl/acl_vect.h +++ b/lib/librte_acl/acl_vect.h @@ -17,7 +17,7 @@ extern "C" { /* - * Takes 2 SIMD registers containing N transitions eachi (tr0, tr1). + * Takes 2 SIMD registers containing N transitions each (tr0, tr1). * Shuffles it into different representation: * lo - contains low 32 bits of given N transitions. * hi - contains high 32 bits of given N transitions. @@ -66,7 +66,7 @@ extern "C" { \ dfa_ofs = _##P##_sub_epi32(t, r); \ \ - /* QUAD/SINGLE caluclations. */ \ + /* QUAD/SINGLE calculations. */ \ t = _##P##_cmpgt_epi8(in, tr_hi); \ t = _##P##_sign_epi8(t, t); \ t = _##P##_maddubs_epi16(t, t); \ diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build index aec792f5..2207dbaf 100644 --- a/lib/librte_acl/meson.build +++ b/lib/librte_acl/meson.build @@ -23,7 +23,7 @@ if arch_subdir == 'x86' avx2_tmplib = static_library('avx2_tmp', 'acl_run_avx2.c', dependencies: static_rte_eal, - c_args: '-mavx2') + c_args: cflags + ['-mavx2']) objs += avx2_tmplib.extract_objects('acl_run_avx2.c') cflags += '-DCC_AVX2_SUPPORT' endif diff --git a/lib/librte_bbdev/rte_bbdev.h b/lib/librte_bbdev/rte_bbdev.h index 25ef409f..4a2873b2 100644 --- a/lib/librte_bbdev/rte_bbdev.h +++ b/lib/librte_bbdev/rte_bbdev.h @@ -43,7 +43,7 @@ extern "C" { #define RTE_BBDEV_MAX_DEVS 128 /**< Max number of devices */ #endif -/** Flags indiciate current state of BBDEV device */ +/** Flags indicate current state of BBDEV device */ enum rte_bbdev_state { RTE_BBDEV_UNUSED, RTE_BBDEV_INITIALIZED @@ -161,7 +161,7 @@ rte_bbdev_queue_configure(uint16_t dev_id, uint16_t queue_id, /** * Start a device. - * This is the last step needed before enqueueing operations is possible. + * This is the last step needed before enqueuing operations is possible. * * @param dev_id * The identifier of the device. diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c index c4b28f62..639e4754 100644 --- a/lib/librte_bitratestats/rte_bitrate.c +++ b/lib/librte_bitratestats/rte_bitrate.c @@ -67,6 +67,7 @@ rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data, int64_t delta; const int64_t alpha_percent = 20; uint64_t values[6]; + int ret; if (bitrate_data == NULL) return -EINVAL; @@ -124,7 +125,10 @@ rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data, values[3] = port_data->mean_obits; values[4] = port_data->peak_ibits; values[5] = port_data->peak_obits; - rte_metrics_update_values(port_id, bitrate_data->id_stats_set, + ret = rte_metrics_update_values(port_id, bitrate_data->id_stats_set, values, ARRAY_SIZE(values)); + if (ret < 0) + return ret; + return 0; } diff --git a/lib/librte_bpf/rte_bpf.h b/lib/librte_bpf/rte_bpf.h index ad62ef2c..ab92af8f 100644 --- a/lib/librte_bpf/rte_bpf.h +++ b/lib/librte_bpf/rte_bpf.h @@ -120,7 +120,7 @@ rte_bpf_destroy(struct rte_bpf *bpf); * Create a new eBPF execution context and load given BPF code into it. * * @param prm - * Parameters used to create and initialise the BPF exeution context. + * Parameters used to create and initialise the BPF execution context. * @return * BPF handle that is used in future BPF operations, * or NULL on error, with error code set in rte_errno. @@ -136,7 +136,7 @@ rte_bpf_load(const struct rte_bpf_prm *prm); * file into it. * * @param prm - * Parameters used to create and initialise the BPF exeution context. + * Parameters used to create and initialise the BPF execution context. 
* @param fname * Pathname for a ELF file. * @param sname @@ -183,7 +183,7 @@ rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], uint64_t rc[], uint32_t num); /** - * Provide information about natively compield code for given BPF handle. + * Provide information about natively compiled code for given BPF handle. * * @param bpf * handle for the BPF code. diff --git a/lib/librte_bpf/rte_bpf_ethdev.h b/lib/librte_bpf/rte_bpf_ethdev.h index 11d09cdc..1943372f 100644 --- a/lib/librte_bpf/rte_bpf_ethdev.h +++ b/lib/librte_bpf/rte_bpf_ethdev.h @@ -73,7 +73,7 @@ rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue); * @param sname * Name of the executable section within the file to load. * @param prm - * Parameters used to create and initialise the BPF exeution context. + * Parameters used to create and initialise the BPF execution context. * @param flags * Flags that define expected behavior of the loaded filter * (i.e. jited/non-jited version to use). @@ -98,7 +98,7 @@ rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue, * @param sname * Name of the executable section within the file to load. * @param prm - * Parameters used to create and initialise the BPF exeution context. + * Parameters used to create and initialise the BPF execution context. * @param flags * Flags that define expected expected behavior of the loaded filter * (i.e. jited/non-jited version to use). diff --git a/lib/librte_cfgfile/rte_cfgfile.c b/lib/librte_cfgfile/rte_cfgfile.c index 7d8c941e..61426963 100644 --- a/lib/librte_cfgfile/rte_cfgfile.c +++ b/lib/librte_cfgfile/rte_cfgfile.c @@ -7,6 +7,7 @@ #include <string.h> #include <ctype.h> #include <errno.h> +#include <rte_string_fns.h> #include <rte_common.h> #include "rte_cfgfile.h" @@ -224,10 +225,11 @@ rte_cfgfile_load_with_params(const char *filename, int flags, _strip(split[1], strlen(split[1])); char *end = memchr(split[1], '\\', strlen(split[1])); + size_t split_len = strlen(split[1]) + 1; while (end != NULL) { if (*(end+1) == params->comment_character) { *end = '\0'; - strcat(split[1], end+1); + strlcat(split[1], end+1, split_len); } else end++; end = memchr(end, '\\', strlen(end)); diff --git a/lib/librte_cryptodev/rte_crypto_asym.h b/lib/librte_cryptodev/rte_crypto_asym.h index 5e185b2d..b1c1a6c1 100644 --- a/lib/librte_cryptodev/rte_crypto_asym.h +++ b/lib/librte_cryptodev/rte_crypto_asym.h @@ -112,15 +112,15 @@ enum rte_crypto_rsa_padding_type { /**< RSA no padding scheme */ RTE_CRYPTO_RSA_PKCS1_V1_5_BT0, /**< RSA PKCS#1 V1.5 Block Type 0 padding scheme - * as descibed in rfc2313 + * as described in rfc2313 */ RTE_CRYPTO_RSA_PKCS1_V1_5_BT1, /**< RSA PKCS#1 V1.5 Block Type 01 padding scheme - * as descibed in rfc2313 + * as described in rfc2313 */ RTE_CRYPTO_RSA_PKCS1_V1_5_BT2, /**< RSA PKCS#1 V1.5 Block Type 02 padding scheme - * as descibed in rfc2313 + * as described in rfc2313 */ RTE_CRYPTO_RSA_PADDING_OAEP, /**< RSA PKCS#1 OAEP padding scheme */ @@ -227,7 +227,7 @@ struct rte_crypto_rsa_xform { /** * Asymmetric Modular exponentiation transform data * - * Structure describing modular exponentation xform param + * Structure describing modular exponentiation xform param * */ struct rte_crypto_modex_xform { @@ -271,7 +271,7 @@ struct rte_crypto_dh_xform { rte_crypto_param p; /**< p : Prime modulus data - * DH prime modulous data in octet-string network byte order format. + * DH prime modulus data in octet-string network byte order format. 
* */ diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c index a52eaaa4..ff8520cf 100644 --- a/lib/librte_cryptodev/rte_cryptodev.c +++ b/lib/librte_cryptodev/rte_cryptodev.c @@ -576,7 +576,7 @@ rte_cryptodev_devices_get(const char *driver_name, uint8_t *devices, cmp = strncmp(devs[i].device->driver->name, driver_name, - strlen(driver_name)); + strlen(driver_name) + 1); if (cmp == 0) devices[count++] = devs[i].data->dev_id; @@ -1571,7 +1571,7 @@ rte_cryptodev_driver_id_get(const char *name) TAILQ_FOREACH(driver, &cryptodev_driver_list, next) { driver_name = driver->driver->name; - if (strncmp(driver_name, name, strlen(driver_name)) == 0) + if (strncmp(driver_name, name, strlen(driver_name) + 1) == 0) return driver->id; } return -1; diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h index 4099823f..d9c3a064 100644 --- a/lib/librte_cryptodev/rte_cryptodev.h +++ b/lib/librte_cryptodev/rte_cryptodev.h @@ -1097,7 +1097,7 @@ rte_cryptodev_asym_session_clear(uint8_t dev_id, * Get the size of the header session, for all registered drivers. * * @return - * Size of the symmetric eader session. + * Size of the symmetric header session. */ unsigned int rte_cryptodev_sym_get_header_session_size(void); diff --git a/lib/librte_distributor/rte_distributor_private.h b/lib/librte_distributor/rte_distributor_private.h index fce68c95..33cd8941 100644 --- a/lib/librte_distributor/rte_distributor_private.h +++ b/lib/librte_distributor/rte_distributor_private.h @@ -41,7 +41,7 @@ extern "C" { /** * Maximum number of workers allowed. - * Be aware of increasing the limit, becaus it is limited by how we track + * Be aware of increasing the limit, because it is limited by how we track * in-flight tags. See in_flight_bitmask and rte_distributor_process */ #define RTE_DISTRIB_MAX_WORKERS 64 diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index f01495e3..bfac7fdc 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -227,7 +227,7 @@ rte_eal_config_create(void) return; if (mem_cfg_fd < 0){ - mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660); + mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600); if (mem_cfg_fd < 0) rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); } @@ -662,6 +662,12 @@ rte_eal_init(int argc, char **argv) return -1; } + if (rte_eal_alarm_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread"); + /* rte_eal_alarm_init sets rte_errno on failure. */ + return -1; + } + /* Put mp channel init before bus scan so that we can init the vdev * bus through mp channel in the secondary process before the bus scan. */ @@ -751,12 +757,6 @@ rte_eal_init(int argc, char **argv) return -1; } - if (rte_eal_alarm_init() < 0) { - rte_eal_init_alert("Cannot init interrupt-handling thread"); - /* rte_eal_alarm_init sets rte_errno on failure. 
*/ - return -1; - } - if (rte_eal_timer_init() < 0) { rte_eal_init_alert("Cannot init HPET or TSC timers"); rte_errno = ENOTSUP; diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c index 1e8f5df2..32012e14 100644 --- a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c @@ -22,7 +22,7 @@ static void * map_shared_memory(const char *filename, const size_t mem_size, int flags) { void *retval; - int fd = open(filename, flags, 0666); + int fd = open(filename, flags, 0600); if (fd < 0) return NULL; if (ftruncate(fd, mem_size) < 0) { diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 999ba24b..e3ef3714 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -55,6 +55,7 @@ static uint64_t system_page_sz; static uint64_t baseaddr = 0x100000000; #endif +#define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5 void * eal_get_virtual_area(void *requested_addr, size_t *size, size_t page_sz, int flags, int mmap_flags) @@ -62,6 +63,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size, bool addr_is_hint, allow_shrink, unmap, no_align; uint64_t map_sz; void *mapped_addr, *aligned_addr; + uint8_t try = 0; if (system_page_sz == 0) system_page_sz = sysconf(_SC_PAGESIZE); @@ -117,11 +119,14 @@ eal_get_virtual_area(void *requested_addr, size_t *size, if (mapped_addr != MAP_FAILED && addr_is_hint && mapped_addr != requested_addr) { - /* hint was not used. Try with another offset */ - munmap(mapped_addr, map_sz); - mapped_addr = MAP_FAILED; + try++; next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz); - requested_addr = next_baseaddr; + if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) { + /* hint was not used. Try with another offset */ + munmap(mapped_addr, map_sz); + mapped_addr = MAP_FAILED; + requested_addr = next_baseaddr; + } } } while ((allow_shrink || addr_is_hint) && mapped_addr == MAP_FAILED && *size > 0); diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index f6dfbc73..d4ab5e23 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -216,6 +216,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg) internal_cfg->create_uio_dev = 0; internal_cfg->iova_mode = RTE_IOVA_DC; internal_cfg->user_mbuf_pool_ops_name = NULL; + CPU_ZERO(&internal_cfg->ctrl_cpuset); internal_cfg->init_complete = 0; } @@ -417,21 +418,44 @@ eal_service_cores_parsed(void) } static int -eal_parse_coremask(const char *coremask) +update_lcore_config(int *cores) { struct rte_config *cfg = rte_eal_get_configuration(); - int i, j, idx = 0; + unsigned int count = 0; + unsigned int i; + int ret = 0; + + for (i = 0; i < RTE_MAX_LCORE; i++) { + if (cores[i] != -1) { + if (!lcore_config[i].detected) { + RTE_LOG(ERR, EAL, "lcore %u unavailable\n", i); + ret = -1; + continue; + } + cfg->lcore_role[i] = ROLE_RTE; + count++; + } else { + cfg->lcore_role[i] = ROLE_OFF; + } + lcore_config[i].core_index = cores[i]; + } + if (!ret) + cfg->lcore_count = count; + return ret; +} + +static int +eal_parse_coremask(const char *coremask, int *cores) +{ unsigned count = 0; - char c; + int i, j, idx; int val; + char c; - if (eal_service_cores_parsed()) - RTE_LOG(WARNING, EAL, - "Service cores parsed before dataplane cores. 
" - "Please ensure -c is before -s or -S\n"); + for (idx = 0; idx < RTE_MAX_LCORE; idx++) + cores[idx] = -1; + idx = 0; - if (coremask == NULL) - return -1; /* Remove all blank characters ahead and after . * Remove 0x/0X if exists. */ @@ -456,32 +480,16 @@ eal_parse_coremask(const char *coremask) for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++) { if ((1 << j) & val) { - if (!lcore_config[idx].detected) { - RTE_LOG(ERR, EAL, "lcore %u " - "unavailable\n", idx); - return -1; - } - - cfg->lcore_role[idx] = ROLE_RTE; - lcore_config[idx].core_index = count; + cores[idx] = count; count++; - } else { - cfg->lcore_role[idx] = ROLE_OFF; - lcore_config[idx].core_index = -1; } } } for (; i >= 0; i--) if (coremask[i] != '0') return -1; - for (; idx < RTE_MAX_LCORE; idx++) { - cfg->lcore_role[idx] = ROLE_OFF; - lcore_config[idx].core_index = -1; - } if (count == 0) return -1; - /* Update the count of enabled logical cores of the EAL configuration */ - cfg->lcore_count = count; return 0; } @@ -562,34 +570,19 @@ eal_parse_service_corelist(const char *corelist) } static int -eal_parse_corelist(const char *corelist) +eal_parse_corelist(const char *corelist, int *cores) { - struct rte_config *cfg = rte_eal_get_configuration(); - int i, idx = 0; unsigned count = 0; char *end = NULL; int min, max; + int idx; - if (eal_service_cores_parsed()) - RTE_LOG(WARNING, EAL, - "Service cores parsed before dataplane cores. " - "Please ensure -l is before -s or -S\n"); - - if (corelist == NULL) - return -1; + for (idx = 0; idx < RTE_MAX_LCORE; idx++) + cores[idx] = -1; - /* Remove all blank characters ahead and after */ + /* Remove all blank characters ahead */ while (isblank(*corelist)) corelist++; - i = strlen(corelist); - while ((i > 0) && isblank(corelist[i - 1])) - i--; - - /* Reset config */ - for (idx = 0; idx < RTE_MAX_LCORE; idx++) { - cfg->lcore_role[idx] = ROLE_OFF; - lcore_config[idx].core_index = -1; - } /* Get list of cores */ min = RTE_MAX_LCORE; @@ -600,10 +593,10 @@ eal_parse_corelist(const char *corelist) return -1; errno = 0; idx = strtol(corelist, &end, 10); - if (idx < 0 || idx >= (int)cfg->lcore_count) - return -1; if (errno || end == NULL) return -1; + if (idx < 0 || idx >= RTE_MAX_LCORE) + return -1; while (isblank(*end)) end++; if (*end == '-') { @@ -613,9 +606,8 @@ eal_parse_corelist(const char *corelist) if (min == RTE_MAX_LCORE) min = idx; for (idx = min; idx <= max; idx++) { - if (cfg->lcore_role[idx] != ROLE_RTE) { - cfg->lcore_role[idx] = ROLE_RTE; - lcore_config[idx].core_index = count; + if (cores[idx] == -1) { + cores[idx] = count; count++; } } @@ -627,10 +619,6 @@ eal_parse_corelist(const char *corelist) if (count == 0) return -1; - - /* Update the count of enabled logical cores of the EAL configuration */ - cfg->lcore_count = count; - return 0; } @@ -1106,13 +1094,81 @@ eal_parse_iova_mode(const char *name) return 0; } +/* caller is responsible for freeing the returned string */ +static char * +available_cores(void) +{ + char *str = NULL; + int previous; + int sequence; + char *tmp; + int idx; + + /* find the first available cpu */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + if (!lcore_config[idx].detected) + continue; + break; + } + if (idx >= RTE_MAX_LCORE) + return NULL; + + /* first sequence */ + if (asprintf(&str, "%d", idx) < 0) + return NULL; + previous = idx; + sequence = 0; + + for (idx++ ; idx < RTE_MAX_LCORE; idx++) { + if (!lcore_config[idx].detected) + continue; + + if (idx == previous + 1) { + previous = idx; + sequence = 1; + continue; + } + + /* 
finish current sequence */ + if (sequence) { + if (asprintf(&tmp, "%s-%d", str, previous) < 0) { + free(str); + return NULL; + } + free(str); + str = tmp; + } + + /* new sequence */ + if (asprintf(&tmp, "%s,%d", str, idx) < 0) { + free(str); + return NULL; + } + free(str); + str = tmp; + previous = idx; + sequence = 0; + } + + /* finish last sequence */ + if (sequence) { + if (asprintf(&tmp, "%s-%d", str, previous) < 0) { + free(str); + return NULL; + } + free(str); + str = tmp; + } + + return str; +} + int eal_parse_common_option(int opt, const char *optarg, struct internal_config *conf) { static int b_used; static int w_used; - struct rte_config *cfg = rte_eal_get_configuration(); switch (opt) { /* blacklist */ @@ -1136,9 +1192,23 @@ eal_parse_common_option(int opt, const char *optarg, w_used = 1; break; /* coremask */ - case 'c': - if (eal_parse_coremask(optarg) < 0) { - RTE_LOG(ERR, EAL, "invalid coremask\n"); + case 'c': { + int lcore_indexes[RTE_MAX_LCORE]; + + if (eal_service_cores_parsed()) + RTE_LOG(WARNING, EAL, + "Service cores parsed before dataplane cores. Please ensure -c is before -s or -S\n"); + if (eal_parse_coremask(optarg, lcore_indexes) < 0) { + RTE_LOG(ERR, EAL, "invalid coremask syntax\n"); + return -1; + } + if (update_lcore_config(lcore_indexes) < 0) { + char *available = available_cores(); + + RTE_LOG(ERR, EAL, + "invalid coremask, please check specified cores are part of %s\n", + available); + free(available); return -1; } @@ -1152,12 +1222,26 @@ eal_parse_common_option(int opt, const char *optarg, core_parsed = LCORE_OPT_MSK; break; + } /* corelist */ - case 'l': - if (eal_parse_corelist(optarg) < 0) { + case 'l': { + int lcore_indexes[RTE_MAX_LCORE]; + + if (eal_service_cores_parsed()) + RTE_LOG(WARNING, EAL, + "Service cores parsed before dataplane cores. 
Please ensure -l is before -s or -S\n"); + + if (eal_parse_corelist(optarg, lcore_indexes) < 0) { + RTE_LOG(ERR, EAL, "invalid core list syntax\n"); + return -1; + } + if (update_lcore_config(lcore_indexes) < 0) { + char *available = available_cores(); + RTE_LOG(ERR, EAL, - "invalid core list, please check core numbers are in [0, %u] range\n", - cfg->lcore_count-1); + "invalid core list, please check specified cores are part of %s\n", + available); + free(available); return -1; } @@ -1171,6 +1255,7 @@ eal_parse_common_option(int opt, const char *optarg, core_parsed = LCORE_OPT_LST; break; + } /* service coremask */ case 's': if (eal_parse_service_coremask(optarg) < 0) { @@ -1342,10 +1427,9 @@ eal_auto_detect_cores(struct rte_config *cfg) unsigned int lcore_id; unsigned int removed = 0; rte_cpuset_t affinity_set; - pthread_t tid = pthread_self(); - if (pthread_getaffinity_np(tid, sizeof(rte_cpuset_t), - &affinity_set) < 0) + if (pthread_getaffinity_np(pthread_self(), sizeof(rte_cpuset_t), + &affinity_set)) CPU_ZERO(&affinity_set); for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { @@ -1359,6 +1443,31 @@ eal_auto_detect_cores(struct rte_config *cfg) cfg->lcore_count -= removed; } +static void +compute_ctrl_threads_cpuset(struct internal_config *internal_cfg) +{ + rte_cpuset_t *cpuset = &internal_cfg->ctrl_cpuset; + rte_cpuset_t default_set; + unsigned int lcore_id; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (eal_cpu_detected(lcore_id) && + rte_lcore_has_role(lcore_id, ROLE_OFF)) { + CPU_SET(lcore_id, cpuset); + } + } + + if (pthread_getaffinity_np(pthread_self(), sizeof(rte_cpuset_t), + &default_set)) + CPU_ZERO(&default_set); + + RTE_CPU_AND(cpuset, cpuset, &default_set); + + /* if no detected CPU is off, use master core */ + if (!CPU_COUNT(cpuset)) + CPU_SET(rte_get_master_lcore(), cpuset); +} + int eal_cleanup_config(struct internal_config *internal_cfg) { @@ -1392,6 +1501,8 @@ eal_adjust_config(struct internal_config *internal_cfg) lcore_config[cfg->master_lcore].core_role = ROLE_RTE; } + compute_ctrl_threads_cpuset(internal_cfg); + /* if no memory amounts were requested, this will result in 0 and * will be overridden later, right after eal_hugepage_info_init() */ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c index b46d644b..852e52e0 100644 --- a/lib/librte_eal/common/eal_common_proc.c +++ b/lib/librte_eal/common/eal_common_proc.c @@ -285,7 +285,15 @@ read_msg(struct mp_msg_internal *m, struct sockaddr_un *s) break; } } - + /* sanity-check the response */ + if (m->msg.num_fds < 0 || m->msg.num_fds > RTE_MP_MAX_FD_NUM) { + RTE_LOG(ERR, EAL, "invalid number of fd's received\n"); + return -1; + } + if (m->msg.len_param < 0 || m->msg.len_param > RTE_MP_MAX_PARAM_LEN) { + RTE_LOG(ERR, EAL, "invalid received data length\n"); + return -1; + } return 0; } @@ -678,11 +686,6 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type) unlink(dst_path); return 0; } - if (errno == ENOBUFS) { - RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n", - dst_path); - return 0; - } RTE_LOG(ERR, EAL, "failed to send to (%s) due to %s\n", dst_path, strerror(errno)); return -1; @@ -758,6 +761,18 @@ check_input(const struct rte_mp_msg *msg) if (validate_action_name(msg->name)) return false; + if (msg->len_param < 0) { + RTE_LOG(ERR, EAL, "Message data length is negative\n"); + rte_errno = EINVAL; + return false; + } + + if (msg->num_fds < 0) { + RTE_LOG(ERR, EAL, "Number of fd's is 
negative\n"); + rte_errno = EINVAL; + return false; + } + if (msg->len_param > RTE_MP_MAX_PARAM_LEN) { RTE_LOG(ERR, EAL, "Message data is too long\n"); rte_errno = E2BIG; @@ -919,7 +934,7 @@ int __rte_experimental rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, const struct timespec *ts) { - int dir_fd, ret = 0; + int dir_fd, ret = -1; DIR *mp_dir; struct dirent *ent; struct timeval now; @@ -927,13 +942,13 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, RTE_LOG(DEBUG, EAL, "request: %s\n", req->name); - if (check_input(req) == false) - return -1; - reply->nb_sent = 0; reply->nb_received = 0; reply->msgs = NULL; + if (check_input(req) == false) + goto end; + if (internal_config.no_shconf) { RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); return 0; @@ -942,7 +957,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, if (gettimeofday(&now, NULL) < 0) { RTE_LOG(ERR, EAL, "Failed to get current time\n"); rte_errno = errno; - return -1; + goto end; } end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000; @@ -954,7 +969,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, pthread_mutex_lock(&pending_requests.lock); ret = mp_request_sync(eal_mp_socket_path(), req, reply, &end); pthread_mutex_unlock(&pending_requests.lock); - return ret; + goto end; } /* for primary process, broadcast request, and collect reply 1 by 1 */ @@ -962,7 +977,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, if (!mp_dir) { RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path); rte_errno = errno; - return -1; + goto end; } dir_fd = dirfd(mp_dir); @@ -970,9 +985,8 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, if (flock(dir_fd, LOCK_SH)) { RTE_LOG(ERR, EAL, "Unable to lock directory %s\n", mp_dir_path); - closedir(mp_dir); rte_errno = errno; - return -1; + goto close_end; } pthread_mutex_lock(&pending_requests.lock); @@ -989,14 +1003,25 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, * locks on receive */ if (mp_request_sync(path, req, reply, &end)) - ret = -1; + goto unlock_end; } + ret = 0; + +unlock_end: pthread_mutex_unlock(&pending_requests.lock); /* unlock the directory */ flock(dir_fd, LOCK_UN); +close_end: /* dir_fd automatically closed on closedir */ closedir(mp_dir); + +end: + if (ret) { + free(reply->msgs); + reply->nb_received = 0; + reply->msgs = NULL; + } return ret; } diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c index 48ef4d6d..14f206c0 100644 --- a/lib/librte_eal/common/eal_common_thread.c +++ b/lib/librte_eal/common/eal_common_thread.c @@ -16,6 +16,7 @@ #include <rte_memory.h> #include <rte_log.h> +#include "eal_internal_cfg.h" #include "eal_private.h" #include "eal_thread.h" @@ -168,10 +169,9 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) { + rte_cpuset_t *cpuset = &internal_config.ctrl_cpuset; struct rte_thread_ctrl_params *params; - unsigned int lcore_id; - rte_cpuset_t cpuset; - int cpu_found, ret; + int ret; params = malloc(sizeof(*params)); if (!params) @@ -195,21 +195,8 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, "Cannot set name for ctrl thread\n"); } - cpu_found = 0; - CPU_ZERO(&cpuset); - for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { - if (eal_cpu_detected(lcore_id) && - rte_lcore_has_role(lcore_id, ROLE_OFF)) 
{ - CPU_SET(lcore_id, &cpuset); - cpu_found = 1; - } - } - /* if no detected cpu is off, use master core */ - if (!cpu_found) - CPU_SET(rte_get_master_lcore(), &cpuset); - - ret = pthread_setaffinity_np(*thread, sizeof(cpuset), &cpuset); - if (ret < 0) + ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); + if (ret) goto fail; ret = pthread_barrier_wait(¶ms->configured); diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 783ce7de..189d4f5b 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -13,6 +13,8 @@ #include <rte_eal.h> #include <rte_pci_dev_feature_defs.h> +#include "eal_thread.h" + #define MAX_HUGEPAGE_SIZES 3 /**< support up to 3 page sizes */ /* @@ -71,6 +73,7 @@ struct internal_config { unsigned num_hugepage_sizes; /**< how many sizes on this system */ struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES]; enum rte_iova_mode iova_mode ; /**< Set IOVA mode on this system */ + rte_cpuset_t ctrl_cpuset; /**< cpuset for ctrl threads */ volatile unsigned int init_complete; /**< indicates whether EAL has completed initialization */ }; diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index 327c95e9..1623ae8c 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -5,6 +5,8 @@ #ifndef EAL_OPTIONS_H #define EAL_OPTIONS_H +#include "getopt.h" + enum { /* long options mapped to a short option */ #define OPT_HELP "help" diff --git a/lib/librte_eal/common/hotplug_mp.c b/lib/librte_eal/common/hotplug_mp.c index 9d610a8a..7c3f38db 100644 --- a/lib/librte_eal/common/hotplug_mp.c +++ b/lib/librte_eal/common/hotplug_mp.c @@ -361,7 +361,7 @@ int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req) ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts); if (ret || mp_reply.nb_received != 1) { - RTE_LOG(ERR, EAL, "cannot send request to primary"); + RTE_LOG(ERR, EAL, "Cannot send request to primary\n"); if (!ret) return -1; return ret; diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h index ce38350b..797381c0 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h @@ -63,11 +63,7 @@ extern "C" { * Guarantees that the STORE operations generated before the barrier * occur before the STORE operations generated after. */ -#ifdef RTE_ARCH_64 -#define rte_wmb() asm volatile("lwsync" : : : "memory") -#else #define rte_wmb() asm volatile("sync" : : : "memory") -#endif /** * Read memory barrier. @@ -75,11 +71,7 @@ extern "C" { * Guarantees that the LOAD operations generated before the barrier * occur before the LOAD operations generated after. */ -#ifdef RTE_ARCH_64 -#define rte_rmb() asm volatile("lwsync" : : : "memory") -#else #define rte_rmb() asm volatile("sync" : : : "memory") -#endif #define rte_smp_mb() rte_mb() diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h index ac379e87..d318b91a 100644 --- a/lib/librte_eal/common/include/generic/rte_cycles.h +++ b/lib/librte_eal/common/include/generic/rte_cycles.h @@ -173,7 +173,7 @@ rte_delay_us_sleep(unsigned int us); * * @param userfunc * User function which replaces rte_delay_us. rte_delay_us_block restores - * buildin block delay function. + * builtin block delay function. 
*/ void rte_delay_us_callback_register(void(*userfunc)(unsigned int)); diff --git a/lib/librte_eal/common/include/generic/rte_rwlock.h b/lib/librte_eal/common/include/generic/rte_rwlock.h index 5751a0e6..2c284f0b 100644 --- a/lib/librte_eal/common/include/generic/rte_rwlock.h +++ b/lib/librte_eal/common/include/generic/rte_rwlock.h @@ -64,14 +64,14 @@ rte_rwlock_read_lock(rte_rwlock_t *rwl) int success = 0; while (success == 0) { - x = rwl->cnt; + x = __atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED); /* write lock is held */ if (x < 0) { rte_pause(); continue; } - success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, - (uint32_t)x, (uint32_t)(x + 1)); + success = __atomic_compare_exchange_n(&rwl->cnt, &x, x + 1, 1, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } } @@ -84,7 +84,7 @@ rte_rwlock_read_lock(rte_rwlock_t *rwl) static inline void rte_rwlock_read_unlock(rte_rwlock_t *rwl) { - rte_atomic32_dec((rte_atomic32_t *)(intptr_t)&rwl->cnt); + __atomic_fetch_sub(&rwl->cnt, 1, __ATOMIC_RELEASE); } /** @@ -100,14 +100,14 @@ rte_rwlock_write_lock(rte_rwlock_t *rwl) int success = 0; while (success == 0) { - x = rwl->cnt; + x = __atomic_load_n(&rwl->cnt, __ATOMIC_RELAXED); /* a lock is held */ if (x != 0) { rte_pause(); continue; } - success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, - 0, (uint32_t)-1); + success = __atomic_compare_exchange_n(&rwl->cnt, &x, -1, 1, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } } @@ -120,7 +120,7 @@ rte_rwlock_write_lock(rte_rwlock_t *rwl) static inline void rte_rwlock_write_unlock(rte_rwlock_t *rwl) { - rte_atomic32_inc((rte_atomic32_t *)(intptr_t)&rwl->cnt); + __atomic_store_n(&rwl->cnt, 0, __ATOMIC_RELEASE); } /** diff --git a/lib/librte_eal/common/include/generic/rte_spinlock.h b/lib/librte_eal/common/include/generic/rte_spinlock.h index c4c3fc31..87ae7a4f 100644 --- a/lib/librte_eal/common/include/generic/rte_spinlock.h +++ b/lib/librte_eal/common/include/generic/rte_spinlock.h @@ -61,9 +61,14 @@ rte_spinlock_lock(rte_spinlock_t *sl); static inline void rte_spinlock_lock(rte_spinlock_t *sl) { - while (__sync_lock_test_and_set(&sl->locked, 1)) - while(sl->locked) + int exp = 0; + + while (!__atomic_compare_exchange_n(&sl->locked, &exp, 1, 0, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { + while (__atomic_load_n(&sl->locked, __ATOMIC_RELAXED)) rte_pause(); + exp = 0; + } } #endif @@ -80,7 +85,7 @@ rte_spinlock_unlock (rte_spinlock_t *sl); static inline void rte_spinlock_unlock (rte_spinlock_t *sl) { - __sync_lock_release(&sl->locked); + __atomic_store_n(&sl->locked, 0, __ATOMIC_RELEASE); } #endif @@ -99,7 +104,10 @@ rte_spinlock_trylock (rte_spinlock_t *sl); static inline int rte_spinlock_trylock (rte_spinlock_t *sl) { - return __sync_lock_test_and_set(&sl->locked,1) == 0; + int exp = 0; + return __atomic_compare_exchange_n(&sl->locked, &exp, 1, + 0, /* disallow spurious failure */ + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } #endif @@ -113,7 +121,7 @@ rte_spinlock_trylock (rte_spinlock_t *sl) */ static inline int rte_spinlock_is_locked (rte_spinlock_t *sl) { - return sl->locked; + return __atomic_load_n(&sl->locked, __ATOMIC_ACQUIRE); } /** diff --git a/lib/librte_eal/common/include/generic/rte_vect.h b/lib/librte_eal/common/include/generic/rte_vect.h index 11c6475b..3fc47979 100644 --- a/lib/librte_eal/common/include/generic/rte_vect.h +++ b/lib/librte_eal/common/include/generic/rte_vect.h @@ -55,7 +55,7 @@ typedef uint16_t rte_v128u16_t __attribute__((vector_size(16), aligned(16))); /** * 128 bits vector size to use with unsigned 32 bits elements. 
* - * a = (rte_v128u32_t){ a0, a1, a2, a3, a4 } + * a = (rte_v128u32_t){ a0, a1, a2, a3 } */ typedef uint32_t rte_v128u32_t __attribute__((vector_size(16), aligned(16))); diff --git a/lib/librte_eal/common/include/rte_class.h b/lib/librte_eal/common/include/rte_class.h index 276c91e9..856d09b2 100644 --- a/lib/librte_eal/common/include/rte_class.h +++ b/lib/librte_eal/common/include/rte_class.h @@ -15,7 +15,7 @@ * * A device class defines the type of function a device * will be used for e.g.: Ethernet adapter (eth), - * cryptographic coprocessor (crypto), etc. + * cryptographic co-processor (crypto), etc. */ #ifdef __cplusplus diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index 66cdf60b..48bf28ca 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -340,7 +340,7 @@ rte_is_power_of_2(uint32_t n) * Aligns input parameter to the next power of 2 * * @param x - * The integer value to algin + * The integer value to align * * @return * Input parameter aligned to the next power of 2 @@ -358,7 +358,7 @@ rte_align32pow2(uint32_t x) * Aligns input parameter to the previous power of 2 * * @param x - * The integer value to algin + * The integer value to align * * @return * Input parameter aligned to the previous power of 2 diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index a0cedd57..9951228e 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -228,6 +228,13 @@ struct rte_mp_reply { * * As we create socket channel for primary/secondary communication, use * this function typedef to register action for coming messages. + * + * @note When handling IPC request callbacks, the reply must be sent even in + * cases of error handling. Simply returning success or failure will *not* + * send a response to the requestor. + * Implementation of error signalling mechanism is up to the application. + * + * @note No memory allocations should take place inside the callback. */ typedef int (*rte_mp_t)(const struct rte_mp_msg *msg, const void *peer); @@ -237,6 +244,13 @@ typedef int (*rte_mp_t)(const struct rte_mp_msg *msg, const void *peer); * As we create socket channel for primary/secondary communication, use * this function typedef to register action for coming responses to asynchronous * requests. + * + * @note When handling IPC request callbacks, the reply must be sent even in + * cases of error handling. Simply returning success or failure will *not* + * send a response to the requestor. + * Implementation of error signalling mechanism is up to the application. + * + * @note No memory allocations should take place inside the callback. */ typedef int (*rte_mp_async_reply_t)(const struct rte_mp_msg *request, const struct rte_mp_reply *reply); @@ -287,7 +301,7 @@ rte_mp_action_unregister(const char *name); * * Send a message to the peer process. * - * This function will send a message which will be responsed by the action + * This function will send a message which will be responded by the action * identified by name in the peer process. * * @param msg @@ -311,6 +325,9 @@ rte_mp_sendmsg(struct rte_mp_msg *msg); * * @note The caller is responsible to free reply->replies. * + * @note This API must not be used inside memory-related or IPC callbacks, and + * no memory allocations should take place inside such callback. + * * @param req * The req argument contains the customized request message. 
* @@ -364,6 +381,11 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts, * This function will send a reply message in response to a request message * received previously. * + * @note When handling IPC request callbacks, the reply must be sent even in + * cases of error handling. Simply returning success or failure will *not* + * send a response to the requestor. + * Implementation of error signalling mechanism is up to the application. + * * @param msg * The msg argument contains the customized message. * @@ -424,7 +446,7 @@ rte_set_application_usage_hook(rte_usage_hook_t usage_func); #define RTE_EAL_TAILQ_RWLOCK (&rte_eal_get_configuration()->mem_config->qlock) /** - * macro to get the multiple lock of mempool shared by mutiple-instance + * macro to get the multiple lock of mempool shared by multiple-instance */ #define RTE_EAL_MEMPOOL_RWLOCK (&rte_eal_get_configuration()->mem_config->mplock) diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h index 6e09d918..dea17f50 100644 --- a/lib/librte_eal/common/include/rte_lcore.h +++ b/lib/librte_eal/common/include/rte_lcore.h @@ -23,10 +23,18 @@ extern "C" { #define LCORE_ID_ANY UINT32_MAX /**< Any lcore. */ #if defined(__linux__) - typedef cpu_set_t rte_cpuset_t; +typedef cpu_set_t rte_cpuset_t; +#define RTE_CPU_AND(dst, src1, src2) CPU_AND(dst, src1, src2) #elif defined(__FreeBSD__) #include <pthread_np.h> - typedef cpuset_t rte_cpuset_t; +typedef cpuset_t rte_cpuset_t; +#define RTE_CPU_AND(dst, src1, src2) do \ +{ \ + cpuset_t tmp; \ + CPU_COPY(src1, &tmp); \ + CPU_AND(&tmp, src2); \ + CPU_COPY(&tmp, dst); \ +} while (0) #endif /** @@ -280,8 +288,9 @@ int rte_thread_setname(pthread_t id, const char *name); * Create a control thread. * * Wrapper to pthread_create(), pthread_setname_np() and - * pthread_setaffinity_np(). The dataplane and service lcores are - * excluded from the affinity of the new thread. + * pthread_setaffinity_np(). The affinity of the new thread is based + * on the CPU affinity retrieved at the time rte_eal_init() was called, + * the dataplane and service lcores are then excluded. * * @param thread * Filled with the thread id of the new created thread. diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index 2f789cb9..213043c4 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -36,7 +36,7 @@ struct rte_logs { struct rte_log_dynamic_type *dynamic_types; }; -/** Global log informations */ +/** Global log information */ extern struct rte_logs rte_logs; /* SDK log type */ diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h index 54a12467..e0be13ca 100644 --- a/lib/librte_eal/common/include/rte_malloc.h +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -111,7 +111,7 @@ rte_calloc(const char *type, size_t num, size_t size, unsigned align); /** * Replacement function for realloc(), using huge-page memory. Reserved area * memory is resized, preserving contents. In NUMA systems, the new area - * resides on the same NUMA socket as the old area. + * may not reside on the same NUMA node as the old one. 
* * @param ptr * Pointer to already allocated memory diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h index 34b41aff..11f67350 100644 --- a/lib/librte_eal/common/include/rte_service.h +++ b/lib/librte_eal/common/include/rte_service.h @@ -337,7 +337,7 @@ int32_t rte_service_set_stats_enable(uint32_t id, int32_t enable); int32_t rte_service_lcore_list(uint32_t array[], uint32_t n); /** - * Get the numer of services running on the supplied lcore. + * Get the number of services running on the supplied lcore. * * @param lcore Id of the service core. * @retval >=0 Number of services registered to this core. diff --git a/lib/librte_eal/common/include/rte_string_fns.h b/lib/librte_eal/common/include/rte_string_fns.h index 9a2a1ff9..35c6b003 100644 --- a/lib/librte_eal/common/include/rte_string_fns.h +++ b/lib/librte_eal/common/include/rte_string_fns.h @@ -59,10 +59,25 @@ rte_strlcpy(char *dst, const char *src, size_t size) return (size_t)snprintf(dst, size, "%s", src); } +/** + * @internal + * DPDK-specific version of strlcat for systems without + * libc or libbsd copies of the function + */ +static inline size_t +rte_strlcat(char *dst, const char *src, size_t size) +{ + size_t l = strnlen(dst, size); + if (l < size) + return l + rte_strlcpy(&dst[l], src, size - l); + return l + strlen(src); +} + /* pull in a strlcpy function */ #ifdef RTE_EXEC_ENV_BSDAPP #ifndef __BSD_VISIBLE /* non-standard functions are hidden */ #define strlcpy(dst, src, size) rte_strlcpy(dst, src, size) +#define strlcat(dst, src, size) rte_strlcat(dst, src, size) #endif #else /* non-BSD platforms */ @@ -71,6 +86,7 @@ rte_strlcpy(char *dst, const char *src, size_t size) #else /* no BSD header files, create own */ #define strlcpy(dst, src, size) rte_strlcpy(dst, src, size) +#define strlcat(dst, src, size) rte_strlcat(dst, src, size) #endif /* RTE_USE_LIBBSD */ #endif /* BSDAPP */ diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h index 9b01abb2..b6fe4e5f 100644 --- a/lib/librte_eal/common/include/rte_tailq.h +++ b/lib/librte_eal/common/include/rte_tailq.h @@ -53,7 +53,7 @@ struct rte_tailq_elem { }; /** - * Return the first tailq entry casted to the right struct. + * Return the first tailq entry cast to the right struct. */ #define RTE_TAILQ_CAST(tailq_entry, struct_name) \ (struct struct_name *)&(tailq_entry)->tailq_head diff --git a/lib/librte_eal/common/include/rte_uuid.h b/lib/librte_eal/common/include/rte_uuid.h index 2c846b5f..16bbed32 100644 --- a/lib/librte_eal/common/include/rte_uuid.h +++ b/lib/librte_eal/common/include/rte_uuid.h @@ -43,7 +43,7 @@ extern "C" { #include <stdbool.h> /** - * Struct describing a Universal Unique Identifer + * Struct describing a Universal Unique Identifier */ typedef unsigned char rte_uuid_t[16]; @@ -105,7 +105,7 @@ int rte_uuid_compare(const rte_uuid_t a, const rte_uuid_t b); * @param uu * Destination UUID * @return - * Returns 0 on succes, and -1 if string is not a valid UUID. + * Returns 0 on success, and -1 if string is not a valid UUID. */ int rte_uuid_parse(const char *in, rte_uuid_t uu); diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index b4c6dd3c..7c0b13b5 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -37,7 +37,7 @@ extern "C" { /** * Patch level number i.e. 
the z in yy.mm.z */ -#define RTE_VER_MINOR 1 +#define RTE_VER_MINOR 2 /** * Extra string to be appended to version number diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h index cae96fab..d837f1e7 100644 --- a/lib/librte_eal/common/include/rte_vfio.h +++ b/lib/librte_eal/common/include/rte_vfio.h @@ -178,7 +178,7 @@ int rte_vfio_noiommu_is_enabled(void); * an error on BSD. * * @param vfio_group_fd - * VFIO Grouup FD. + * VFIO Group FD. * * @return * 0 on success. @@ -291,6 +291,10 @@ rte_vfio_get_group_fd(int iommu_group_num); * containers by default, user needs to manage DMA mappings for * any container created by this API. * + * @note When creating containers using this API, the container will only be + * available in the process that has created it. Sharing containers and + * devices between multiple processes is not supported. + * * @return * the container fd if successful * <0 if failed diff --git a/lib/librte_eal/common/malloc_mp.c b/lib/librte_eal/common/malloc_mp.c index f3a13353..b470565e 100644 --- a/lib/librte_eal/common/malloc_mp.c +++ b/lib/librte_eal/common/malloc_mp.c @@ -501,7 +501,7 @@ handle_rollback_response(const struct rte_mp_msg *request, /* lock the request */ pthread_mutex_lock(&mp_request_list.lock); - memset(&msg, 0, sizeof(0)); + memset(&msg, 0, sizeof(msg)); entry = find_request_by_id(mpreq->id); if (entry == NULL) { diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 30138b63..7a08cf1e 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -320,7 +320,7 @@ rte_eal_config_create(void) rte_mem_cfg_addr = NULL; if (mem_cfg_fd < 0){ - mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660); + mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600); if (mem_cfg_fd < 0) rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); } @@ -1000,6 +1000,12 @@ rte_eal_init(int argc, char **argv) return -1; } + if (rte_eal_alarm_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread"); + /* rte_eal_alarm_init sets rte_errno on failure. */ + return -1; + } + /* Put mp channel init before bus scan so that we can init the vdev * bus through mp channel in the secondary process before the bus scan. */ @@ -1120,12 +1126,6 @@ rte_eal_init(int argc, char **argv) return -1; } - if (rte_eal_alarm_init() < 0) { - rte_eal_init_alert("Cannot init interrupt-handling thread"); - /* rte_eal_alarm_init sets rte_errno on failure. */ - return -1; - } - if (rte_eal_timer_init() < 0) { rte_eal_init_alert("Cannot init HPET or TSC timers"); rte_errno = ENOTSUP; @@ -1214,8 +1214,11 @@ rte_eal_init(int argc, char **argv) * whether we are primary or secondary process, but we cannot remove * primary process' files because secondary should be able to run even * if primary process is dead. + * + * In no_shconf mode, no runtime directory is created in the first + * place, so no cleanup needed. 
*/ - if (eal_clean_runtime_dir() < 0) { + if (!internal_config.no_shconf && eal_clean_runtime_dir() < 0) { rte_eal_init_alert("Cannot clear runtime directory\n"); return -1; } diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c index 2830c868..c4180938 100644 --- a/lib/librte_eal/linuxapp/eal/eal_dev.c +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c @@ -66,8 +66,8 @@ static void sigbus_handler(int signum, siginfo_t *info, { int ret; - RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n", - (int)pthread_self(), info->si_addr); + RTE_LOG(DEBUG, EAL, "Thread catch SIGBUS, fault address:%p\n", + info->si_addr); rte_spinlock_lock(&failure_handle_lock); ret = rte_bus_sigbus_handler(info->si_addr); diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 0eab1cf7..6e898c24 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -45,7 +45,7 @@ static void * map_shared_memory(const char *filename, const size_t mem_size, int flags) { void *retval; - int fd = open(filename, flags, 0666); + int fd = open(filename, flags, 0600); if (fd < 0) return NULL; if (ftruncate(fd, mem_size) < 0) { diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c index f63d9ca6..81b441a9 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c +++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c @@ -740,6 +740,10 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, __func__, socket_id, cur_socket_id); goto mapped; } +#else + if (rte_socket_count() > 1) + RTE_LOG(DEBUG, EAL, "%s(): not checking hugepage NUMA node.\n", + __func__); #endif ms->addr = addr; diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index e05da74c..898bdb77 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -46,6 +46,7 @@ #include "eal_internal_cfg.h" #include "eal_filesystem.h" #include "eal_hugepages.h" +#include "eal_options.h" #define PFN_MASK_SIZE 8 @@ -110,7 +111,7 @@ rte_mem_virt2phy(const void *virtaddr) fd = open("/proc/self/pagemap", O_RDONLY); if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", + RTE_LOG(INFO, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", __func__, strerror(errno)); return RTE_BAD_IOVA; } @@ -118,7 +119,7 @@ rte_mem_virt2phy(const void *virtaddr) virt_pfn = (unsigned long)virtaddr / page_size; offset = sizeof(uint64_t) * virt_pfn; if (lseek(fd, offset, SEEK_SET) == (off_t) -1) { - RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", + RTE_LOG(INFO, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", __func__, strerror(errno)); close(fd); return RTE_BAD_IOVA; @@ -127,11 +128,11 @@ rte_mem_virt2phy(const void *virtaddr) retval = read(fd, &page, PFN_MASK_SIZE); close(fd); if (retval < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", + RTE_LOG(INFO, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", __func__, strerror(errno)); return RTE_BAD_IOVA; } else if (retval != PFN_MASK_SIZE) { - RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap " + RTE_LOG(INFO, EAL, "%s(): read %d bytes from /proc/self/pagemap " "but expected %d:\n", __func__, retval, PFN_MASK_SIZE); return RTE_BAD_IOVA; @@ -536,7 +537,7 @@ create_shared_memory(const char *filename, const size_t mem_size) return retval; } - fd = open(filename, O_CREAT | O_RDWR, 0666); + fd = 
open(filename, O_CREAT | O_RDWR, 0600); if (fd < 0) return NULL; if (ftruncate(fd, mem_size) < 0) { @@ -1392,7 +1393,7 @@ eal_legacy_hugepage_init(void) if (mcfg->dma_maskbits && rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) { RTE_LOG(ERR, EAL, - "%s(): couldnt allocate memory due to IOVA exceeding limits of current DMA mask.\n", + "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n", __func__); if (rte_eal_iova_mode() == RTE_IOVA_VA && rte_eal_using_phys_addrs()) @@ -2038,7 +2039,8 @@ memseg_primary_init_32(void) socket_id = rte_socket_id_by_idx(i); #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES - if (socket_id > 0) + /* we can still sort pages by socket in legacy mode */ + if (!internal_config.legacy_mem && socket_id > 0) break; #endif @@ -2219,7 +2221,8 @@ memseg_primary_init(void) int socket_id = rte_socket_id_by_idx(i); #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES - if (socket_id > 0) + /* we can still sort pages by socket in legacy mode */ + if (!internal_config.legacy_mem && socket_id > 0) break; #endif memtypes[cur_type].page_sz = hugepage_sz; @@ -2378,6 +2381,13 @@ rte_eal_memseg_init(void) } else { RTE_LOG(ERR, EAL, "Cannot get current resource limits\n"); } +#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES + if (!internal_config.legacy_mem && rte_socket_count() > 1) { + RTE_LOG(WARNING, EAL, "DPDK is running on a NUMA system, but is compiled without NUMA support.\n"); + RTE_LOG(WARNING, EAL, "This will have adverse consequences for performance and usability.\n"); + RTE_LOG(WARNING, EAL, "Please use --"OPT_LEGACY_MEM" option, or recompile with NUMA support.\n"); + } +#endif return rte_eal_process_type() == RTE_PROC_PRIMARY ? #ifndef RTE_ARCH_64 diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h index 5afa0871..5db5a133 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -127,7 +127,7 @@ struct rte_kni_device_info { /* mbuf size */ unsigned mbuf_size; unsigned int mtu; - char mac_addr[6]; + uint8_t mac_addr[6]; }; #define KNI_DEVICE "kni" diff --git a/lib/librte_efd/rte_efd.h b/lib/librte_efd/rte_efd.h index 2ace008e..c2be4c09 100644 --- a/lib/librte_efd/rte_efd.h +++ b/lib/librte_efd/rte_efd.h @@ -191,7 +191,7 @@ rte_efd_find_existing(const char *name); * This operation was still successful, and entry contains a valid update * RTE_EFD_UPDATE_FAILED * Either the EFD failed to find a suitable perfect hash or the group was full - * This is a fatal error, and the table is now in an indeterminite state + * This is a fatal error, and the table is now in an indeterminate state * RTE_EFD_UPDATE_NO_CHANGE * Operation resulted in no change to the table (same value already exists) * 0 - success diff --git a/lib/librte_ethdev/rte_eth_ctrl.h b/lib/librte_ethdev/rte_eth_ctrl.h index 5ea8ae24..925a63f2 100644 --- a/lib/librte_ethdev/rte_eth_ctrl.h +++ b/lib/librte_ethdev/rte_eth_ctrl.h @@ -589,7 +589,7 @@ struct rte_eth_fdir_masks { uint16_t vlan_tci_mask; /**< Bit mask for vlan_tci in big endian */ /** Bit mask for ipv4 flow in big endian. */ struct rte_eth_ipv4_flow ipv4_mask; - /** Bit maks for ipv6 flow in big endian. */ + /** Bit mask for ipv6 flow in big endian. */ struct rte_eth_ipv6_flow ipv6_mask; /** Bit mask for L4 source port in big endian. 
*/ uint16_t src_port_mask; diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c index 9d5107dc..191658da 100644 --- a/lib/librte_ethdev/rte_ethdev.c +++ b/lib/librte_ethdev/rte_ethdev.c @@ -48,7 +48,6 @@ int rte_eth_dev_logtype; static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data"; struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS]; -static uint16_t eth_dev_last_created_port; /* spinlock for eth device callbacks */ static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER; @@ -431,8 +430,6 @@ eth_dev_get(uint16_t port_id) eth_dev->data = &rte_eth_dev_shared_data->data[port_id]; - eth_dev_last_created_port = port_id; - return eth_dev; } @@ -1646,7 +1643,7 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, local_conf.offloads) { RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%d rx_queue_id=%d, new added offloads 0x%"PRIx64" must be " - "within pre-queue offload capabilities 0x%"PRIx64" in %s()\n", + "within per-queue offload capabilities 0x%"PRIx64" in %s()\n", port_id, rx_queue_id, local_conf.offloads, dev_info.rx_queue_offload_capa, __func__); @@ -1750,7 +1747,7 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id, local_conf.offloads) { RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%d tx_queue_id=%d, new added offloads 0x%"PRIx64" must be " - "within pre-queue offload capabilities 0x%"PRIx64" in %s()\n", + "within per-queue offload capabilities 0x%"PRIx64" in %s()\n", port_id, tx_queue_id, local_conf.offloads, dev_info.tx_queue_offload_capa, __func__); diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h index a3c864a1..0e353619 100644 --- a/lib/librte_ethdev/rte_ethdev.h +++ b/lib/librte_ethdev/rte_ethdev.h @@ -1135,7 +1135,7 @@ struct rte_eth_dev_info { /** * Ethernet device RX queue information structure. - * Used to retieve information about configured queue. + * Used to retrieve information about configured queue. */ struct rte_eth_rxq_info { struct rte_mempool *mp; /**< mempool used by that queue. */ @@ -1551,14 +1551,14 @@ const char *rte_eth_dev_tx_offload_name(uint64_t offload); * Applications should set the ignore_bitfield_offloads bit on *rxmode* * structure and use offloads field to set per-port offloads instead. * - Any offloading set in eth_conf->[rt]xmode.offloads must be within - * the [rt]x_offload_capa returned from rte_eth_dev_infos_get(). + * the [rt]x_offload_capa returned from rte_eth_dev_info_get(). * Any type of device supported offloading set in the input argument * eth_conf->[rt]xmode.offloads to rte_eth_dev_configure() is enabled * on all queues and it can't be disabled in rte_eth_[rt]x_queue_setup() * - the Receive Side Scaling (RSS) configuration when using multiple RX * queues per port. Any RSS hash function set in eth_conf->rss_conf.rss_hf * must be within the flow_type_rss_offloads provided by drivers via - * rte_eth_dev_infos_get() API. + * rte_eth_dev_info_get() API. * * Embedding all configuration information in a single data structure * is the more flexible method that allows the addition of new features @@ -2101,7 +2101,7 @@ rte_eth_xstats_get_names_by_id(uint16_t port_id, * A pointer to an ids array passed by application. This tells which * statistics values function should retrieve. This parameter * can be set to NULL if size is 0. In this case function will retrieve - * all avalible statistics. + * all available statistics. * @param values * A pointer to a table to be filled with device statistics values. 
* @param size @@ -2542,7 +2542,7 @@ rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent, /** * Request the driver to free mbufs currently cached by the driver. The * driver will only free the mbuf if it is no longer in use. It is the - * application's responsibity to ensure rte_eth_tx_buffer_flush(..) is + * application's responsibility to ensure rte_eth_tx_buffer_flush(..) is * called if needed. * * @param port_id diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h index 8f03f83f..16300b14 100644 --- a/lib/librte_ethdev/rte_ethdev_core.h +++ b/lib/librte_ethdev/rte_ethdev_core.h @@ -105,7 +105,7 @@ typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev, typedef void (*eth_dev_infos_get_t)(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info); -/**< @internal Get specific informations of an Ethernet device. */ +/**< @internal Get specific information of an Ethernet device. */ typedef const uint32_t *(*eth_dev_supported_ptypes_get_t)(struct rte_eth_dev *dev); /**< @internal Get supported ptypes of an Ethernet device. */ @@ -367,7 +367,7 @@ typedef int (*eth_tm_ops_get_t)(struct rte_eth_dev *dev, void *ops); /**< @internal Get Traffic Management (TM) operations on an Ethernet device */ typedef int (*eth_mtr_ops_get_t)(struct rte_eth_dev *dev, void *ops); -/**< @internal Get Trafffic Metering and Policing (MTR) operations */ +/**< @internal Get Traffic Metering and Policing (MTR) operations */ typedef int (*eth_get_dcb_info)(struct rte_eth_dev *dev, struct rte_eth_dcb_info *dcb_info); diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h index c2ac2632..f2274809 100644 --- a/lib/librte_ethdev/rte_ethdev_driver.h +++ b/lib/librte_ethdev/rte_ethdev_driver.h @@ -317,7 +317,7 @@ typedef int (*ethdev_uninit_t)(struct rte_eth_dev *ethdev); * @warning * @b EXPERIMENTAL: this API may change without prior notice. * - * PMD helper function for cleaing up the resources of a ethdev port on it's + * PMD helper function for cleaning up the resources of a ethdev port on it's * destruction. * * @param ethdev diff --git a/lib/librte_ethdev/rte_tm.h b/lib/librte_ethdev/rte_tm.h index 646ef388..af23c0f5 100644 --- a/lib/librte_ethdev/rte_tm.h +++ b/lib/librte_ethdev/rte_tm.h @@ -378,7 +378,7 @@ struct rte_tm_capabilities { uint32_t sched_wfq_weight_max; /** WRED packet mode support. When non-zero, this parameter indicates - * that there is atleast one leaf node that supports the WRED packet + * that there is at least one leaf node that supports the WRED packet * mode, which might not be true for all the leaf nodes. In packet * mode, the WRED thresholds specify the queue length in packets, as * opposed to bytes. @@ -386,7 +386,7 @@ struct rte_tm_capabilities { int cman_wred_packet_mode_supported; /** WRED byte mode support. When non-zero, this parameter indicates that - * there is atleast one leaf node that supports the WRED byte mode, + * there is at least one leaf node that supports the WRED byte mode, * which might not be true for all the leaf nodes. In byte mode, the * WRED thresholds specify the queue length in bytes, as opposed to * packets. @@ -645,7 +645,7 @@ struct rte_tm_level_capabilities { uint32_t shaper_shared_n_max; /** WRED packet mode support. When non-zero, this - * parameter indicates that there is atleast one leaf + * parameter indicates that there is at least one leaf * node on this level that supports the WRED packet * mode, which might not be true for all the leaf * nodes. 
In packet mode, the WRED thresholds specify @@ -654,7 +654,7 @@ struct rte_tm_level_capabilities { int cman_wred_packet_mode_supported; /** WRED byte mode support. When non-zero, this - * parameter indicates that there is atleast one leaf + * parameter indicates that there is at least one leaf * node on this level that supports the WRED byte mode, * which might not be true for all the leaf nodes. In * byte mode, the WRED thresholds specify the queue diff --git a/lib/librte_eventdev/rte_event_crypto_adapter.c b/lib/librte_eventdev/rte_event_crypto_adapter.c index 11b28ca9..5faf3c90 100644 --- a/lib/librte_eventdev/rte_event_crypto_adapter.c +++ b/lib/librte_eventdev/rte_event_crypto_adapter.c @@ -159,6 +159,9 @@ eca_default_config_cb(uint8_t id, uint8_t dev_id, struct rte_event_port_conf *port_conf = arg; struct rte_event_crypto_adapter *adapter = eca_id_to_adapter(id); + if (adapter == NULL) + return -EINVAL; + dev = &rte_eventdevs[adapter->eventdev_id]; dev_conf = dev->data->dev_conf; @@ -353,7 +356,7 @@ eca_enq_to_cryptodev(struct rte_event_crypto_adapter *adapter, cdev_id = m_data->request_info.cdev_id; qp_id = m_data->request_info.queue_pair_id; qp_info = &adapter->cdevs[cdev_id].qpairs[qp_id]; - if (qp_info == NULL) { + if (!qp_info->qp_enabled) { rte_pktmbuf_free(crypto_op->sym->m_src); rte_crypto_op_free(crypto_op); continue; @@ -369,7 +372,7 @@ eca_enq_to_cryptodev(struct rte_event_crypto_adapter *adapter, cdev_id = m_data->request_info.cdev_id; qp_id = m_data->request_info.queue_pair_id; qp_info = &adapter->cdevs[cdev_id].qpairs[qp_id]; - if (qp_info == NULL) { + if (!qp_info->qp_enabled) { rte_pktmbuf_free(crypto_op->sym->m_src); rte_crypto_op_free(crypto_op); continue; @@ -427,10 +430,9 @@ eca_crypto_enq_flush(struct rte_event_crypto_adapter *adapter) ret = 0; for (cdev_id = 0; cdev_id < num_cdev; cdev_id++) { curr_dev = &adapter->cdevs[cdev_id]; - if (curr_dev == NULL) - continue; dev = curr_dev->dev; - + if (dev == NULL) + continue; for (qp = 0; qp < dev->data->nb_queue_pairs; qp++) { curr_queue = &curr_dev->qpairs[qp]; @@ -579,9 +581,9 @@ eca_crypto_adapter_deq_run(struct rte_event_crypto_adapter *adapter, for (cdev_id = adapter->next_cdev_id; cdev_id < num_cdev; cdev_id++) { curr_dev = &adapter->cdevs[cdev_id]; - if (curr_dev == NULL) - continue; dev = curr_dev->dev; + if (dev == NULL) + continue; dev_qps = dev->data->nb_queue_pairs; for (qp = curr_dev->next_queue_pair_id; diff --git a/lib/librte_eventdev/rte_event_crypto_adapter.h b/lib/librte_eventdev/rte_event_crypto_adapter.h index d367309c..9ac8e6f4 100644 --- a/lib/librte_eventdev/rte_event_crypto_adapter.h +++ b/lib/librte_eventdev/rte_event_crypto_adapter.h @@ -139,7 +139,7 @@ * - rte_event_crypto_adapter_stats_get() * - rte_event_crypto_adapter_stats_reset() - * The applicaton creates an instance using rte_event_crypto_adapter_create() + * The application creates an instance using rte_event_crypto_adapter_create() * or rte_event_crypto_adapter_create_ext(). 
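For context, a minimal sketch of the creation sequence the corrected comment describes, assuming an eventdev and a cryptodev are already configured; the adapter, eventdev and cryptodev identifiers below are illustrative placeholders, not values from this patch:

#include <rte_event_crypto_adapter.h>

/* Illustrative identifiers; a real application discovers these
 * during eventdev/cryptodev setup. */
#define CA_ID    0  /* adapter instance */
#define EVDEV_ID 0  /* event device */
#define CDEV_ID  0  /* crypto device */

static int
crypto_adapter_setup(struct rte_event_port_conf *port_conf)
{
        int ret;

        /* Create the adapter using the default event port configuration
         * callback, operating in "new event" mode. */
        ret = rte_event_crypto_adapter_create(CA_ID, EVDEV_ID, port_conf,
                        RTE_EVENT_CRYPTO_ADAPTER_OP_NEW);
        if (ret < 0)
                return ret;

        /* Bind queue pair 0 of the cryptodev to the adapter; passing -1
         * as the queue pair id would bind all pairs. */
        ret = rte_event_crypto_adapter_queue_pair_add(CA_ID, CDEV_ID, 0, NULL);
        if (ret < 0)
                return ret;

        return rte_event_crypto_adapter_start(CA_ID);
}

In RTE_EVENT_CRYPTO_ADAPTER_OP_NEW mode the adapter enqueues completed crypto operations to the event device as new events, which is the path the eca_enq_to_cryptodev() fixes below operate on.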
* * Cryptodev queue pair addition/deletion is done using the diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c index 8d178be1..627875a9 100644 --- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c @@ -872,7 +872,7 @@ rxa_eth_rx(struct rte_event_eth_rx_adapter *rx_adapter, break; } - if (buf->count >= BATCH_SIZE) + if (buf->count > 0) rxa_flush_event_buffer(rx_adapter); return nb_rx; diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.h b/lib/librte_eventdev/rte_event_eth_rx_adapter.h index 863b72a1..bb14bb2d 100644 --- a/lib/librte_eventdev/rte_event_eth_rx_adapter.h +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.h @@ -66,9 +66,9 @@ * For SW based packet transfers, i.e., when the * RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT is not set in the adapter's * capabilities flags for a particular ethernet device, the service function - * temporarily enqueues mbufs to an event buffer before batch enqueueing these + * temporarily enqueues mbufs to an event buffer before batch enqueuing these * to the event device. If the buffer fills up, the service function stops - * dequeueing packets from the ethernet device. The application may want to + * dequeuing packets from the ethernet device. The application may want to * monitor the buffer fill level and instruct the service function to * selectively buffer packets. The application may also use some other * criteria to decide which packets should enter the event device even when diff --git a/lib/librte_eventdev/rte_event_eth_tx_adapter.h b/lib/librte_eventdev/rte_event_eth_tx_adapter.h index 81456d4a..7a4a01fa 100644 --- a/lib/librte_eventdev/rte_event_eth_tx_adapter.h +++ b/lib/librte_eventdev/rte_event_eth_tx_adapter.h @@ -365,7 +365,8 @@ rte_event_eth_tx_adapter_event_port_get(uint8_t id, uint8_t *event_port_id); * which contain the event object enqueue operations to be processed. * @param nb_events * The number of event objects to enqueue, typically number of - * rte_event_port_enqueue_depth() available for this port. + * rte_event_port_attr_get(...RTE_EVENT_PORT_ATTR_ENQ_DEPTH...) + * available for this port. * * @return * The number of event objects actually enqueued on the event device. The diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index ef10a855..38608114 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -1155,7 +1155,7 @@ rte_event_timer_adapter_caps_get(uint8_t dev_id, uint32_t *caps); */ #define RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA 0x8 -/**< Flag indicates HW/SW suports a mechanism to store and retrieve +/**< Flag indicates HW/SW supports a mechanism to store and retrieve * the private data information along with the crypto session. */ @@ -1366,7 +1366,8 @@ __rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id, * which contain the event object enqueue operations to be processed. * @param nb_events * The number of event objects to enqueue, typically number of - * rte_event_port_enqueue_depth() available for this port. + * rte_event_port_attr_get(...RTE_EVENT_PORT_ATTR_ENQ_DEPTH...) + * available for this port. * * @return * The number of event objects actually enqueued on the event device. The @@ -1381,7 +1382,7 @@ __rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id, * - -ENOSPC The event port was backpressured and unable to enqueue * one or more events. This error code is only applicable to * closed systems. 
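The attribute-based references introduced here can be exercised as in this sketch, which sizes a burst to the port's configured enqueue depth; dev_id, port_id and the event array are assumed to come from application setup code:

#include <rte_eventdev.h>

/* Query the enqueue depth through rte_event_port_attr_get(), as the
 * updated documentation references, and enqueue a burst of that size. */
static uint16_t
enqueue_at_depth(uint8_t dev_id, uint8_t port_id, const struct rte_event *evs)
{
        uint32_t depth = 1;

        if (rte_event_port_attr_get(dev_id, port_id,
                        RTE_EVENT_PORT_ATTR_ENQ_DEPTH, &depth) < 0)
                return 0;

        /* On a short count, rte_errno distinguishes backpressure
         * (-ENOSPC on closed systems) from an invalid event. */
        return rte_event_enqueue_burst(dev_id, port_id, evs, (uint16_t)depth);
}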
- * @see rte_event_port_enqueue_depth() + * @see rte_event_port_attr_get(), RTE_EVENT_PORT_ATTR_ENQ_DEPTH */ static inline uint16_t rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id, @@ -1415,7 +1416,8 @@ rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id, * which contain the event object enqueue operations to be processed. * @param nb_events * The number of event objects to enqueue, typically number of - * rte_event_port_enqueue_depth() available for this port. + * rte_event_port_attr_get(...RTE_EVENT_PORT_ATTR_ENQ_DEPTH...) + * available for this port. * * @return * The number of event objects actually enqueued on the event device. The @@ -1430,7 +1432,8 @@ rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id, * - -ENOSPC The event port was backpressured and unable to enqueue * one or more events. This error code is only applicable to * closed systems. - * @see rte_event_port_enqueue_depth() rte_event_enqueue_burst() + * @see rte_event_port_attr_get(), RTE_EVENT_PORT_ATTR_ENQ_DEPTH + * @see rte_event_enqueue_burst() */ static inline uint16_t rte_event_enqueue_new_burst(uint8_t dev_id, uint8_t port_id, @@ -1464,7 +1467,8 @@ rte_event_enqueue_new_burst(uint8_t dev_id, uint8_t port_id, * which contain the event object enqueue operations to be processed. * @param nb_events * The number of event objects to enqueue, typically number of - * rte_event_port_enqueue_depth() available for this port. + * rte_event_port_attr_get(...RTE_EVENT_PORT_ATTR_ENQ_DEPTH...) + * available for this port. * * @return * The number of event objects actually enqueued on the event device. The @@ -1479,7 +1483,8 @@ rte_event_enqueue_new_burst(uint8_t dev_id, uint8_t port_id, * - -ENOSPC The event port was backpressured and unable to enqueue * one or more events. This error code is only applicable to * closed systems. - * @see rte_event_port_enqueue_depth() rte_event_enqueue_burst() + * @see rte_event_port_attr_get(), RTE_EVENT_PORT_ATTR_ENQ_DEPTH + * @see rte_event_enqueue_burst() */ static inline uint16_t rte_event_enqueue_forward_burst(uint8_t dev_id, uint8_t port_id, @@ -1737,7 +1742,7 @@ rte_event_port_unlink(uint8_t dev_id, uint8_t port_id, * @see rte_event_port_unlink() to issue unlink requests. * * @param dev_id - * The indentifier of the device. + * The identifier of the device. * * @param port_id * Event port identifier to select port to check for unlinks in progress. diff --git a/lib/librte_eventdev/rte_eventdev_pmd.h b/lib/librte_eventdev/rte_eventdev_pmd.h index 1a01326b..d118b9e5 100644 --- a/lib/librte_eventdev/rte_eventdev_pmd.h +++ b/lib/librte_eventdev/rte_eventdev_pmd.h @@ -873,7 +873,7 @@ typedef int (*eventdev_eth_tx_adapter_free_t)(uint8_t id, * Ethernet device pointer * * @param tx_queue_id - * Transmt queue index + * Transmit queue index * * @return * - 0: Success. diff --git a/lib/librte_flow_classify/rte_flow_classify.h b/lib/librte_flow_classify/rte_flow_classify.h index 56e06353..01e88e54 100644 --- a/lib/librte_flow_classify/rte_flow_classify.h +++ b/lib/librte_flow_classify/rte_flow_classify.h @@ -208,7 +208,7 @@ rte_flow_classify_validate(struct rte_flow_classifier *cls, struct rte_flow_error *error); /** - * Add a flow classify rule to the flow_classifer table. + * Add a flow classify rule to the flow_classifier table. * * @param[in] cls * Flow classifier handle @@ -235,7 +235,7 @@ rte_flow_classify_table_entry_add(struct rte_flow_classifier *cls, struct rte_flow_error *error); /** - * Delete a flow classify rule from the flow_classifer table. 
+ * Delete a flow classify rule from the flow_classifier table. * * @param[in] cls * Flow classifier handle diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index c01489ba..d7a5f4c2 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -1536,14 +1536,19 @@ int __rte_experimental rte_hash_free_key_with_position(const struct rte_hash *h, const int32_t position) { - RETURN_IF_TRUE(((h == NULL) || (position == EMPTY_SLOT)), -EINVAL); + /* Key index where key is stored, adding the first dummy index */ + uint32_t key_idx = position + 1; + + RETURN_IF_TRUE(((h == NULL) || (key_idx == EMPTY_SLOT)), -EINVAL); unsigned int lcore_id, n_slots; struct lcore_cache *cached_free_slots; - const int32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES; + const uint32_t total_entries = h->use_local_cache ? + h->entries + (RTE_MAX_LCORE - 1) * (LCORE_CACHE_SIZE - 1) + 1 + : h->entries + 1; /* Out of bounds */ - if (position >= total_entries) + if (key_idx >= total_entries) return -EINVAL; if (h->use_local_cache) { @@ -1560,11 +1565,11 @@ rte_hash_free_key_with_position(const struct rte_hash *h, } /* Put index of new free slot in cache. */ cached_free_slots->objs[cached_free_slots->len] = - (void *)((uintptr_t)position); + (void *)((uintptr_t)key_idx); cached_free_slots->len++; } else { rte_ring_sp_enqueue(h->free_slots, - (void *)((uintptr_t)position)); + (void *)((uintptr_t)key_idx)); } return 0; diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h index c93d1a13..4432aef7 100644 --- a/lib/librte_hash/rte_hash.h +++ b/lib/librte_hash/rte_hash.h @@ -39,7 +39,7 @@ extern "C" { /** Flag to support reader writer concurrency */ #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04 -/** Flag to indicate the extendabe bucket table feature should be used */ +/** Flag to indicate the extendable bucket table feature should be used */ #define RTE_HASH_EXTRA_FLAGS_EXT_TABLE 0x08 /** Flag to disable freeing of key index on hash delete. @@ -463,7 +463,7 @@ rte_hash_lookup_with_hash(const struct rte_hash *h, /** * Calc a hash value by key. - * This operation is not multi-thread safe. + * This operation is not multi-process safe. * * @param h * Hash table to look in. diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h index 04fd9df5..bc4c100f 100644 --- a/lib/librte_ip_frag/rte_ip_frag.h +++ b/lib/librte_ip_frag/rte_ip_frag.h @@ -274,7 +274,7 @@ int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, /** * This function implements reassembly of fragmented IPv4 packets. - * Incoming mbufs should have its l2_len/l3_len fields setup correclty. + * Incoming mbufs should have its l2_len/l3_len fields setup correctly. * * @param tbl * Table where to lookup/add the fragmented packet. 
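The key_idx adjustment in the rte_cuckoo_hash.c hunk above matches the documented no-free-on-delete flow; a hedged sketch of that flow, assuming the table was created with RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL set:

#include <rte_hash.h>

/* Deferred slot reclamation: deletion and key-slot free are decoupled
 * when RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL is used. */
static void
delete_then_free(const struct rte_hash *h, const void *key)
{
        /* The returned position is the same key index reported by
         * add/lookup; per the fix above, the library internally offsets
         * it past the dummy slot 0 before recycling it. */
        int32_t pos = rte_hash_del_key(h, key);

        if (pos < 0)
                return; /* key not present */

        /* ... wait here until no concurrent reader still uses the key ... */

        rte_hash_free_key_with_position(h, pos);
}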
diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h index 02ca43b4..d44496c7 100644 --- a/lib/librte_kni/rte_kni.h +++ b/lib/librte_kni/rte_kni.h @@ -68,7 +68,7 @@ struct rte_kni_conf { __extension__ uint8_t force_bind : 1; /* Flag to bind kernel thread */ - char mac_addr[ETHER_ADDR_LEN]; /* MAC address assigned to KNI */ + uint8_t mac_addr[ETHER_ADDR_LEN]; /* MAC address assigned to KNI */ uint16_t mtu; }; diff --git a/lib/librte_latencystats/rte_latencystats.h b/lib/librte_latencystats/rte_latencystats.h index efcfa028..67120729 100644 --- a/lib/librte_latencystats/rte_latencystats.h +++ b/lib/librte_latencystats/rte_latencystats.h @@ -24,7 +24,7 @@ extern "C" { * Note: This function pointer is for future flow based latency stats * implementation. * - * Function type used for identifting flow types of a Rx packet. + * Function type used for identifying flow types of a Rx packet. * * The callback function is called on Rx for each packet. * This function is used for flow based latency calculations. diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index 21550444..b886f54b 100644 --- a/lib/librte_lpm/rte_lpm.h +++ b/lib/librte_lpm/rte_lpm.h @@ -442,7 +442,7 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips, * @param hop * Next hop of the most specific rule found for IP (valid on lookup hit only). * This is an 4 elements array of two byte values. - * If the lookup was succesfull for the given IP, then least significant byte + * If the lookup was successful for the given IP, then least significant byte * of the corresponding element is the actual next hop and the most * significant byte is zero. * If the lookup for the given IP failed, then corresponding element would diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index 3dbc6695..8eab5a83 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -209,7 +209,7 @@ extern "C" { /** * Outer UDP checksum offload flag. This flag is used for enabling * outer UDP checksum in PMD. To use outer UDP checksum, the user needs to - * 1) Enable the following in mbuff, + * 1) Enable the following in mbuf, * a) Fill outer_l2_len and outer_l3_len in mbuf. * b) Set the PKT_TX_OUTER_UDP_CKSUM flag. * c) Set the PKT_TX_OUTER_IPV4 or PKT_TX_OUTER_IPV6 flag. @@ -279,9 +279,11 @@ extern "C" { #define PKT_TX_TUNNEL_MASK (0xFULL << 45) /** - * Second VLAN insertion (QinQ) flag. + * Double VLAN insertion (QinQ) request to driver, driver may offload the + * insertion based on device capability. + * mbuf 'vlan_tci' & 'vlan_tci_outer' must be valid when this flag is set. */ -#define PKT_TX_QINQ (1ULL << 49) /**< TX packet with double VLAN inserted. */ +#define PKT_TX_QINQ (1ULL << 49) /* this old name is deprecated */ #define PKT_TX_QINQ_PKT PKT_TX_QINQ @@ -337,7 +339,9 @@ extern "C" { #define PKT_TX_IPV6 (1ULL << 56) /** - * TX packet is a 802.1q VLAN packet. + * VLAN tag insertion request to driver, driver may offload the insertion + * based on the device capability. + * mbuf 'vlan_tci' field must be valid when this flag is set. */ #define PKT_TX_VLAN (1ULL << 57) /* this old name is deprecated */ @@ -913,7 +917,7 @@ rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) { /* * The atomic_add is an expensive operation, so we don't want to - * call it in the case where we know we are the uniq holder of + * call it in the case where we know we are the unique holder of * this mbuf (i.e. ref_cnt == 1). 
Otherwise, an atomic * operation has to be used because concurrent accesses on the * reference counter can occur. @@ -1286,7 +1290,7 @@ static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m) * The given mbuf must have only one segment. * * @param m - * The packet mbuf to be resetted. + * The packet mbuf to be reset. */ #define MBUF_INVALID_PORT UINT16_MAX @@ -1459,7 +1463,7 @@ rte_pktmbuf_ext_shinfo_init_helper(void *buf_addr, uint16_t *buf_len, * ``rte_pktmbuf_detach()``. * * Memory for shared data must be provided and user must initialize all of - * the content properly, escpecially free callback and refcnt. The pointer + * the content properly, especially free callback and refcnt. The pointer * of shared data will be stored in m->shinfo. * ``rte_pktmbuf_ext_shinfo_init_helper`` can help to simply spare a few * bytes at the end of buffer for the shared data, store free callback and diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h index 23bc635f..17a2dd35 100644 --- a/lib/librte_mbuf/rte_mbuf_ptype.h +++ b/lib/librte_mbuf/rte_mbuf_ptype.h @@ -426,7 +426,7 @@ extern "C" { */ #define RTE_PTYPE_TUNNEL_ESP 0x00009000 /** - * L2TP (Layer 2 Tunneling Protocol) tunnleing packet type. + * L2TP (Layer 2 Tunneling Protocol) tunneling packet type. * * Packet format: * <'ether type'=0x0800 diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index 7c9cd9a2..fe2f3335 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -427,7 +427,7 @@ typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp, * @warning * @b EXPERIMENTAL: this API may change without prior notice. * - * Dequeue a number of contiquous object blocks from the external pool. + * Dequeue a number of contiguous object blocks from the external pool. */ typedef int (*rte_mempool_dequeue_contig_blocks_t)(struct rte_mempool *mp, void **first_obj_table, unsigned int n); @@ -1364,7 +1364,7 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table, &cache->objs[cache->len], req); if (unlikely(ret < 0)) { /* - * In the offchance that we are buffer constrained, + * In the off chance that we are buffer constrained, * where we are not able to allocate cache + n, go to * the ring directly. If that fails, we are truly out of * buffers. 
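The clarified PKT_TX_VLAN contract above (a valid vlan_tci plus the flag, offloaded only if the device supports it) suggests the following sketch; tx_offloads is assumed to be the tx_offload_capa value cached from rte_eth_dev_info_get() at configuration time:

#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_mbuf.h>

/* Tag a packet for transmit: request hardware VLAN insertion when the
 * device can offload it, otherwise insert the tag in software. */
static int
vlan_tag_for_tx(struct rte_mbuf **m, uint16_t tci, uint64_t tx_offloads)
{
        (*m)->vlan_tci = tci;

        if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) {
                (*m)->ol_flags |= PKT_TX_VLAN;
                return 0;
        }

        /* Software path; the rte_ether.h hunk that follows makes
         * rte_vlan_insert() clear PKT_TX_VLAN so a driver cannot
         * insert the tag a second time. */
        return rte_vlan_insert(m);
}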
diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h index c2c5e249..e0d83111 100644 --- a/lib/librte_net/rte_ether.h +++ b/lib/librte_net/rte_ether.h @@ -408,7 +408,7 @@ static inline int rte_vlan_insert(struct rte_mbuf **m) vh = (struct vlan_hdr *) (nh + 1); vh->vlan_tci = rte_cpu_to_be_16((*m)->vlan_tci); - (*m)->ol_flags &= ~PKT_RX_VLAN_STRIPPED; + (*m)->ol_flags &= ~(PKT_RX_VLAN_STRIPPED | PKT_TX_VLAN); return 0; } diff --git a/lib/librte_power/power_acpi_cpufreq.c b/lib/librte_power/power_acpi_cpufreq.c index cd5978d5..f7d3f9ca 100644 --- a/lib/librte_power/power_acpi_cpufreq.c +++ b/lib/librte_power/power_acpi_cpufreq.c @@ -12,8 +12,9 @@ #include <signal.h> #include <limits.h> -#include <rte_memcpy.h> #include <rte_atomic.h> +#include <rte_memcpy.h> +#include <rte_memory.h> #include "power_acpi_cpufreq.h" #include "power_common.h" @@ -147,6 +148,8 @@ power_set_governor_userspace(struct rte_power_info *pi) s = fgets(buf, sizeof(buf), f); FOPS_OR_NULL_GOTO(s, out); + /* Strip off terminating '\n' */ + strtok(buf, "\n"); /* Check if current governor is userspace */ if (strncmp(buf, POWER_GOVERNOR_USERSPACE, diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c index 208b7919..cc05b0fa 100644 --- a/lib/librte_power/rte_power.c +++ b/lib/librte_power/rte_power.c @@ -2,7 +2,7 @@ * Copyright(c) 2010-2014 Intel Corporation */ -#include <rte_atomic.h> +#include <rte_spinlock.h> #include "rte_power.h" #include "power_acpi_cpufreq.h" @@ -11,7 +11,7 @@ enum power_management_env global_default_env = PM_ENV_NOT_SET; -volatile uint32_t global_env_cfg_status = 0; +static rte_spinlock_t global_env_cfg_lock = RTE_SPINLOCK_INITIALIZER; /* function pointers */ rte_power_freqs_t rte_power_freqs = NULL; @@ -29,9 +29,15 @@ rte_power_get_capabilities_t rte_power_get_capabilities; int rte_power_set_env(enum power_management_env env) { - if (rte_atomic32_cmpset(&global_env_cfg_status, 0, 1) == 0) { + rte_spinlock_lock(&global_env_cfg_lock); + + if (global_default_env != PM_ENV_NOT_SET) { + rte_spinlock_unlock(&global_env_cfg_lock); return 0; } + + int ret = 0; + if (env == PM_ENV_ACPI_CPUFREQ) { rte_power_freqs = power_acpi_cpufreq_freqs; rte_power_get_freq = power_acpi_cpufreq_get_freq; @@ -59,19 +65,25 @@ rte_power_set_env(enum power_management_env env) } else { RTE_LOG(ERR, POWER, "Invalid Power Management Environment(%d) set\n", env); - rte_power_unset_env(); - return -1; + ret = -1; } - global_default_env = env; - return 0; + + if (ret == 0) + global_default_env = env; + else + global_default_env = PM_ENV_NOT_SET; + + rte_spinlock_unlock(&global_env_cfg_lock); + return ret; } void rte_power_unset_env(void) { - if (rte_atomic32_cmpset(&global_env_cfg_status, 1, 0) != 0) - global_default_env = PM_ENV_NOT_SET; + rte_spinlock_lock(&global_env_cfg_lock); + global_default_env = PM_ENV_NOT_SET; + rte_spinlock_unlock(&global_env_cfg_lock); } enum power_management_env diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h index d70bc0b3..d7542c3f 100644 --- a/lib/librte_power/rte_power.h +++ b/lib/librte_power/rte_power.h @@ -25,7 +25,7 @@ enum power_management_env {PM_ENV_NOT_SET, PM_ENV_ACPI_CPUFREQ, PM_ENV_KVM_VM}; /** * Set the default power management implementation. If this is not called prior * to rte_power_init(), then auto-detect of the environment will take place. - * It is not thread safe. + * It is thread safe. * * @param env * env. The environment in which to initialise Power Management for. 
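With the spinlock rework above, rte_power_set_env() becomes safe to call from multiple threads, and only the first call takes effect. A minimal sketch of the intended call order; the ACPI cpufreq backend is an illustrative choice (the KVM VM backend being the other):

#include <rte_power.h>

/* Explicitly select the power management backend before per-lcore
 * initialisation; a second rte_power_set_env() call from another
 * thread is simply ignored once an environment is set. */
static int
power_setup(unsigned int lcore_id)
{
        if (rte_power_set_env(PM_ENV_ACPI_CPUFREQ) < 0)
                return -1;

        /* Auto-detection happens only if no environment was set. */
        return rte_power_init(lcore_id);
}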
diff --git a/lib/librte_power/rte_power_empty_poll.c b/lib/librte_power/rte_power_empty_poll.c index e6145462..15d4f050 100644 --- a/lib/librte_power/rte_power_empty_poll.c +++ b/lib/librte_power/rte_power_empty_poll.c @@ -156,11 +156,8 @@ update_training_stats(struct priority_worker *poll_stats, { RTE_SET_USED(specific_freq); - char pfi_str[32]; uint64_t p0_empty_deq; - sprintf(pfi_str, "%02d", freq); - if (poll_stats->cur_freq == freq && poll_stats->thresh[freq].trained == false) { if (poll_stats->thresh[freq].cur_train_iter == 0) { diff --git a/lib/librte_power/rte_power_empty_poll.h b/lib/librte_power/rte_power_empty_poll.h index c1ad5c24..33f24e0c 100644 --- a/lib/librte_power/rte_power_empty_poll.h +++ b/lib/librte_power/rte_power_empty_poll.h @@ -59,7 +59,7 @@ struct freq_threshold { uint32_t cur_train_iter; }; -/* Each Worder Thread Empty Poll Stats */ +/* Each Worker Thread Empty Poll Stats */ struct priority_worker { /* Current dequeue and throughput counts */ diff --git a/lib/librte_rawdev/rte_rawdev.h b/lib/librte_rawdev/rte_rawdev.h index 684bfdb8..ed011ca2 100644 --- a/lib/librte_rawdev/rte_rawdev.h +++ b/lib/librte_rawdev/rte_rawdev.h @@ -25,7 +25,7 @@ extern "C" { #include <rte_memory.h> #include <rte_errno.h> -/* Rawdevice object - essentially a void to be typecasted by implementation */ +/* Rawdevice object - essentially a void to be typecast by implementation */ typedef void *rte_rawdev_obj_t; /** @@ -244,7 +244,7 @@ rte_rawdev_close(uint16_t dev_id); * @param dev_id * Raw device identifiers * @return - * 0 for sucessful reset, + * 0 for successful reset, * !0 for failure in resetting */ int @@ -373,7 +373,7 @@ rte_rawdev_set_attr(uint16_t dev_id, * @param dev_id * The identifier of the device to configure. * @param buffers - * Collection of buffers for enqueueing + * Collection of buffers for enqueuing * @param count * Count of buffers to enqueue * @param context diff --git a/lib/librte_rawdev/rte_rawdev_pmd.h b/lib/librte_rawdev/rte_rawdev_pmd.h index 811e51d0..5e6cf1d1 100644 --- a/lib/librte_rawdev/rte_rawdev_pmd.h +++ b/lib/librte_rawdev/rte_rawdev_pmd.h @@ -282,7 +282,7 @@ typedef uint16_t (*rawdev_queue_count_t)(struct rte_rawdev *dev); * an opaque object representing context of the call; for example, an * application can pass information about the queues on which enqueue needs * to be done. Or, the enqueue operation might be passed reference to an - * object containing a callback (agreed upon between applicatio and driver). + * object containing a callback (agreed upon between application and driver). * * @return * >=0 Count of buffers successfully enqueued (0: no buffers enqueued) @@ -463,7 +463,7 @@ typedef int (*rawdev_firmware_version_get_t)(struct rte_rawdev *dev, rte_rawdev_obj_t version_info); /** - * Load firwmare from a buffer (DMA'able) + * Load firmware from a buffer (DMA'able) * * @param dev * Raw device pointer @@ -480,7 +480,7 @@ typedef int (*rawdev_firmware_load_t)(struct rte_rawdev *dev, rte_rawdev_obj_t firmware_buf); /** - * Unload firwmare + * Unload firmware * * @param dev * Raw device pointer @@ -548,7 +548,7 @@ struct rte_rawdev_ops { /**< Reset the statistics values in xstats. 
*/ rawdev_xstats_reset_t xstats_reset; - /**< Obtainer firmware status */ + /**< Obtain firmware status */ rawdev_firmware_status_get_t firmware_status_get; /**< Obtain firmware version information */ rawdev_firmware_version_get_t firmware_version_get; diff --git a/lib/librte_reorder/rte_reorder.h b/lib/librte_reorder/rte_reorder.h index 1bcc2e32..6d397100 100644 --- a/lib/librte_reorder/rte_reorder.h +++ b/lib/librte_reorder/rte_reorder.h @@ -70,7 +70,7 @@ rte_reorder_init(struct rte_reorder_buffer *b, unsigned int bufsize, * and return a pointer to it. * * @param name - * Name of the reorder buffer instacne as passed to rte_reorder_create() + * Name of the reorder buffer instance as passed to rte_reorder_create() * @return * Pointer to reorder buffer instance or NULL if object not found with rte_errno * set appropriately. Possible rte_errno values include: diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c index d215acec..550549db 100644 --- a/lib/librte_ring/rte_ring.c +++ b/lib/librte_ring/rte_ring.c @@ -189,7 +189,8 @@ rte_ring_free(struct rte_ring *r) * therefore, there is no memzone to free. */ if (r->memzone == NULL) { - RTE_LOG(ERR, RING, "Cannot free ring (not created with rte_ring_create()"); + RTE_LOG(ERR, RING, + "Cannot free ring, not created with rte_ring_create()\n"); return; } diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h index af5444a9..e265e947 100644 --- a/lib/librte_ring/rte_ring.h +++ b/lib/librte_ring/rte_ring.h @@ -57,7 +57,7 @@ enum rte_ring_queue_behavior { }; #define RTE_RING_MZ_PREFIX "RG_" -/**< The maximum length of a ring name. */ +/** The maximum length of a ring name. */ #define RTE_RING_NAMESIZE (RTE_MEMZONE_NAMESIZE - \ sizeof(RTE_RING_MZ_PREFIX) + 1) @@ -302,7 +302,7 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r); * (powerpc/arm). * There are 2 choices for the users * 1.use rmb() memory barrier - * 2.use one-direcion load_acquire/store_release barrier,defined by + * 2.use one-direction load_acquire/store_release barrier,defined by * CONFIG_RTE_USE_C11_MEM_MODEL=y * It depends on performance test results. * By default, move common functions to rte_ring_generic.h diff --git a/lib/librte_ring/rte_ring_generic.h b/lib/librte_ring/rte_ring_generic.h index ea7dbe5b..953cdbbd 100644 --- a/lib/librte_ring/rte_ring_generic.h +++ b/lib/librte_ring/rte_ring_generic.h @@ -158,11 +158,14 @@ __rte_ring_move_cons_head(struct rte_ring *r, unsigned int is_sc, return 0; *new_head = *old_head + n; - if (is_sc) - r->cons.head = *new_head, success = 1; - else + if (is_sc) { + r->cons.head = *new_head; + rte_smp_rmb(); + success = 1; + } else { success = rte_atomic32_cmpset(&r->cons.head, *old_head, *new_head); + } } while (unlikely(success == 0)); return n; } diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h index 84fa896d..c1bdb1df 100644 --- a/lib/librte_sched/rte_sched.h +++ b/lib/librte_sched/rte_sched.h @@ -33,7 +33,7 @@ extern "C" { * classes of the same subport; * - When any subport traffic class is oversubscribed * (configuration time event), the usage of subport member - * pipes with high demand for thattraffic class pipes is + * pipes with high demand for that traffic class pipes is * truncated to a dynamically adjusted value with no * impact to low demand pipes; * 3. 
Pipe: diff --git a/lib/librte_security/rte_security.h b/lib/librte_security/rte_security.h index 718147e0..7e6ced4e 100644 --- a/lib/librte_security/rte_security.h +++ b/lib/librte_security/rte_security.h @@ -534,7 +534,7 @@ struct rte_security_capability { enum rte_security_pdcp_domain domain; /**< PDCP mode of operation: Control or data */ uint32_t capa_flags; - /**< Capabilitity flags, see RTE_SECURITY_PDCP_* */ + /**< Capability flags, see RTE_SECURITY_PDCP_* */ } pdcp; /**< PDCP capability */ }; @@ -566,7 +566,7 @@ struct rte_security_capability { #define RTE_SECURITY_TX_HW_TRAILER_OFFLOAD 0x00000002 /**< HW constructs trailer of packets * Transmitted packets will have the trailer added to them - * by hardawre. The next protocol field will be based on + * by hardware. The next protocol field will be based on * the mbuf->inner_esp_next_proto field. */ #define RTE_SECURITY_RX_HW_TRAILER_OFFLOAD 0x00010000 diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h index 6f55bd57..61a0eed6 100644 --- a/lib/librte_table/rte_table_hash.h +++ b/lib/librte_table/rte_table_hash.h @@ -29,7 +29,7 @@ extern "C" { * be picked and dropped, the most likely candidate for drop, i.e. the * current LRU key, is always picked. The LRU logic requires maintaining * specific data structures per each bucket. Use-cases: flow cache, etc. - * b. Extendible bucket (ext): The bucket is extended with space for 4 more + * b. Extendable bucket (ext): The bucket is extended with space for 4 more * keys. This is done by allocating additional memory at table init time, * which is used to create a pool of free keys (the size of this pool is * configurable and always a multiple of 4). On key add operation, the @@ -41,7 +41,7 @@ extern "C" { * current bucket is in extended state and a match is not found in the * first group of 4 keys, the search continues beyond the first group of * 4 keys, potentially until all keys in this bucket are examined. The - * extendible bucket logic requires maintaining specific data structures + * extendable bucket logic requires maintaining specific data structures * per table and per each bucket. Use-cases: flow table, etc. * 2. Key size: * a. 
Configurable key size @@ -86,7 +86,7 @@ struct rte_table_hash_params { uint64_t seed; }; -/** Extendible bucket hash table operations */ +/** Extendable bucket hash table operations */ extern struct rte_table_ops rte_table_hash_ext_ops; extern struct rte_table_ops rte_table_hash_key8_ext_ops; extern struct rte_table_ops rte_table_hash_key16_ext_ops; diff --git a/lib/librte_table/rte_table_hash_func.h b/lib/librte_table/rte_table_hash_func.h index 02296eab..11ea5a90 100644 --- a/lib/librte_table/rte_table_hash_func.h +++ b/lib/librte_table/rte_table_hash_func.h @@ -40,7 +40,7 @@ rte_crc32_u64(uint64_t crc, uint64_t v) return _mm_crc32_u64(crc, v); } -#elif defined(RTE_ARCH_ARM64) +#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32) #include "rte_table_hash_func_arm64.h" #else diff --git a/lib/librte_telemetry/Makefile b/lib/librte_telemetry/Makefile index 1a050691..ef73a4e7 100644 --- a/lib/librte_telemetry/Makefile +++ b/lib/librte_telemetry/Makefile @@ -19,6 +19,11 @@ EXPORT_MAP := rte_telemetry_version.map LIBABIVER := 1 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_rte_telemetry.o += -Wno-address-of-packed-member +CFLAGS_rte_telemetry_parser.o += -Wno-address-of-packed-member +endif + # library source files SRCS-$(CONFIG_RTE_LIBRTE_TELEMETRY) := rte_telemetry.c SRCS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += rte_telemetry_parser.c diff --git a/lib/librte_telemetry/rte_telemetry_parser.c b/lib/librte_telemetry/rte_telemetry_parser.c index 03a58a2f..9bc16eef 100644 --- a/lib/librte_telemetry/rte_telemetry_parser.c +++ b/lib/librte_telemetry/rte_telemetry_parser.c @@ -256,7 +256,7 @@ rte_telemetry_command_ports_all_stat_values(struct telemetry_impl *telemetry, int action, json_t *data) { int ret, num_metrics, i, p; - struct rte_metric_name *names; + struct rte_metric_value *values; uint64_t num_port_ids = 0; uint32_t port_ids[RTE_MAX_ETHPORTS]; @@ -281,7 +281,7 @@ rte_telemetry_command_ports_all_stat_values(struct telemetry_impl *telemetry, return -1; } - num_metrics = rte_metrics_get_names(NULL, 0); + num_metrics = rte_metrics_get_values(0, NULL, 0); if (num_metrics < 0) { TELEMETRY_LOG_ERR("Cannot get metrics count"); @@ -300,8 +300,8 @@ rte_telemetry_command_ports_all_stat_values(struct telemetry_impl *telemetry, return -1; } - names = malloc(sizeof(struct rte_metric_name) * num_metrics); - if (names == NULL) { + values = malloc(sizeof(struct rte_metric_value) * num_metrics); + if (values == NULL) { TELEMETRY_LOG_ERR("Cannot allocate memory"); ret = rte_telemetry_send_error_response(telemetry, -ENOMEM); @@ -310,7 +310,6 @@ rte_telemetry_command_ports_all_stat_values(struct telemetry_impl *telemetry, return -1; } - const char *stat_names[num_metrics]; uint32_t stat_ids[num_metrics]; RTE_ETH_FOREACH_DEV(p) { @@ -328,16 +327,13 @@ rte_telemetry_command_ports_all_stat_values(struct telemetry_impl *telemetry, goto fail; } - ret = rte_metrics_get_names(names, num_metrics); - for (i = 0; i < num_metrics; i++) - stat_names[i] = names[i].name; - - ret = rte_telemetry_stat_names_to_ids(telemetry, stat_names, stat_ids, - num_metrics); + ret = rte_metrics_get_values(port_ids[0], values, num_metrics); if (ret < 0) { - TELEMETRY_LOG_ERR("Could not convert stat names to IDs"); + TELEMETRY_LOG_ERR("Could not get stat values"); goto fail; } + for (i = 0; i < num_metrics; i++) + stat_ids[i] = values[i].key; ret = rte_telemetry_send_ports_stats_values(stat_ids, num_metrics, port_ids, num_port_ids, telemetry); @@ -349,7 +345,7 @@ rte_telemetry_command_ports_all_stat_values(struct telemetry_impl 
*telemetry, return 0; fail: - free(names); + free(values); return -1; } diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index d280ac42..5905e240 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -488,7 +488,7 @@ int rte_vhost_get_ifname(int vid, char *buf, size_t len); * virtio queue index * * @return - * num of avail entires left + * num of avail entries left */ uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id); @@ -536,7 +536,7 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, /** * Get guest mem table: a list of memory regions. * - * An rte_vhost_vhost_memory object will be allocated internaly, to hold the + * An rte_vhost_vhost_memory object will be allocated internally, to hold the * guest memory regions. Application should free it at destroy_device() * callback. * diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index 9883b049..09799516 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -240,7 +240,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) RTE_LOG(ERR, VHOST_CONFIG, "failed to add vhost user connection with fd %d\n", fd); - goto err; + goto err_cleanup; } } @@ -257,7 +257,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) if (vsocket->notify_ops->destroy_connection) vsocket->notify_ops->destroy_connection(conn->vid); - goto err; + goto err_cleanup; } pthread_mutex_lock(&vsocket->conn_mutex); @@ -267,6 +267,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) fdset_pipe_notify(&vhost_user.fdset); return; +err_cleanup: + vhost_destroy_device(vid); err: free(conn); close(fd); @@ -295,13 +297,19 @@ vhost_user_read_cb(int connfd, void *dat, int *remove) ret = vhost_user_msg_handler(conn->vid, connfd); if (ret < 0) { + struct virtio_net *dev = get_device(conn->vid); + close(connfd); *remove = 1; - vhost_destroy_device(conn->vid); + + if (dev) + vhost_destroy_device_notify(dev); if (vsocket->notify_ops->destroy_connection) vsocket->notify_ops->destroy_connection(conn->vid); + vhost_destroy_device(conn->vid); + pthread_mutex_lock(&vsocket->conn_mutex); TAILQ_REMOVE(&vsocket->conn_list, conn, next); pthread_mutex_unlock(&vsocket->conn_mutex); @@ -547,6 +555,9 @@ find_vhost_user_socket(const char *path) { int i; + if (path == NULL) + return NULL; + for (i = 0; i < vhost_user.vsocket_cnt; i++) { struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; @@ -562,7 +573,7 @@ rte_vhost_driver_attach_vdpa_device(const char *path, int did) { struct vhost_user_socket *vsocket; - if (rte_vdpa_get_device(did) == NULL) + if (rte_vdpa_get_device(did) == NULL || path == NULL) return -1; pthread_mutex_lock(&vhost_user.mutex); @@ -961,6 +972,9 @@ rte_vhost_driver_unregister(const char *path) int count; struct vhost_user_connection *conn, *next; + if (path == NULL) + return -1; + again: pthread_mutex_lock(&vhost_user.mutex); diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c index e7d849ee..f560419b 100644 --- a/lib/librte_vhost/vdpa.c +++ b/lib/librte_vhost/vdpa.c @@ -49,7 +49,7 @@ rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr, char device_name[MAX_VDPA_NAME_LEN]; int i; - if (vdpa_device_num >= MAX_VHOST_DEVICE) + if (vdpa_device_num >= MAX_VHOST_DEVICE || addr == NULL || ops == NULL) return -1; for (i = 0; i < MAX_VHOST_DEVICE; i++) { @@ -66,7 +66,7 @@ rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr, if (i == MAX_VHOST_DEVICE) return -1; - sprintf(device_name, "vdpa-dev-%d", i); + 
snprintf(device_name, sizeof(device_name), "vdpa-dev-%d", i); dev = rte_zmalloc(device_name, sizeof(struct rte_vdpa_device), RTE_CACHE_LINE_SIZE); if (!dev) @@ -99,6 +99,9 @@ rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr) struct rte_vdpa_device *dev; int i; + if (addr == NULL) + return -1; + for (i = 0; i < MAX_VHOST_DEVICE; ++i) { dev = vdpa_devices[i]; if (dev && is_same_vdpa_device(&dev->addr, addr)) diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 70ac6bc9..488cf169 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -460,7 +460,7 @@ rte_vhost_get_mtu(int vid, uint16_t *mtu) { struct virtio_net *dev = get_device(vid); - if (!dev) + if (dev == NULL || mtu == NULL) return -ENODEV; if (!(dev->flags & VIRTIO_DEV_READY)) @@ -528,7 +528,7 @@ rte_vhost_get_ifname(int vid, char *buf, size_t len) { struct virtio_net *dev = get_device(vid); - if (dev == NULL) + if (dev == NULL || buf == NULL) return -1; len = RTE_MIN(len, sizeof(dev->ifname)); @@ -545,7 +545,7 @@ rte_vhost_get_negotiated_features(int vid, uint64_t *features) struct virtio_net *dev; dev = get_device(vid); - if (!dev) + if (dev == NULL || features == NULL) return -1; *features = dev->features; @@ -560,7 +560,7 @@ rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem) size_t size; dev = get_device(vid); - if (!dev) + if (dev == NULL || mem == NULL) return -1; size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region); @@ -583,7 +583,7 @@ rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx, struct vhost_virtqueue *vq; dev = get_device(vid); - if (!dev) + if (dev == NULL || vring == NULL) return -1; if (vring_idx >= VHOST_MAX_VRING) @@ -776,7 +776,7 @@ int rte_vhost_get_log_base(int vid, uint64_t *log_base, { struct virtio_net *dev = get_device(vid); - if (!dev) + if (dev == NULL || log_base == NULL || log_size == NULL) return -1; if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { @@ -797,7 +797,7 @@ int rte_vhost_get_vring_base(int vid, uint16_t queue_id, { struct virtio_net *dev = get_device(vid); - if (!dev) + if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL) return -1; if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { @@ -818,7 +818,7 @@ int rte_vhost_set_vring_base(int vid, uint16_t queue_id, { struct virtio_net *dev = get_device(vid); - if (!dev) + if (dev == NULL) return -1; if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 552b9298..bb9cff9f 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -686,16 +686,20 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq) if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) { uint16_t old = vq->signalled_used; uint16_t new = vq->last_used_idx; + bool signalled_used_valid = vq->signalled_used_valid; + + vq->signalled_used = new; + vq->signalled_used_valid = true; VHOST_LOG_DEBUG(VHOST_DATA, "%s: used_event_idx=%d, old=%d, new=%d\n", __func__, vhost_used_event(vq), old, new); - if (vhost_need_event(vhost_used_event(vq), new, old) - && (vq->callfd >= 0)) { - vq->signalled_used = vq->last_used_idx; + + if ((vhost_need_event(vhost_used_event(vq), new, old) && + (vq->callfd >= 0)) || + unlikely(!signalled_used_valid)) eventfd_write(vq->callfd, (eventfd_t) 1); - } } else { /* Kick the guest if necessary. 
*/ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) @@ -755,4 +759,38 @@ kick: eventfd_write(vq->callfd, (eventfd_t)1); } +static __rte_always_inline void +restore_mbuf(struct rte_mbuf *m) +{ + uint32_t mbuf_size, priv_size; + + while (m) { + priv_size = rte_pktmbuf_priv_size(m->pool); + mbuf_size = sizeof(struct rte_mbuf) + priv_size; + /* start of buffer is after mbuf structure and priv data */ + + m->buf_addr = (char *)m + mbuf_size; + m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size; + m = m->next; + } +} + +static __rte_always_inline bool +mbuf_is_consumed(struct rte_mbuf *m) +{ + while (m) { + if (rte_mbuf_refcnt_read(m) > 1) + return false; + m = m->next; + } + + return true; +} + +static __rte_always_inline void +put_zmbuf(struct zcopy_mbuf *zmbuf) +{ + zmbuf->in_use = 0; +} + #endif /* _VHOST_NET_CDEV_H_ */ diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c index 0694c0a7..fc362ba9 100644 --- a/lib/librte_vhost/vhost_crypto.c +++ b/lib/librte_vhost/vhost_crypto.c @@ -1102,7 +1102,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, if (unlikely(copy_data(digest_addr, vc_req, &digest_desc, chain->para.hash_result_len, - nb_descs, vq_size)) < 0) { + nb_descs, vq_size) < 0)) { ret = VIRTIO_CRYPTO_BADMSG; goto error_exit; } @@ -1617,7 +1617,7 @@ rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, op->sym->m_src->data_off = 0; if (unlikely(vhost_crypto_process_one_req(vcrypto, vq, - op, head, desc_idx)) < 0) + op, head, desc_idx) < 0)) break; } diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 19e04c95..5552f8bb 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -93,15 +93,47 @@ get_blk_size(int fd) return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; } +/* + * Reclaim all the outstanding zmbufs for a virtqueue. + */ +static void +drain_zmbuf_list(struct vhost_virtqueue *vq) +{ + struct zcopy_mbuf *zmbuf, *next; + + for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); + zmbuf != NULL; zmbuf = next) { + next = TAILQ_NEXT(zmbuf, next); + + while (!mbuf_is_consumed(zmbuf->mbuf)) + usleep(1000); + + TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); + restore_mbuf(zmbuf->mbuf); + rte_pktmbuf_free(zmbuf->mbuf); + put_zmbuf(zmbuf); + vq->nr_zmbuf -= 1; + } +} + static void free_mem_region(struct virtio_net *dev) { uint32_t i; struct rte_vhost_mem_region *reg; + struct vhost_virtqueue *vq; if (!dev || !dev->mem) return; + if (dev->dequeue_zero_copy) { + for (i = 0; i < dev->nr_vring; i++) { + vq = dev->virtqueue[i]; + if (vq) + drain_zmbuf_list(vq); + } + } + for (i = 0; i < dev->mem->nregions; i++) { reg = &dev->mem->regions[i]; if (reg->host_user_addr) { @@ -1199,8 +1231,12 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, * the ring starts already enabled. Otherwise, it is enabled via * the SET_VRING_ENABLE message. 
*/ - if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) + if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) { vq->enabled = 1; + if (dev->notify_ops->vring_state_changed) + dev->notify_ops->vring_state_changed( + dev->vid, file.index, 1); + } if (vq->kickfd >= 0) close(vq->kickfd); @@ -1212,15 +1248,7 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, static void free_zmbufs(struct vhost_virtqueue *vq) { - struct zcopy_mbuf *zmbuf, *next; - - for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); - zmbuf != NULL; zmbuf = next) { - next = TAILQ_NEXT(zmbuf, next); - - rte_pktmbuf_free(zmbuf->mbuf); - TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); - } + drain_zmbuf_list(vq); rte_free(vq->zmbufs); } @@ -1274,6 +1302,8 @@ vhost_user_get_vring_base(struct virtio_net **pdev, vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + vq->signalled_used_valid = false; + if (dev->dequeue_zero_copy) free_zmbufs(vq); if (vq_is_packed(dev)) { @@ -1321,6 +1351,10 @@ vhost_user_set_vring_enable(struct virtio_net **pdev, dev->notify_ops->vring_state_changed(dev->vid, index, enable); + /* On disable, rings have to be stopped being processed. */ + if (!enable && dev->dequeue_zero_copy) + drain_zmbuf_list(dev->virtqueue[index]); + dev->virtqueue[index]->enabled = enable; return VH_RESULT_OK; diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 15d682c3..a6576891 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -1088,12 +1088,6 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m) } } -static __rte_always_inline void -put_zmbuf(struct zcopy_mbuf *zmbuf) -{ - zmbuf->in_use = 0; -} - static __rte_always_inline int copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, struct buf_vector *buf_vec, uint16_t nr_vec, @@ -1331,34 +1325,6 @@ again: return NULL; } -static __rte_always_inline bool -mbuf_is_consumed(struct rte_mbuf *m) -{ - while (m) { - if (rte_mbuf_refcnt_read(m) > 1) - return false; - m = m->next; - } - - return true; -} - -static __rte_always_inline void -restore_mbuf(struct rte_mbuf *m) -{ - uint32_t mbuf_size, priv_size; - - while (m) { - priv_size = rte_pktmbuf_priv_size(m->pool); - mbuf_size = sizeof(struct rte_mbuf) + priv_size; - /* start of buffer is after mbuf structure and priv data */ - - m->buf_addr = (char *)m + mbuf_size; - m->buf_iova = rte_mempool_virt2iova(m) + mbuf_size; - m = m->next; - } -} - static __rte_always_inline uint16_t virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) |
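The relocated restore_mbuf()/mbuf_is_consumed()/put_zmbuf() helpers back the dequeue path that applications reach through rte_vhost_dequeue_burst(). A rough usage sketch under stated assumptions: GUEST_TXQ is a hypothetical queue index, and vid and pool come from the application's new_device() callback and setup code respectively:

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_vhost.h>

#define GUEST_TXQ 1 /* hypothetical: the guest's transmit virtqueue index */

/* Drain one burst of packets the guest has made available. In dequeue
 * zero-copy mode the returned mbufs reference guest memory and are
 * reclaimed internally once mbuf_is_consumed() sees refcnt == 1. */
static uint16_t
drain_guest_tx(int vid, struct rte_mempool *pool, struct rte_mbuf **pkts,
                uint16_t max_burst)
{
        if (rte_vhost_avail_entries(vid, GUEST_TXQ) == 0)
                return 0;

        return rte_vhost_dequeue_burst(vid, GUEST_TXQ, pool, pkts, max_burst);
}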