Diffstat (limited to 'lib')
-rw-r--r--  lib/librte_compressdev/rte_comp.h                   |   4
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal.c                     |  42
-rw-r--r--  lib/librte_eal/common/eal_common_memory.c           |  12
-rw-r--r--  lib/librte_eal/common/eal_common_memzone.c          |   8
-rw-r--r--  lib/librte_eal/common/eal_common_options.c          |  51
-rw-r--r--  lib/librte_eal/common/eal_common_proc.c             |  42
-rw-r--r--  lib/librte_eal/common/eal_filesystem.h              |   9
-rw-r--r--  lib/librte_eal/common/eal_internal_cfg.h            |   6
-rw-r--r--  lib/librte_eal/common/eal_options.h                 |   1
-rw-r--r--  lib/librte_eal/common/eal_private.h                 |   6
-rw-r--r--  lib/librte_eal/common/hotplug_mp.c                  |   4
-rw-r--r--  lib/librte_eal/common/include/generic/rte_atomic.h  |   6
-rw-r--r--  lib/librte_eal/common/include/rte_malloc.h          |  13
-rw-r--r--  lib/librte_eal/common/include/rte_version.h         |   2
-rw-r--r--  lib/librte_eal/common/malloc_elem.c                 |   4
-rw-r--r--  lib/librte_eal/common/malloc_mp.c                   |   8
-rw-r--r--  lib/librte_eal/common/rte_malloc.c                  |  34
-rw-r--r--  lib/librte_eal/common/rte_option.c                  |   5
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal.c                   | 150
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_memalloc.c          |  50
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_memory.c            |   2
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_vfio.c              |  91
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_vfio.h              |  12
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c      |  16
-rw-r--r--  lib/librte_efd/rte_efd.c                            |  14
-rw-r--r--  lib/librte_ethdev/rte_ethdev.c                      |   4
-rw-r--r--  lib/librte_ethdev/rte_ethdev.h                      |   7
-rw-r--r--  lib/librte_eventdev/rte_event_eth_rx_adapter.c      |   2
-rw-r--r--  lib/librte_eventdev/rte_event_eth_tx_adapter.c      |  54
-rw-r--r--  lib/librte_eventdev/rte_event_timer_adapter.h       |  57
-rw-r--r--  lib/librte_eventdev/rte_eventdev.h                  |   4
-rw-r--r--  lib/librte_gro/gro_tcp4.c                           |  10
-rw-r--r--  lib/librte_gro/gro_tcp4.h                           |   5
-rw-r--r--  lib/librte_gro/gro_vxlan_tcp4.c                     |  10
-rw-r--r--  lib/librte_gso/gso_common.h                         |   4
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash.c                   |  12
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash.h                   |  11
-rw-r--r--  lib/librte_ip_frag/rte_ip_frag.h                    |   1
-rw-r--r--  lib/librte_ip_frag/rte_ipv6_fragmentation.c         |  18
-rw-r--r--  lib/librte_net/rte_ip.h                             |  12
-rw-r--r--  lib/librte_sched/rte_sched.c                        |   2
-rw-r--r--  lib/librte_telemetry/rte_telemetry.c                |  40
-rw-r--r--  lib/librte_telemetry/rte_telemetry_internal.h       |   2
-rw-r--r--  lib/librte_timer/rte_timer.c                        |  28
-rw-r--r--  lib/librte_vhost/fd_man.c                           |   4
-rw-r--r--  lib/librte_vhost/fd_man.h                           |   1
-rw-r--r--  lib/librte_vhost/socket.c                           |   4
-rw-r--r--  lib/librte_vhost/vhost.h                            |   6
-rw-r--r--  lib/librte_vhost/vhost_crypto.c                     | 140
-rw-r--r--  lib/librte_vhost/vhost_user.c                       |  22
-rw-r--r--  lib/librte_vhost/virtio_net.c                       |  32
-rw-r--r--  lib/meson.build                                     |   2
52 files changed, 803 insertions(+), 283 deletions(-)
diff --git a/lib/librte_compressdev/rte_comp.h b/lib/librte_compressdev/rte_comp.h
index 395ce29f..ea306d5f 100644
--- a/lib/librte_compressdev/rte_comp.h
+++ b/lib/librte_compressdev/rte_comp.h
@@ -310,7 +310,7 @@ struct rte_comp_op {
struct rte_mbuf *m_src;
/**< source mbuf
* The total size of the input buffer(s) can be retrieved using
- * rte_pktmbuf_data_len(m_src). The max data size which can fit in a
+ * rte_pktmbuf_pkt_len(m_src). The max data size which can fit in a
* single mbuf is limited by the uint16_t rte_mbuf.data_len to 64k-1.
* If the input data is bigger than this it can be passed to the PMD in
* a chain of mbufs if the PMD's capabilities indicate it supports this.
@@ -318,7 +318,7 @@ struct rte_comp_op {
struct rte_mbuf *m_dst;
/**< destination mbuf
* The total size of the output buffer(s) can be retrieved using
- * rte_pktmbuf_data_len(m_dst). The max data size which can fit in a
+ * rte_pktmbuf_pkt_len(m_dst). The max data size which can fit in a
* single mbuf is limited by the uint16_t rte_mbuf.data_len to 64k-1.
* If the output data is expected to be bigger than this a chain of
* mbufs can be passed to the PMD if the PMD's capabilities indicate
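
The documentation fix above matters for chained mbufs: rte_pktmbuf_data_len()
covers only the first segment, while rte_pktmbuf_pkt_len() sums data_len over
the whole chain. A minimal sketch (not part of the patch) of the intended
accessor:

#include <rte_comp.h>
#include <rte_mbuf.h>

/* Total input size of a compression op, valid even when m_src is a
 * multi-segment mbuf chain.
 */
static uint32_t
comp_op_input_size(const struct rte_comp_op *op)
{
	return rte_pktmbuf_pkt_len(op->m_src);
}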
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index b8152a75..f01495e3 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -115,7 +115,7 @@ eal_create_runtime_dir(void)
/* create prefix-specific subdirectory under DPDK runtime dir */
ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
- tmp, internal_config.hugefile_prefix);
+ tmp, eal_get_hugefile_prefix());
if (ret < 0 || ret == sizeof(runtime_dir)) {
RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
return -1;
@@ -141,6 +141,16 @@ eal_create_runtime_dir(void)
return 0;
}
+int
+eal_clean_runtime_dir(void)
+{
+	/* FreeBSD doesn't need this implemented for now: unlike Linux,
+	 * FreeBSD doesn't create per-process runtime files, so there is
+	 * nothing to clean up.
+	 */
+ return 0;
+}
+
const char *
rte_eal_get_runtime_dir(void)
{
@@ -447,9 +457,21 @@ eal_parse_args(int argc, char **argv)
switch (opt) {
case OPT_MBUF_POOL_OPS_NAME_NUM:
- internal_config.user_mbuf_pool_ops_name =
- strdup(optarg);
+ {
+ char *ops_name = strdup(optarg);
+ if (ops_name == NULL)
+ RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
+ else {
+ /* free old ops name */
+ if (internal_config.user_mbuf_pool_ops_name !=
+ NULL)
+ free(internal_config.user_mbuf_pool_ops_name);
+
+ internal_config.user_mbuf_pool_ops_name =
+ ops_name;
+ }
break;
+ }
case 'h':
eal_usage(prgname);
exit(EXIT_SUCCESS);
@@ -807,6 +829,18 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ /*
+ * Clean up unused files in runtime directory. We do this at the end of
+ * init and not at the beginning because we want to clean stuff up
+ * whether we are primary or secondary process, but we cannot remove
+ * primary process' files because secondary should be able to run even
+ * if primary process is dead.
+ */
+ if (eal_clean_runtime_dir() < 0) {
+ rte_eal_init_alert("Cannot clear runtime directory\n");
+ return -1;
+ }
+
rte_eal_mcfg_complete();
/* Call each registered callback, if enabled */
@@ -819,6 +853,8 @@ int __rte_experimental
rte_eal_cleanup(void)
{
rte_service_finalize();
+ rte_mp_channel_cleanup();
+ eal_cleanup_config(&internal_config);
return 0;
}
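
With rte_mp_channel_cleanup() and eal_cleanup_config() wired into
rte_eal_cleanup(), a typical shutdown path is sketched below
(rte_eal_cleanup() is experimental in this release, so building with
ALLOW_EXPERIMENTAL_API is assumed):

#include <rte_eal.h>

int
main(int argc, char **argv)
{
	if (rte_eal_init(argc, argv) < 0)
		return -1;

	/* ... application work ... */

	/* now also closes the IPC socket and frees the option strings */
	return rte_eal_cleanup();
}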
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index d47ea493..999ba24b 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -704,6 +704,12 @@ rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms)
return -1;
}
+ /* segment fd API is not supported for external segments */
+ if (msl->external) {
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx);
if (ret < 0) {
rte_errno = -ret;
@@ -754,6 +760,12 @@ rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
return -1;
}
+ /* segment fd API is not supported for external segments */
+ if (msl->external) {
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset);
if (ret < 0) {
rte_errno = -ret;
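
Callers of the segment-fd API can now distinguish external segments. A hedged
caller sketch (rte_memseg_get_fd() is the real EAL API; the handling policy
here is illustrative):

#include <stdio.h>
#include <rte_errno.h>
#include <rte_memory.h>

static int
seg_fd_or_report(const struct rte_memseg *ms)
{
	int fd = rte_memseg_get_fd(ms);

	if (fd < 0 && rte_errno == ENOTSUP)
		printf("externally allocated segment: no fd available\n");
	return fd;
}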
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index b7081afb..664df5b9 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -365,6 +365,7 @@ int
rte_eal_memzone_init(void)
{
struct rte_mem_config *mcfg;
+ int ret = 0;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
@@ -375,17 +376,16 @@ rte_eal_memzone_init(void)
rte_fbarray_init(&mcfg->memzones, "memzone",
RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
- return -1;
+ ret = -1;
} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
rte_fbarray_attach(&mcfg->memzones)) {
RTE_LOG(ERR, EAL, "Cannot attach to memzone list\n");
- rte_rwlock_write_unlock(&mcfg->mlock);
- return -1;
+ ret = -1;
}
rte_rwlock_write_unlock(&mcfg->mlock);
- return 0;
+ return ret;
}
/* Walk all reserved memory zones */
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index e31eca5c..f6dfbc73 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -168,6 +168,14 @@ eal_option_device_parse(void)
return ret;
}
+const char *
+eal_get_hugefile_prefix(void)
+{
+ if (internal_config.hugefile_prefix != NULL)
+ return internal_config.hugefile_prefix;
+ return HUGEFILE_PREFIX_DEFAULT;
+}
+
void
eal_reset_internal_config(struct internal_config *internal_cfg)
{
@@ -176,7 +184,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
internal_cfg->memory = 0;
internal_cfg->force_nrank = 0;
internal_cfg->force_nchannel = 0;
- internal_cfg->hugefile_prefix = HUGEFILE_PREFIX_DEFAULT;
+ internal_cfg->hugefile_prefix = NULL;
internal_cfg->hugepage_dir = NULL;
internal_cfg->force_sockets = 0;
/* zero out the NUMA config */
@@ -591,7 +599,9 @@ eal_parse_corelist(const char *corelist)
if (*corelist == '\0')
return -1;
errno = 0;
- idx = strtoul(corelist, &end, 10);
+ idx = strtol(corelist, &end, 10);
+ if (idx < 0 || idx >= (int)cfg->lcore_count)
+ return -1;
if (errno || end == NULL)
return -1;
while (isblank(*end))
@@ -1102,6 +1112,7 @@ eal_parse_common_option(int opt, const char *optarg,
{
static int b_used;
static int w_used;
+ struct rte_config *cfg = rte_eal_get_configuration();
switch (opt) {
/* blacklist */
@@ -1144,7 +1155,9 @@ eal_parse_common_option(int opt, const char *optarg,
/* corelist */
case 'l':
if (eal_parse_corelist(optarg) < 0) {
- RTE_LOG(ERR, EAL, "invalid core list\n");
+ RTE_LOG(ERR, EAL,
+ "invalid core list, please check core numbers are in [0, %u] range\n",
+ cfg->lcore_count-1);
return -1;
}
@@ -1347,6 +1360,19 @@ eal_auto_detect_cores(struct rte_config *cfg)
}
int
+eal_cleanup_config(struct internal_config *internal_cfg)
+{
+ if (internal_cfg->hugefile_prefix != NULL)
+ free(internal_cfg->hugefile_prefix);
+ if (internal_cfg->hugepage_dir != NULL)
+ free(internal_cfg->hugepage_dir);
+ if (internal_cfg->user_mbuf_pool_ops_name != NULL)
+ free(internal_cfg->user_mbuf_pool_ops_name);
+
+ return 0;
+}
+
+int
eal_adjust_config(struct internal_config *internal_cfg)
{
int i;
@@ -1361,6 +1387,8 @@ eal_adjust_config(struct internal_config *internal_cfg)
/* default master lcore is the first one */
if (!master_lcore_parsed) {
cfg->master_lcore = rte_get_next_lcore(-1, 0, 0);
+ if (cfg->master_lcore >= RTE_MAX_LCORE)
+ return -1;
lcore_config[cfg->master_lcore].core_role = ROLE_RTE;
}
@@ -1386,7 +1414,22 @@ eal_check_common_options(struct internal_config *internal_cfg)
RTE_LOG(ERR, EAL, "Invalid process type specified\n");
return -1;
}
- if (index(internal_cfg->hugefile_prefix, '%') != NULL) {
+ if (internal_cfg->hugefile_prefix != NULL &&
+ strlen(internal_cfg->hugefile_prefix) < 1) {
+ RTE_LOG(ERR, EAL, "Invalid length of --" OPT_FILE_PREFIX " option\n");
+ return -1;
+ }
+ if (internal_cfg->hugepage_dir != NULL &&
+ strlen(internal_cfg->hugepage_dir) < 1) {
+ RTE_LOG(ERR, EAL, "Invalid length of --" OPT_HUGE_DIR" option\n");
+ return -1;
+ }
+ if (internal_cfg->user_mbuf_pool_ops_name != NULL &&
+ strlen(internal_cfg->user_mbuf_pool_ops_name) < 1) {
+ RTE_LOG(ERR, EAL, "Invalid length of --" OPT_MBUF_POOL_OPS_NAME" option\n");
+ return -1;
+ }
+ if (index(eal_get_hugefile_prefix(), '%') != NULL) {
RTE_LOG(ERR, EAL, "Invalid char, '%%', in --"OPT_FILE_PREFIX" "
"option\n");
return -1;
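
The prefix now stays NULL until --file-prefix is parsed, and all readers go
through eal_get_hugefile_prefix(). A self-contained model of that pattern
(internal_config itself is not shown; the names below are illustrative):

#include <stdio.h>

#define HUGEFILE_PREFIX_DEFAULT "rte"

static char *hugefile_prefix;	/* stands in for the internal_config field */

static const char *
get_hugefile_prefix(void)
{
	/* fall back to the built-in default until --file-prefix is parsed */
	return hugefile_prefix != NULL ? hugefile_prefix
				       : HUGEFILE_PREFIX_DEFAULT;
}

int
main(void)
{
	printf("%s\n", get_hugefile_prefix());	/* prints "rte" */
	return 0;
}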
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 1c3f09aa..b46d644b 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -37,6 +37,7 @@ static int mp_fd = -1;
static char mp_filter[PATH_MAX]; /* Filter for secondary process sockets */
static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
+static char peer_name[PATH_MAX];
struct action_entry {
TAILQ_ENTRY(action_entry) next;
@@ -511,9 +512,9 @@ async_reply_handle(void *arg)
static int
open_socket_fd(void)
{
- char peer_name[PATH_MAX] = {0};
struct sockaddr_un un;
+ peer_name[0] = '\0';
if (rte_eal_process_type() == RTE_PROC_SECONDARY)
snprintf(peer_name, sizeof(peer_name),
"%d_%"PRIx64, getpid(), rte_rdtsc());
@@ -542,27 +543,17 @@ open_socket_fd(void)
return mp_fd;
}
-static int
-unlink_sockets(const char *filter)
+static void
+close_socket_fd(void)
{
- int dir_fd;
- DIR *mp_dir;
- struct dirent *ent;
-
- mp_dir = opendir(mp_dir_path);
- if (!mp_dir) {
- RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path);
- return -1;
- }
- dir_fd = dirfd(mp_dir);
+ char path[PATH_MAX];
- while ((ent = readdir(mp_dir))) {
- if (fnmatch(filter, ent->d_name, 0) == 0)
- unlinkat(dir_fd, ent->d_name, 0);
- }
+ if (mp_fd < 0)
+ return;
- closedir(mp_dir);
- return 0;
+ close(mp_fd);
+ create_socket_path(peer_name, path, sizeof(path));
+ unlink(path);
}
int
@@ -603,13 +594,6 @@ rte_mp_channel_init(void)
return -1;
}
- if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
- unlink_sockets(mp_filter)) {
- RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
- close(dir_fd);
- return -1;
- }
-
if (open_socket_fd() < 0) {
close(dir_fd);
return -1;
@@ -632,6 +616,12 @@ rte_mp_channel_init(void)
return 0;
}
+void
+rte_mp_channel_cleanup(void)
+{
+ close_socket_fd();
+}
+
/**
* Return -1, as fail to send message and it's caused by the local side.
* Return 0, as fail to send message and it's caused by the remote side.
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
index 6e0331fd..89a3adde 100644
--- a/lib/librte_eal/common/eal_filesystem.h
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -25,6 +25,13 @@
int
eal_create_runtime_dir(void);
+int
+eal_clean_runtime_dir(void);
+
+/** Function to return hugefile prefix that's currently set up */
+const char *
+eal_get_hugefile_prefix(void);
+
#define RUNTIME_CONFIG_FNAME "config"
static inline const char *
eal_runtime_config_path(void)
@@ -86,7 +93,7 @@ static inline const char *
eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
{
snprintf(buffer, buflen, HUGEFILE_FMT, hugedir,
- internal_config.hugefile_prefix, f_id);
+ eal_get_hugefile_prefix(), f_id);
buffer[buflen - 1] = '\0';
return buffer;
}
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 737f17e3..783ce7de 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -64,9 +64,9 @@ struct internal_config {
volatile int syslog_facility; /**< facility passed to openlog() */
/** default interrupt mode for VFIO */
volatile enum rte_intr_mode vfio_intr_mode;
- const char *hugefile_prefix; /**< the base filename of hugetlbfs files */
- const char *hugepage_dir; /**< specific hugetlbfs directory to use */
- const char *user_mbuf_pool_ops_name;
+ char *hugefile_prefix; /**< the base filename of hugetlbfs files */
+ char *hugepage_dir; /**< specific hugetlbfs directory to use */
+ char *user_mbuf_pool_ops_name;
/**< user defined mbuf pool ops name */
unsigned num_hugepage_sizes; /**< how many sizes on this system */
struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 5271f944..327c95e9 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -75,6 +75,7 @@ int eal_parse_common_option(int opt, const char *argv,
struct internal_config *conf);
int eal_option_device_parse(void);
int eal_adjust_config(struct internal_config *internal_cfg);
+int eal_cleanup_config(struct internal_config *internal_cfg);
int eal_check_common_options(struct internal_config *internal_cfg);
void eal_common_usage(void);
enum rte_proc_type_t eal_proc_type_detect(void);
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 442c6dc4..4f483833 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -255,10 +255,14 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str);
* 0 on success;
* (<0) on failure.
*/
-
int rte_mp_channel_init(void);
/**
+ * Primary/secondary communication cleanup.
+ */
+void rte_mp_channel_cleanup(void);
+
+/**
* @internal
* Parse a device string and store its information in an
* rte_devargs structure.
diff --git a/lib/librte_eal/common/hotplug_mp.c b/lib/librte_eal/common/hotplug_mp.c
index 070e2e0c..9d610a8a 100644
--- a/lib/librte_eal/common/hotplug_mp.c
+++ b/lib/librte_eal/common/hotplug_mp.c
@@ -208,6 +208,8 @@ handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
ret = rte_eal_alarm_set(1, __handle_secondary_request, bundle);
if (ret != 0) {
RTE_LOG(ERR, EAL, "failed to add mp task\n");
+ free(bundle->peer);
+ free(bundle);
return send_response_to_secondary(req, ret, peer);
}
return 0;
@@ -332,6 +334,8 @@ handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
*/
ret = rte_eal_alarm_set(1, __handle_primary_request, bundle);
if (ret != 0) {
+ free(bundle->peer);
+ free(bundle);
resp->result = ret;
ret = rte_mp_reply(&mp_resp, peer);
if (ret != 0) {
diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h
index b99ba468..4afd1acc 100644
--- a/lib/librte_eal/common/include/generic/rte_atomic.h
+++ b/lib/librte_eal/common/include/generic/rte_atomic.h
@@ -212,7 +212,7 @@ rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val);
static inline uint16_t
rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
{
-#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+#if defined(__clang__)
return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
#else
return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST);
@@ -495,7 +495,7 @@ rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val);
static inline uint32_t
rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
{
-#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+#if defined(__clang__)
return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
#else
return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST);
@@ -777,7 +777,7 @@ rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val);
static inline uint64_t
rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
{
-#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+#if defined(__clang__)
return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
#else
return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST);
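
The guard change is a behavioural no-op: clang builds now always take the
type-generic __atomic_exchange_n builtin instead of the gcc-only sized form.
A brief usage sketch of one affected helper:

#include <rte_atomic.h>

/* Atomically install a new state word and return the previous one;
 * identical semantics on either compiler path.
 */
static uint32_t
swap_state(volatile uint32_t *state, uint32_t new_val)
{
	return rte_atomic32_exchange(state, new_val);
}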
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index 7249e6aa..54a12467 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -251,6 +251,9 @@ rte_malloc_validate(const void *ptr, size_t *size);
/**
* Get heap statistics for the specified heap.
*
+ * @note This function is not thread-safe with respect to
+ * ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions.
+ *
* @param socket
* An unsigned integer specifying the socket to get heap statistics for
* @param socket_stats
@@ -282,9 +285,9 @@ rte_malloc_get_socket_stats(int socket,
* @param heap_name
* Name of the heap to add memory chunk to
* @param va_addr
- * Start of virtual area to add to the heap
+ * Start of virtual area to add to the heap. Must be aligned by ``page_sz``.
* @param len
- * Length of virtual area to add to the heap
+ * Length of virtual area to add to the heap. Must be aligned by ``page_sz``.
* @param iova_addrs
* Array of page IOVA addresses corresponding to each page in this memory
* area. Can be NULL, in which case page IOVA addresses will be set to
@@ -461,6 +464,9 @@ rte_malloc_heap_socket_is_external(int socket_id);
* Dump for the specified type to a file. If the type argument is
* NULL, all memory types will be dumped.
*
+ * @note This function is not thread-safe with respect to
+ * ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions.
+ *
* @param f
* A pointer to a file for output
* @param type
@@ -473,6 +479,9 @@ rte_malloc_dump_stats(FILE *f, const char *type);
/**
* Dump contents of all malloc heaps to a file.
*
+ * @note This function is not thread-safe with respect to
+ * ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions.
+ *
* @param f
* A pointer to a file for output
*/
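
A hedged usage sketch of the stricter alignment contract (the heap name and
sizes are illustrative, and the heap is assumed to have been created earlier
with rte_malloc_heap_create()):

#include <sys/mman.h>
#include <rte_malloc.h>

static int
add_region_to_heap(void)
{
	size_t page_sz = 4096;
	size_t len = 1024 * page_sz;	/* multiple of page_sz, as required */
	void *va = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (va == MAP_FAILED)
		return -1;
	/* mmap() returns page-aligned memory; NULL iova_addrs means the
	 * page IOVAs are marked invalid, as documented above.
	 */
	return rte_malloc_heap_memory_add("sketch_heap", va, len, NULL,
			len / page_sz, page_sz);
}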
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index f01c227f..b4c6dd3c 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -37,7 +37,7 @@ extern "C" {
/**
* Patch level number i.e. the z in yy.mm.z
*/
-#define RTE_VER_MINOR 0
+#define RTE_VER_MINOR 1
/**
* Extra string to be appended to version number
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 9d3dcb6a..052aeeb7 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -38,6 +38,10 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
/* segment must start after header and with specified alignment */
contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
+ /* return if aligned address is already out of malloc element */
+ if (contig_seg_start > data_end)
+ return 0;
+
/* if we're in IOVA as VA mode, or if we're in legacy mode with
* hugepages, all elements are IOVA-contiguous. however, we can only
* make these assumptions about internal memory - externally allocated
diff --git a/lib/librte_eal/common/malloc_mp.c b/lib/librte_eal/common/malloc_mp.c
index 5f2d4e0b..f3a13353 100644
--- a/lib/librte_eal/common/malloc_mp.c
+++ b/lib/librte_eal/common/malloc_mp.c
@@ -209,6 +209,8 @@ handle_alloc_request(const struct malloc_mp_req *m,
map_addr = ms[0]->addr;
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);
+
/* we have succeeded in allocating memory, but we still need to sync
* with other processes. however, since DPDK IPC is single-threaded, we
* send an asynchronous request and exit this callback.
@@ -258,6 +260,9 @@ handle_request(const struct rte_mp_msg *msg, const void *peer __rte_unused)
if (m->t == REQ_TYPE_ALLOC) {
ret = handle_alloc_request(m, entry);
} else if (m->t == REQ_TYPE_FREE) {
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ m->free_req.addr, m->free_req.len);
+
ret = malloc_heap_free_pages(m->free_req.addr,
m->free_req.len);
} else {
@@ -436,6 +441,9 @@ handle_sync_response(const struct rte_mp_msg *request,
memset(&rb_msg, 0, sizeof(rb_msg));
/* we've failed to sync, so do a rollback */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ state->map_addr, state->map_len);
+
rollback_expand_heap(state->ms, state->ms_len, state->elem,
state->map_addr, state->map_len);
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index 0da5ad5e..47c2bec7 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -156,20 +156,14 @@ rte_malloc_get_socket_stats(int socket,
struct rte_malloc_socket_stats *socket_stats)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int heap_idx, ret = -1;
-
- rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ int heap_idx;
heap_idx = malloc_socket_to_heap_id(socket);
if (heap_idx < 0)
- goto unlock;
+ return -1;
- ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
+ return malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
socket_stats);
-unlock:
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
-
- return ret;
}
/*
@@ -181,14 +175,10 @@ rte_malloc_dump_heaps(FILE *f)
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
unsigned int idx;
- rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
-
for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
fprintf(f, "Heap id: %u\n", idx);
malloc_heap_dump(&mcfg->malloc_heaps[idx], f);
}
-
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
}
int
@@ -262,8 +252,6 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
unsigned int heap_id;
struct rte_malloc_socket_stats sock_stats;
- rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
-
/* Iterate through all initialised heaps */
for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
@@ -280,7 +268,6 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
}
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
return;
}
@@ -345,6 +332,9 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
if (heap_name == NULL || va_addr == NULL ||
page_sz == 0 || !rte_is_power_of_2(page_sz) ||
+ RTE_ALIGN(len, page_sz) != len ||
+ !rte_is_aligned(va_addr, page_sz) ||
+ ((len / page_sz) != n_pages && iova_addrs != NULL) ||
strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
RTE_HEAP_NAME_MAX_LEN) {
@@ -367,11 +357,6 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
goto unlock;
}
n = len / page_sz;
- if (n != n_pages && iova_addrs != NULL) {
- rte_errno = EINVAL;
- ret = -1;
- goto unlock;
- }
rte_spinlock_lock(&heap->lock);
ret = malloc_heap_add_external_memory(heap, va_addr, iova_addrs, n,
@@ -517,13 +502,8 @@ sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach)
if (wa.result < 0) {
rte_errno = -wa.result;
ret = -1;
- } else {
- /* notify all subscribers that a new memory area was added */
- if (attach)
- eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
- va_addr, len);
+ } else
ret = 0;
- }
unlock:
rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
return ret;
diff --git a/lib/librte_eal/common/rte_option.c b/lib/librte_eal/common/rte_option.c
index 02d59a86..198de6d2 100644
--- a/lib/librte_eal/common/rte_option.c
+++ b/lib/librte_eal/common/rte_option.c
@@ -35,10 +35,11 @@ void __rte_experimental
rte_option_register(struct rte_option *opt)
{
TAILQ_FOREACH(option, &rte_option_list, next) {
- if (strcmp(opt->opt_str, option->opt_str) == 0)
- RTE_LOG(INFO, EAL, "Option %s has already been registered.",
+ if (strcmp(opt->opt_str, option->opt_str) == 0) {
+ RTE_LOG(ERR, EAL, "Option %s has already been registered.\n",
opt->opt_str);
return;
+ }
}
TAILQ_INSERT_HEAD(&rte_option_list, opt, next);
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 361744d4..30138b63 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -13,7 +13,9 @@
#include <syslog.h>
#include <getopt.h>
#include <sys/file.h>
+#include <dirent.h>
#include <fcntl.h>
+#include <fnmatch.h>
#include <stddef.h>
#include <errno.h>
#include <limits.h>
@@ -123,7 +125,7 @@ eal_create_runtime_dir(void)
/* create prefix-specific subdirectory under DPDK runtime dir */
ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
- tmp, internal_config.hugefile_prefix);
+ tmp, eal_get_hugefile_prefix());
if (ret < 0 || ret == sizeof(runtime_dir)) {
RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
return -1;
@@ -149,6 +151,91 @@ eal_create_runtime_dir(void)
return 0;
}
+int
+eal_clean_runtime_dir(void)
+{
+ DIR *dir;
+ struct dirent *dirent;
+ int dir_fd, fd, lck_result;
+ static const char * const filters[] = {
+ "fbarray_*",
+ "mp_socket_*"
+ };
+
+ /* open directory */
+ dir = opendir(runtime_dir);
+ if (!dir) {
+ RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n",
+ runtime_dir);
+ goto error;
+ }
+ dir_fd = dirfd(dir);
+
+ /* lock the directory before doing anything, to avoid races */
+ if (flock(dir_fd, LOCK_EX) < 0) {
+ RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n",
+ runtime_dir);
+ goto error;
+ }
+
+ dirent = readdir(dir);
+ if (!dirent) {
+ RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n",
+ runtime_dir);
+ goto error;
+ }
+
+ while (dirent != NULL) {
+ unsigned int f_idx;
+ bool skip = true;
+
+ /* skip files that don't match the patterns */
+ for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) {
+ const char *filter = filters[f_idx];
+
+ if (fnmatch(filter, dirent->d_name, 0) == 0) {
+ skip = false;
+ break;
+ }
+ }
+ if (skip) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* try and lock the file */
+ fd = openat(dir_fd, dirent->d_name, O_RDONLY);
+
+ /* skip to next file */
+ if (fd == -1) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* non-blocking lock */
+ lck_result = flock(fd, LOCK_EX | LOCK_NB);
+
+ /* if lock succeeds, remove the file */
+ if (lck_result != -1)
+ unlinkat(dir_fd, dirent->d_name, 0);
+ close(fd);
+ dirent = readdir(dir);
+ }
+
+ /* closedir closes dir_fd and drops the lock */
+ closedir(dir);
+ return 0;
+
+error:
+ if (dir)
+ closedir(dir);
+
+ RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n",
+ strerror(errno));
+
+ return -1;
+}
+
const char *
rte_eal_get_runtime_dir(void)
{
@@ -494,10 +581,6 @@ eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
socket_arg[i] = val;
}
- /* check if we have a positive amount of total memory */
- if (total_mem == 0)
- return -1;
-
return 0;
}
@@ -639,13 +722,31 @@ eal_parse_args(int argc, char **argv)
exit(EXIT_SUCCESS);
case OPT_HUGE_DIR_NUM:
- internal_config.hugepage_dir = strdup(optarg);
+ {
+ char *hdir = strdup(optarg);
+ if (hdir == NULL)
+ RTE_LOG(ERR, EAL, "Could not store hugepage directory\n");
+ else {
+ /* free old hugepage dir */
+ if (internal_config.hugepage_dir != NULL)
+ free(internal_config.hugepage_dir);
+ internal_config.hugepage_dir = hdir;
+ }
break;
-
+ }
case OPT_FILE_PREFIX_NUM:
- internal_config.hugefile_prefix = strdup(optarg);
+ {
+ char *prefix = strdup(optarg);
+ if (prefix == NULL)
+ RTE_LOG(ERR, EAL, "Could not store file prefix\n");
+ else {
+ /* free old prefix */
+ if (internal_config.hugefile_prefix != NULL)
+ free(internal_config.hugefile_prefix);
+ internal_config.hugefile_prefix = prefix;
+ }
break;
-
+ }
case OPT_SOCKET_MEM_NUM:
if (eal_parse_socket_arg(optarg,
internal_config.socket_mem) < 0) {
@@ -695,10 +796,21 @@ eal_parse_args(int argc, char **argv)
break;
case OPT_MBUF_POOL_OPS_NAME_NUM:
- internal_config.user_mbuf_pool_ops_name =
- strdup(optarg);
+ {
+ char *ops_name = strdup(optarg);
+ if (ops_name == NULL)
+ RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
+ else {
+ /* free old ops name */
+ if (internal_config.user_mbuf_pool_ops_name !=
+ NULL)
+ free(internal_config.user_mbuf_pool_ops_name);
+
+ internal_config.user_mbuf_pool_ops_name =
+ ops_name;
+ }
break;
-
+ }
default:
if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
RTE_LOG(ERR, EAL, "Option %c is not supported "
@@ -1096,6 +1208,18 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ /*
+ * Clean up unused files in runtime directory. We do this at the end of
+ * init and not at the beginning because we want to clean stuff up
+ * whether we are primary or secondary process, but we cannot remove
+ * primary process' files because secondary should be able to run even
+ * if primary process is dead.
+ */
+ if (eal_clean_runtime_dir() < 0) {
+ rte_eal_init_alert("Cannot clear runtime directory\n");
+ return -1;
+ }
+
rte_eal_mcfg_complete();
/* Call each registered callback, if enabled */
@@ -1130,6 +1254,8 @@ rte_eal_cleanup(void)
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
rte_memseg_walk(mark_freeable, NULL);
rte_service_finalize();
+ rte_mp_channel_cleanup();
+ eal_cleanup_config(&internal_config);
return 0;
}
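
The core idiom in eal_clean_runtime_dir() is lock-probing: a file whose
non-blocking exclusive flock() succeeds has no living owner and is safe to
unlink. A condensed sketch of just that probe (plain POSIX, no EAL types):

#include <fcntl.h>
#include <sys/file.h>
#include <unistd.h>

static int
file_is_stale(int dir_fd, const char *name)
{
	int fd = openat(dir_fd, name, O_RDONLY);
	int stale;

	if (fd < 0)
		return 0;	/* cannot tell -- err on keeping the file */
	/* a live owner holds a lock on its files, so this flock() fails */
	stale = flock(fd, LOCK_EX | LOCK_NB) == 0;
	close(fd);
	return stale;
}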
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 78493956..f63d9ca6 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -23,6 +23,10 @@
#include <sys/time.h>
#include <signal.h>
#include <setjmp.h>
+#ifdef F_ADD_SEALS /* if file sealing is supported, so is memfd */
+#include <linux/memfd.h>
+#define MEMFD_SUPPORTED
+#endif
#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
#include <numa.h>
#include <numaif.h>
@@ -53,8 +57,8 @@ const int anonymous_hugepages_supported =
#endif
/*
- * we don't actually care if memfd itself is supported - we only need to check
- * if memfd supports hugetlbfs, as that already implies memfd support.
+ * we've already checked memfd support at compile-time, but we also need to
+ * check if we can create hugepage files with memfd.
*
* also, this is not a constant, because while we may be *compiled* with memfd
* hugetlbfs support, we might not be *running* on a system that supports memfd
@@ -63,10 +67,11 @@ const int anonymous_hugepages_supported =
*/
static int memfd_create_supported =
#ifdef MFD_HUGETLB
-#define MEMFD_SUPPORTED
1;
+#define RTE_MFD_HUGETLB MFD_HUGETLB
#else
0;
+#define RTE_MFD_HUGETLB 4U
#endif
/*
@@ -171,7 +176,7 @@ prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id)
RTE_LOG(ERR, EAL,
"Failed to get current mempolicy: %s. "
"Assuming MPOL_DEFAULT.\n", strerror(errno));
- oldpolicy = MPOL_DEFAULT;
+ *oldpolicy = MPOL_DEFAULT;
}
RTE_LOG(DEBUG, EAL,
"Setting policy MPOL_PREFERRED for socket %d\n",
@@ -338,12 +343,12 @@ get_seg_memfd(struct hugepage_info *hi __rte_unused,
int fd;
char segname[250]; /* as per manpage, limit is 249 bytes plus null */
+ int flags = RTE_MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
+
if (internal_config.single_file_segments) {
fd = fd_list[list_idx].memseg_list_fd;
if (fd < 0) {
- int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
-
snprintf(segname, sizeof(segname), "seg_%i", list_idx);
fd = memfd_create(segname, flags);
if (fd < 0) {
@@ -357,8 +362,6 @@ get_seg_memfd(struct hugepage_info *hi __rte_unused,
fd = fd_list[list_idx].fds[seg_idx];
if (fd < 0) {
- int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
-
snprintf(segname, sizeof(segname), "seg_%i-%i",
list_idx, seg_idx);
fd = memfd_create(segname, flags);
@@ -633,13 +636,13 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
int mmap_flags;
if (internal_config.in_memory && !memfd_create_supported) {
- int pagesz_flag, flags;
+ const int in_memory_flags = MAP_HUGETLB | MAP_FIXED |
+ MAP_PRIVATE | MAP_ANONYMOUS;
+ int pagesz_flag;
pagesz_flag = pagesz_flags(alloc_sz);
- flags = pagesz_flag | MAP_HUGETLB | MAP_FIXED |
- MAP_PRIVATE | MAP_ANONYMOUS;
fd = -1;
- mmap_flags = flags;
+ mmap_flags = in_memory_flags | pagesz_flag;
/* single-file segments codepath will never be active
* here because in-memory mode is incompatible with the
@@ -1542,6 +1545,17 @@ int
eal_memalloc_get_seg_fd(int list_idx, int seg_idx)
{
int fd;
+
+ if (internal_config.in_memory || internal_config.no_hugetlbfs) {
+#ifndef MEMFD_SUPPORTED
+ /* in in-memory or no-huge mode, we rely on memfd support */
+ return -ENOTSUP;
+#endif
+ /* memfd supported, but hugetlbfs memfd may not be */
+ if (!internal_config.no_hugetlbfs && !memfd_create_supported)
+ return -ENOTSUP;
+ }
+
if (internal_config.single_file_segments) {
fd = fd_list[list_idx].memseg_list_fd;
} else if (fd_list[list_idx].len == 0) {
@@ -1565,7 +1579,7 @@ test_memfd_create(void)
int pagesz_flag = pagesz_flags(pagesz);
int flags;
- flags = pagesz_flag | MFD_HUGETLB;
+ flags = pagesz_flag | RTE_MFD_HUGETLB;
int fd = memfd_create("test", flags);
if (fd < 0) {
/* we failed - let memalloc know this isn't working */
@@ -1589,6 +1603,16 @@ eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ if (internal_config.in_memory || internal_config.no_hugetlbfs) {
+#ifndef MEMFD_SUPPORTED
+ /* in in-memory or no-huge mode, we rely on memfd support */
+ return -ENOTSUP;
+#endif
+ /* memfd supported, but hugetlbfs memfd may not be */
+ if (!internal_config.no_hugetlbfs && !memfd_create_supported)
+ return -ENOTSUP;
+ }
+
/* fd_list not initialized? */
if (fd_list[list_idx].len == 0)
return -ENODEV;
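
A condensed sketch of the run-time probe behind memfd_create_supported
(assumes glibc >= 2.27 for the memfd_create() wrapper; 4U mirrors the
kernel's MFD_HUGETLB value on systems whose headers predate it):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>

#ifndef MFD_HUGETLB
#define MFD_HUGETLB 4U
#endif

static int
probe_hugetlb_memfd(void)
{
	int fd = memfd_create("probe", MFD_HUGETLB);

	if (fd < 0)
		return 0;	/* running kernel can't create hugepage memfds */
	close(fd);
	return 1;
}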
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 32feb415..e05da74c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -434,7 +434,7 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
}
snprintf(hugedir_str, sizeof(hugedir_str),
- "%s/%s", hpi->hugedir, internal_config.hugefile_prefix);
+ "%s/%s", hpi->hugedir, eal_get_hugefile_prefix());
/* parse numa map */
while (fgets(buf, sizeof(buf), f) != NULL) {
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 0516b159..c821e838 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -549,6 +549,65 @@ next:
}
}
+static int
+vfio_sync_default_container(void)
+{
+ struct rte_mp_msg mp_req, *mp_rep;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+ int iommu_type_id;
+ unsigned int i;
+
+ /* cannot be called from primary */
+ if (rte_eal_process_type() != RTE_PROC_SECONDARY)
+ return -1;
+
+ /* default container fd should have been opened in rte_vfio_enable() */
+ if (!default_vfio_cfg->vfio_enabled ||
+ default_vfio_cfg->vfio_container_fd < 0) {
+ RTE_LOG(ERR, EAL, "VFIO support is not initialized\n");
+ return -1;
+ }
+
+ /* find default container's IOMMU type */
+ p->req = SOCKET_REQ_IOMMU_TYPE;
+ strcpy(mp_req.name, EAL_VFIO_MP);
+ mp_req.len_param = sizeof(*p);
+ mp_req.num_fds = 0;
+
+ iommu_type_id = -1;
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+ mp_reply.nb_received == 1) {
+ mp_rep = &mp_reply.msgs[0];
+ p = (struct vfio_mp_param *)mp_rep->param;
+ if (p->result == SOCKET_OK)
+ iommu_type_id = p->iommu_type_id;
+ free(mp_reply.msgs);
+ }
+ if (iommu_type_id < 0) {
+ RTE_LOG(ERR, EAL, "Could not get IOMMU type for default container\n");
+ return -1;
+ }
+
+ /* we now have an fd for default container, as well as its IOMMU type.
+ * now, set up default VFIO container config to match.
+ */
+ for (i = 0; i < RTE_DIM(iommu_types); i++) {
+ const struct vfio_iommu_type *t = &iommu_types[i];
+ if (t->type_id != iommu_type_id)
+ continue;
+
+ /* we found our IOMMU type */
+ default_vfio_cfg->vfio_iommu_type = t;
+
+ return 0;
+ }
+ RTE_LOG(ERR, EAL, "Could not find IOMMU type id (%i)\n",
+ iommu_type_id);
+ return -1;
+}
+
int
rte_vfio_clear_group(int vfio_group_fd)
{
@@ -745,6 +804,26 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
else
RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n");
}
+ } else if (rte_eal_process_type() != RTE_PROC_PRIMARY &&
+ vfio_cfg == default_vfio_cfg &&
+ vfio_cfg->vfio_iommu_type == NULL) {
+ /* if we're not a primary process, we do not set up the VFIO
+ * container because it's already been set up by the primary
+ * process. instead, we simply ask the primary about VFIO type
+ * we are using, and set the VFIO config up appropriately.
+ */
+ ret = vfio_sync_default_container();
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Could not sync default VFIO container\n");
+ close(vfio_group_fd);
+ rte_vfio_clear_group(vfio_group_fd);
+ return -1;
+ }
+ /* we have successfully initialized VFIO, notify user */
+ const struct vfio_iommu_type *t =
+ default_vfio_cfg->vfio_iommu_type;
+ RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n",
+ t->type_id, t->name);
}
/* get a file descriptor for the device */
@@ -857,7 +936,8 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
/* if there are no active device groups, unregister the callback to
* avoid spurious attempts to map/unmap memory from VFIO.
*/
- if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0)
+ if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0 &&
+ rte_eal_process_type() != RTE_PROC_SECONDARY)
rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME,
NULL);
@@ -977,6 +1057,15 @@ vfio_get_default_container_fd(void)
return -1;
}
+int
+vfio_get_iommu_type(void)
+{
+ if (default_vfio_cfg->vfio_iommu_type == NULL)
+ return -1;
+
+ return default_vfio_cfg->vfio_iommu_type->type_id;
+}
+
const struct vfio_iommu_type *
vfio_set_iommu_type(int vfio_container_fd)
{
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 63ae115c..cb2d35fb 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -5,6 +5,8 @@
#ifndef EAL_VFIO_H_
#define EAL_VFIO_H_
+#include <rte_common.h>
+
/*
* determine if VFIO is present on the system
*/
@@ -122,6 +124,9 @@ int vfio_get_default_container_fd(void);
const struct vfio_iommu_type *
vfio_set_iommu_type(int vfio_container_fd);
+int
+vfio_get_iommu_type(void);
+
/* check if we have any supported extensions */
int
vfio_has_supported_extensions(int vfio_container_fd);
@@ -133,6 +138,7 @@ int vfio_mp_sync_setup(void);
#define SOCKET_REQ_CONTAINER 0x100
#define SOCKET_REQ_GROUP 0x200
#define SOCKET_REQ_DEFAULT_CONTAINER 0x400
+#define SOCKET_REQ_IOMMU_TYPE 0x800
#define SOCKET_OK 0x0
#define SOCKET_NO_FD 0x1
#define SOCKET_ERR 0xFF
@@ -140,7 +146,11 @@ int vfio_mp_sync_setup(void);
struct vfio_mp_param {
int req;
int result;
- int group_num;
+ RTE_STD_C11
+ union {
+ int group_num;
+ int iommu_type_id;
+ };
};
#endif /* VFIO_PRESENT */
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index a1e8c834..2a47f29d 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -77,6 +77,22 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
reply.fds[0] = fd;
}
break;
+ case SOCKET_REQ_IOMMU_TYPE:
+ {
+ int iommu_type_id;
+
+ r->req = SOCKET_REQ_IOMMU_TYPE;
+
+ iommu_type_id = vfio_get_iommu_type();
+
+ if (iommu_type_id < 0)
+ r->result = SOCKET_ERR;
+ else {
+ r->iommu_type_id = iommu_type_id;
+ r->result = SOCKET_OK;
+ }
+ break;
+ }
default:
RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
return -1;
diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c
index e6e5cfda..1a97ece0 100644
--- a/lib/librte_efd/rte_efd.c
+++ b/lib/librte_efd/rte_efd.c
@@ -740,6 +740,8 @@ void
rte_efd_free(struct rte_efd_table *table)
{
uint8_t socket_id;
+ struct rte_efd_list *efd_list;
+ struct rte_tailq_entry *te, *temp;
if (table == NULL)
return;
@@ -747,6 +749,18 @@ rte_efd_free(struct rte_efd_table *table)
for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES; socket_id++)
rte_free(table->chunks[socket_id]);
+ efd_list = RTE_TAILQ_CAST(rte_efd_tailq.head, rte_efd_list);
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+ TAILQ_FOREACH_SAFE(te, efd_list, next, temp) {
+ if (te->data == (void *) table) {
+ TAILQ_REMOVE(efd_list, te, next);
+ rte_free(te);
+ break;
+ }
+ }
+
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
rte_ring_free(table->free_slots);
rte_free(table->offline_chunks);
rte_free(table->keys);
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index 5f858174..9d5107dc 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -1594,7 +1594,7 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
RTE_ETHDEV_LOG(ERR,
- "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n",
+ "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n",
nb_rx_desc, dev_info.rx_desc_lim.nb_max,
dev_info.rx_desc_lim.nb_min,
dev_info.rx_desc_lim.nb_align);
@@ -1698,7 +1698,7 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
RTE_ETHDEV_LOG(ERR,
- "Invalid value for nb_tx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n",
+ "Invalid value for nb_tx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n",
nb_tx_desc, dev_info.tx_desc_lim.nb_max,
dev_info.tx_desc_lim.nb_min,
dev_info.tx_desc_lim.nb_align);
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index 1960f3a2..a3c864a1 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -4159,9 +4159,6 @@ rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
}
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Process a burst of output packets on a transmit queue of an Ethernet device.
*
* The rte_eth_tx_prepare() function is invoked to prepare output packets to be
@@ -4225,7 +4222,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
if (!rte_eth_dev_is_valid_port(port_id)) {
RTE_ETHDEV_LOG(ERR, "Invalid TX port_id=%u\n", port_id);
- rte_errno = -EINVAL;
+ rte_errno = EINVAL;
return 0;
}
#endif
@@ -4235,7 +4232,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
if (queue_id >= dev->data->nb_tx_queues) {
RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
- rte_errno = -EINVAL;
+ rte_errno = EINVAL;
return 0;
}
#endif
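
The sign fix matters to callers, since rte_errno holds positive errno values.
A hedged caller sketch (these checks only fire when built with
RTE_LIBRTE_ETHDEV_DEBUG):

#include <stdio.h>
#include <rte_errno.h>
#include <rte_ethdev.h>

static uint16_t
prepare_or_warn(uint16_t port_id, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t n = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);

	/* positive errno convention: compare directly against EINVAL */
	if (n == 0 && nb_pkts != 0 && rte_errno == EINVAL)
		printf("invalid port or queue id\n");
	return n;
}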
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
index 8831bc35..8d178be1 100644
--- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
@@ -912,7 +912,7 @@ rxa_intr_ring_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
*/
if (err)
RTE_EDEV_LOG_ERR("Failed to enqueue interrupt"
- " to ring: %s", strerror(err));
+ " to ring: %s", strerror(-err));
else
rte_eth_dev_rx_intr_disable(port_id, queue);
}
diff --git a/lib/librte_eventdev/rte_event_eth_tx_adapter.c b/lib/librte_eventdev/rte_event_eth_tx_adapter.c
index ccf8a755..67216a30 100644
--- a/lib/librte_eventdev/rte_event_eth_tx_adapter.c
+++ b/lib/librte_eventdev/rte_event_eth_tx_adapter.c
@@ -59,6 +59,20 @@ do {\
return -EINVAL; \
} while (0)
+#define TXA_CHECK_TXQ(dev, queue) \
+do {\
+ if ((dev)->data->nb_tx_queues == 0) { \
+ RTE_EDEV_LOG_ERR("No tx queues configured"); \
+ return -EINVAL; \
+ } \
+ if ((queue) != -1 && \
+ (uint16_t)(queue) >= (dev)->data->nb_tx_queues) { \
+ RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16, \
+ (uint16_t)(queue)); \
+ return -EINVAL; \
+ } \
+} while (0)
+
/* Tx retry callback structure */
struct txa_retry {
/* Ethernet port id */
@@ -795,20 +809,35 @@ txa_service_queue_del(uint8_t id,
struct rte_eth_dev_tx_buffer *tb;
uint16_t port_id;
+ txa = txa_service_id_to_data(id);
+ port_id = dev->data->port_id;
+
if (tx_queue_id == -1) {
- uint16_t i;
- int ret = -1;
+ uint16_t i, q, nb_queues;
+ int ret = 0;
- for (i = 0; i < dev->data->nb_tx_queues; i++) {
- ret = txa_service_queue_del(id, dev, i);
- if (ret != 0)
- break;
+ nb_queues = txa->nb_queues;
+ if (nb_queues == 0)
+ return 0;
+
+ i = 0;
+ q = 0;
+ tqi = txa->txa_ethdev[port_id].queues;
+
+ while (i < nb_queues) {
+
+ if (tqi[q].added) {
+ ret = txa_service_queue_del(id, dev, q);
+ if (ret != 0)
+ break;
+ }
+ i++;
+ q++;
}
return ret;
}
txa = txa_service_id_to_data(id);
- port_id = dev->data->port_id;
tqi = txa_service_queue(txa, port_id, tx_queue_id);
if (tqi == NULL || !tqi->added)
@@ -999,11 +1028,7 @@ rte_event_eth_tx_adapter_queue_add(uint8_t id,
TXA_CHECK_OR_ERR_RET(id);
eth_dev = &rte_eth_devices[eth_dev_id];
- if (queue != -1 && (uint16_t)queue >= eth_dev->data->nb_tx_queues) {
- RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16,
- (uint16_t)queue);
- return -EINVAL;
- }
+ TXA_CHECK_TXQ(eth_dev, queue);
caps = 0;
if (txa_dev_caps_get(id))
@@ -1034,11 +1059,6 @@ rte_event_eth_tx_adapter_queue_del(uint8_t id,
TXA_CHECK_OR_ERR_RET(id);
eth_dev = &rte_eth_devices[eth_dev_id];
- if (queue != -1 && (uint16_t)queue >= eth_dev->data->nb_tx_queues) {
- RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16,
- (uint16_t)queue);
- return -EINVAL;
- }
caps = 0;
diff --git a/lib/librte_eventdev/rte_event_timer_adapter.h b/lib/librte_eventdev/rte_event_timer_adapter.h
index d4ea6f17..db98dec4 100644
--- a/lib/librte_eventdev/rte_event_timer_adapter.h
+++ b/lib/librte_eventdev/rte_event_timer_adapter.h
@@ -461,61 +461,8 @@ rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter,
* - 0: Successfully reset;
* - <0: Failure; error code returned.
*/
-int __rte_experimental rte_event_timer_adapter_stats_reset(
- struct rte_event_timer_adapter *adapter);
-
-/**
- * Retrieve the service ID of the event timer adapter. If the adapter doesn't
- * use an rte_service function, this function returns -ESRCH.
- *
- * @param adapter
- * A pointer to an event timer adapter.
- *
- * @param [out] service_id
- * A pointer to a uint32_t, to be filled in with the service id.
- *
- * @return
- * - 0: Success
- * - <0: Error code on failure, if the event dev doesn't use a rte_service
- * function, this function returns -ESRCH.
- */
-int
-rte_event_timer_adapter_service_id_get(struct rte_event_timer_adapter *adapter,
- uint32_t *service_id);
-
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
- * Retrieve statistics for an event timer adapter instance.
- *
- * @param adapter
- * A pointer to an event timer adapter structure.
- * @param[out] stats
- * A pointer to a structure to fill with statistics.
- *
- * @return
- * - 0: Successfully retrieved.
- * - <0: Failure; error code returned.
- */
-int rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter,
- struct rte_event_timer_adapter_stats *stats);
-
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
- * Reset statistics for an event timer adapter instance.
- *
- * @param adapter
- * A pointer to an event timer adapter structure.
- *
- * @return
- * - 0: Successfully reset;
- * - <0: Failure; error code returned.
- */
-int rte_event_timer_adapter_stats_reset(
- struct rte_event_timer_adapter *adapter);
+int __rte_experimental
+rte_event_timer_adapter_stats_reset(struct rte_event_timer_adapter *adapter);
/**
* @warning
diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h
index d7eb69d1..ef10a855 100644
--- a/lib/librte_eventdev/rte_eventdev.h
+++ b/lib/librte_eventdev/rte_eventdev.h
@@ -1893,7 +1893,7 @@ rte_event_dev_xstats_names_get(uint8_t dev_id,
* @param ids
* The id numbers of the stats to get. The ids can be got from the stat
* position in the stat list from rte_event_dev_get_xstats_names(), or
- * by using rte_eventdev_get_xstats_by_name()
+ * by using rte_event_dev_xstats_by_name_get().
* @param[out] values
* The values for each stats request by ID.
* @param n
@@ -1921,7 +1921,7 @@ rte_event_dev_xstats_get(uint8_t dev_id,
* The stat name to retrieve
* @param[out] id
* If non-NULL, the numerical id of the stat will be returned, so that further
- * requests for the stat can be got using rte_eventdev_xstats_get, which will
+ * requests for the stat can be got using rte_event_dev_xstats_get, which will
* be faster as it doesn't need to scan a list of names for the stat.
* If the stat cannot be found, the id returned will be (unsigned)-1.
* @return
diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
index 2c0f35c6..7d128a43 100644
--- a/lib/librte_gro/gro_tcp4.c
+++ b/lib/librte_gro/gro_tcp4.c
@@ -198,7 +198,8 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
struct ipv4_hdr *ipv4_hdr;
struct tcp_hdr *tcp_hdr;
uint32_t sent_seq;
- uint16_t tcp_dl, ip_id, hdr_len, frag_off;
+ int32_t tcp_dl;
+ uint16_t ip_id, hdr_len, frag_off;
uint8_t is_atomic;
struct tcp4_flow_key key;
@@ -207,6 +208,13 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
int cmp;
uint8_t find;
+ /*
+	 * Don't process packets whose TCP header length is greater than
+	 * 60 bytes or less than 20 bytes.
+ */
+ if (unlikely(INVALID_TCP_HDRLEN(pkt->l4_len)))
+ return -1;
+
eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
index 6bb30cdb..d9792488 100644
--- a/lib/librte_gro/gro_tcp4.h
+++ b/lib/librte_gro/gro_tcp4.h
@@ -17,6 +17,11 @@
*/
#define MAX_IPV4_PKT_LENGTH UINT16_MAX
+/* The maximum TCP header length */
+#define MAX_TCP_HLEN 60
+#define INVALID_TCP_HDRLEN(len) \
+ (((len) < sizeof(struct tcp_hdr)) || ((len) > MAX_TCP_HLEN))
+
/* Header fields representing a TCP/IPv4 flow */
struct tcp4_flow_key {
struct ether_addr eth_saddr;
diff --git a/lib/librte_gro/gro_vxlan_tcp4.c b/lib/librte_gro/gro_vxlan_tcp4.c
index ca86f010..acb9bc91 100644
--- a/lib/librte_gro/gro_vxlan_tcp4.c
+++ b/lib/librte_gro/gro_vxlan_tcp4.c
@@ -295,7 +295,8 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
struct udp_hdr *udp_hdr;
struct vxlan_hdr *vxlan_hdr;
uint32_t sent_seq;
- uint16_t tcp_dl, frag_off, outer_ip_id, ip_id;
+ int32_t tcp_dl;
+ uint16_t frag_off, outer_ip_id, ip_id;
uint8_t outer_is_atomic, is_atomic;
struct vxlan_tcp4_flow_key key;
@@ -305,6 +306,13 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
uint16_t hdr_len;
uint8_t find;
+ /*
+	 * Don't process packets whose TCP header length is greater than
+	 * 60 bytes or less than 20 bytes.
+ */
+ if (unlikely(INVALID_TCP_HDRLEN(pkt->l4_len)))
+ return -1;
+
outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr +
pkt->outer_l2_len);
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
index 6cd764ff..b6ff1b88 100644
--- a/lib/librte_gso/gso_common.h
+++ b/lib/librte_gso/gso_common.h
@@ -22,12 +22,12 @@
(PKT_TX_TCP_SEG | PKT_TX_IPV4))
#define IS_IPV4_VXLAN_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
- PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_VXLAN)) == \
+ PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \
(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
PKT_TX_TUNNEL_VXLAN))
#define IS_IPV4_GRE_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
- PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_GRE)) == \
+ PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \
(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
PKT_TX_TUNNEL_GRE))
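
Why the wider mask is needed (flag values as defined in rte_mbuf.h):

/* The tunnel type is a 4-bit field inside ol_flags, not independent bits:
 *   PKT_TX_TUNNEL_VXLAN = 0x1ULL << 45
 *   PKT_TX_TUNNEL_IPIP  = 0x3ULL << 45
 *   PKT_TX_TUNNEL_MASK  = 0xFULL << 45
 * Testing only the VXLAN bit leaves bit 45 of an IPIP packet set, so the
 * old IS_IPV4_VXLAN_TCP4() matched IPIP traffic too; masking with
 * PKT_TX_TUNNEL_MASK compares the whole field against a single type.
 */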
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index c55a4f26..c01489ba 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -1347,6 +1347,9 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
cached_free_slots->objs,
LCORE_CACHE_SIZE, NULL);
+ ERR_IF_TRUE((n_slots == 0),
+ "%s: could not enqueue free slots in global ring\n",
+ __func__);
cached_free_slots->len -= n_slots;
}
/* Put index of new free slot in cache. */
@@ -1552,6 +1555,7 @@ rte_hash_free_key_with_position(const struct rte_hash *h,
n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
cached_free_slots->objs,
LCORE_CACHE_SIZE, NULL);
+ RETURN_IF_TRUE((n_slots == 0), -EFAULT);
cached_free_slots->len -= n_slots;
}
/* Put index of new free slot in cache. */
@@ -2022,11 +2026,11 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
uint64_t *hit_mask, void *data[])
{
if (h->readwrite_concur_lf_support)
- return __rte_hash_lookup_bulk_lf(h, keys, num_keys,
- positions, hit_mask, data);
+ __rte_hash_lookup_bulk_lf(h, keys, num_keys, positions,
+ hit_mask, data);
else
- return __rte_hash_lookup_bulk_l(h, keys, num_keys,
- positions, hit_mask, data);
+ __rte_hash_lookup_bulk_l(h, keys, num_keys, positions,
+ hit_mask, data);
}
int
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
index 5dfbbc48..eacdaa8d 100644
--- a/lib/librte_hash/rte_cuckoo_hash.h
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -29,6 +29,17 @@
#define RETURN_IF_TRUE(cond, retval)
#endif
+#if defined(RTE_LIBRTE_HASH_DEBUG)
+#define ERR_IF_TRUE(cond, fmt, args...) do { \
+ if (cond) { \
+ RTE_LOG(ERR, HASH, fmt, ##args); \
+ return; \
+ } \
+} while (0)
+#else
+#define ERR_IF_TRUE(cond, fmt, args...)
+#endif
+
#include <rte_hash_crc.h>
#include <rte_jhash.h>
diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h
index a4ccaf9d..04fd9df5 100644
--- a/lib/librte_ip_frag/rte_ip_frag.h
+++ b/lib/librte_ip_frag/rte_ip_frag.h
@@ -115,6 +115,7 @@ struct rte_ip_frag_tbl {
#define RTE_IPV6_EHDR_MF_MASK 1
#define RTE_IPV6_EHDR_FO_SHIFT 3
#define RTE_IPV6_EHDR_FO_MASK (~((1 << RTE_IPV6_EHDR_FO_SHIFT) - 1))
+#define RTE_IPV6_EHDR_FO_ALIGN (1 << RTE_IPV6_EHDR_FO_SHIFT)
#define RTE_IPV6_FRAG_USED_MASK \
(RTE_IPV6_EHDR_MF_MASK | RTE_IPV6_EHDR_FO_MASK)
diff --git a/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
index 62a7e4e8..b9437eb1 100644
--- a/lib/librte_ip_frag/rte_ipv6_fragmentation.c
+++ b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
@@ -77,11 +77,14 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
uint32_t out_pkt_pos, in_seg_data_pos;
uint32_t more_in_segs;
uint16_t fragment_offset, frag_size;
+ uint64_t frag_bytes_remaining;
- frag_size = (uint16_t)(mtu_size - sizeof(struct ipv6_hdr));
-
- /* Fragment size should be a multiple of 8. */
- RTE_ASSERT((frag_size & ~RTE_IPV6_EHDR_FO_MASK) == 0);
+ /*
+	 * Ensure the IP payload length of all fragments (except the
+	 * last fragment) is a multiple of 8 bytes, per RFC 2460.
+ */
+ frag_size = RTE_ALIGN_FLOOR(mtu_size - sizeof(struct ipv6_hdr),
+ RTE_IPV6_EHDR_FO_ALIGN);
/* Check that pkts_out is big enough to hold all fragments */
if (unlikely (frag_size * nb_pkts_out <
@@ -111,6 +114,7 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
/* Reserve space for the IP header that will be built later */
out_pkt->data_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment);
out_pkt->pkt_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment);
+ frag_bytes_remaining = frag_size;
out_seg_prev = out_pkt;
more_out_segs = 1;
@@ -130,7 +134,7 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
/* Prepare indirect buffer */
rte_pktmbuf_attach(out_seg, in_seg);
- len = mtu_size - out_pkt->pkt_len;
+ len = frag_bytes_remaining;
if (len > (in_seg->data_len - in_seg_data_pos)) {
len = in_seg->data_len - in_seg_data_pos;
}
@@ -140,11 +144,11 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
out_pkt->pkt_len);
out_pkt->nb_segs += 1;
in_seg_data_pos += len;
+ frag_bytes_remaining -= len;
/* Current output packet (i.e. fragment) done ? */
- if (unlikely(out_pkt->pkt_len >= mtu_size)) {
+ if (unlikely(frag_bytes_remaining == 0))
more_out_segs = 0;
- }
/* Current input segment done ? */
if (unlikely(in_seg_data_pos == in_seg->data_len)) {
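
The fragmentation change replaces the old assertion (which required the caller to pass an already-aligned MTU) with RTE_ALIGN_FLOOR, rounding the per-fragment payload down to RTE_IPV6_EHDR_FO_ALIGN (1 << 3 = 8), the granularity the fragment-offset field can express. A quick sketch of the arithmetic, with the align-floor helper re-derived locally for illustration:

	#include <stdio.h>

	#define IPV6_HDR_LEN	40	/* sizeof(struct ipv6_hdr) */
	#define FO_ALIGN	8	/* RTE_IPV6_EHDR_FO_ALIGN = 1 << 3 */

	/* same idea as RTE_ALIGN_FLOOR for a power-of-two alignment */
	static unsigned int align_floor(unsigned int v, unsigned int align)
	{
		return v & ~(align - 1);
	}

	int main(void)
	{
		/* 1500 - 40 = 1460 payload bytes, rounded down to 1456 */
		printf("frag payload = %u\n",
			align_floor(1500 - IPV6_HDR_LEN, FO_ALIGN));
		return 0;
	}
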
diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h
index f2a8904a..f9b90909 100644
--- a/lib/librte_net/rte_ip.h
+++ b/lib/librte_net/rte_ip.h
@@ -310,16 +310,20 @@ rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
* @param l4_hdr
* The pointer to the beginning of the L4 header.
* @return
- * The complemented checksum to set in the IP packet.
+ * The complemented checksum to set in the IP packet
+ * or 0 on error
*/
static inline uint16_t
rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr)
{
uint32_t cksum;
- uint32_t l4_len;
+ uint32_t l3_len, l4_len;
+
+ l3_len = rte_be_to_cpu_16(ipv4_hdr->total_length);
+ if (l3_len < sizeof(struct ipv4_hdr))
+ return 0;
- l4_len = (uint32_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) -
- sizeof(struct ipv4_hdr));
+ l4_len = l3_len - sizeof(struct ipv4_hdr);
cksum = rte_raw_cksum(l4_hdr, l4_len);
cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
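
The new l3_len check guards against a malformed header whose total_length claims fewer bytes than the IPv4 header itself: previously the unsigned subtraction wrapped around and rte_raw_cksum() was asked to sum an enormous, out-of-bounds buffer. Returning 0 works as an error marker here because, as I read the rest of the function (not shown in this hunk), a validly computed zero checksum is remapped to 0xffff. A tiny sketch of the underflow being prevented:

	#include <stdint.h>
	#include <stdio.h>

	#define IPV4_HDR_LEN 20u	/* sizeof(struct ipv4_hdr), no options */

	int main(void)
	{
		uint32_t l3_len = 19;	/* malformed: shorter than the header */

		/* old path: wraps to 4294967295 and the raw-checksum loop
		 * would read far past the end of the L4 header */
		printf("wrapped l4_len = %u\n", l3_len - IPV4_HDR_LEN);

		/* new path: validate before subtracting */
		if (l3_len < IPV4_HDR_LEN)
			printf("malformed header, return 0\n");
		return 0;
	}
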
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index 587d5e60..89c3d1e7 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -667,6 +667,7 @@ rte_sched_port_config(struct rte_sched_port_params *params)
params->red_params[i][j].min_th,
params->red_params[i][j].max_th,
params->red_params[i][j].maxp_inv) != 0) {
+ rte_free(port);
return NULL;
}
}
@@ -726,6 +727,7 @@ rte_sched_port_config(struct rte_sched_port_params *params)
bmp_mem_size);
if (port->bmp == NULL) {
RTE_LOG(ERR, SCHED, "Bitmap init error\n");
+ rte_free(port);
return NULL;
}
diff --git a/lib/librte_telemetry/rte_telemetry.c b/lib/librte_telemetry/rte_telemetry.c
index 016431f1..7fb247ea 100644
--- a/lib/librte_telemetry/rte_telemetry.c
+++ b/lib/librte_telemetry/rte_telemetry.c
@@ -558,7 +558,7 @@ rte_telemetry_send_ports_stats_values(uint32_t *metric_ids, int num_metric_ids,
}
ret = rte_telemetry_update_metrics_ethdev(telemetry,
- port_ids[i], telemetry->reg_index);
+ port_ids[i], telemetry->reg_index[i]);
if (ret < 0) {
TELEMETRY_LOG_ERR("Failed to update ethdev metrics");
return -1;
@@ -658,23 +658,45 @@ free_xstats:
static int32_t
rte_telemetry_initial_accept(struct telemetry_impl *telemetry)
{
+ struct driver_index {
+ const void *dev_ops;
+ int reg_index;
+ } drv_idx[RTE_MAX_ETHPORTS];
+ int nb_drv_idx = 0;
uint16_t pid;
int ret;
int selftest = 0;
RTE_ETH_FOREACH_DEV(pid) {
- telemetry->reg_index = rte_telemetry_reg_ethdev_to_metrics(pid);
- break;
- }
+ int i;
+ /* Different device types have different numbers of stats, so
+ * first check if the stats for this type of device have
+ * already been registered
+ */
+ for (i = 0; i < nb_drv_idx; i++) {
+ if (rte_eth_devices[pid].dev_ops == drv_idx[i].dev_ops) {
+ telemetry->reg_index[pid] = drv_idx[i].reg_index;
+ break;
+ }
+ }
+ if (i < nb_drv_idx)
+ continue; /* we found a match, go to next port */
- if (telemetry->reg_index < 0) {
- TELEMETRY_LOG_ERR("Failed to register ethdev metrics");
- return -1;
+ /* No match, register a new set of xstats for this port */
+ ret = rte_telemetry_reg_ethdev_to_metrics(pid);
+ if (ret < 0) {
+ TELEMETRY_LOG_ERR("Failed to register ethdev metrics");
+ return -1;
+ }
+ telemetry->reg_index[pid] = ret;
+ drv_idx[nb_drv_idx].dev_ops = rte_eth_devices[pid].dev_ops;
+ drv_idx[nb_drv_idx].reg_index = ret;
+ nb_drv_idx++;
}
telemetry->metrics_register_done = 1;
if (selftest) {
- ret = rte_telemetry_socket_messaging_testing(telemetry->reg_index,
+ ret = rte_telemetry_socket_messaging_testing(telemetry->reg_index[0],
telemetry->server_fd);
if (ret < 0)
return -1;
@@ -1299,7 +1321,7 @@ rte_telemetry_socket_messaging_testing(int index, int socket)
}
telemetry->server_fd = socket;
- telemetry->reg_index = index;
+ telemetry->reg_index[0] = index;
TELEMETRY_LOG_INFO("Beginning Telemetry socket message Selftest");
rte_telemetry_socket_test_setup(telemetry, &send_fd, &recv_fd);
TELEMETRY_LOG_INFO("Register valid client test");
diff --git a/lib/librte_telemetry/rte_telemetry_internal.h b/lib/librte_telemetry/rte_telemetry_internal.h
index de7afda3..c298c391 100644
--- a/lib/librte_telemetry/rte_telemetry_internal.h
+++ b/lib/librte_telemetry/rte_telemetry_internal.h
@@ -36,7 +36,7 @@ typedef struct telemetry_impl {
pthread_t thread_id;
int thread_status;
uint32_t socket_id;
- int reg_index;
+ int reg_index[RTE_MAX_ETHPORTS];
int metrics_register_done;
TAILQ_HEAD(, telemetry_client) client_list_head;
struct telemetry_client *request_client;
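
Taken together, the telemetry hunks turn reg_index from a single slot (previously filled from the first port only) into a per-port table, while the drv_idx scratch array lets ports sharing a driver (same dev_ops pointer) reuse one metrics registration. A reduced sketch of that dedup loop, with stand-in data in place of rte_eth_devices and the metrics API:

	#include <stdio.h>

	#define NB_PORTS 4

	static int register_metrics(void)	/* stand-in registration */
	{
		static int next_index;
		return next_index++;
	}

	int main(void)
	{
		static const int drv_a, drv_b;	/* fake dev_ops identities */
		const void *port_ops[NB_PORTS] =
			{ &drv_a, &drv_a, &drv_b, &drv_a };
		const void *seen_ops[NB_PORTS];
		int seen_index[NB_PORTS], nb_seen = 0;
		int reg_index[NB_PORTS];
		int p, i;

		for (p = 0; p < NB_PORTS; p++) {
			for (i = 0; i < nb_seen; i++)
				if (port_ops[p] == seen_ops[i])
					break;
			if (i < nb_seen) {		/* driver already seen */
				reg_index[p] = seen_index[i];
				continue;
			}
			reg_index[p] = register_metrics();
			seen_ops[nb_seen] = port_ops[p];
			seen_index[nb_seen++] = reg_index[p];
		}
		for (p = 0; p < NB_PORTS; p++)
			printf("port %d -> reg_index %d\n", p, reg_index[p]);
		/* ports 0, 1 and 3 share index 0; port 2 gets index 1 */
		return 0;
	}
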
diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
index 590488c7..30c7b0ab 100644
--- a/lib/librte_timer/rte_timer.c
+++ b/lib/librte_timer/rte_timer.c
@@ -241,24 +241,17 @@ timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
}
}
-/*
- * add in list, lock if needed
+/* call with the list lock held as necessary
+ * add timer to the list
* timer must be in config state
* timer must not be in a list
*/
static void
-timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
+timer_add(struct rte_timer *tim, unsigned int tim_lcore)
{
- unsigned lcore_id = rte_lcore_id();
unsigned lvl;
struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
- /* if timer needs to be scheduled on another core, we need to
- * lock the list; if it is on local core, we need to lock if
- * we are not called from rte_timer_manage() */
- if (tim_lcore != lcore_id || !local_is_locked)
- rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
-
/* find where exactly this element goes in the list of elements
* for each depth. */
timer_get_prev_entries(tim->expire, tim_lcore, prev);
@@ -282,9 +275,6 @@ timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
* NOTE: this is not atomic on 32-bit*/
priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
pending_head.sl_next[0]->expire;
-
- if (tim_lcore != lcore_id || !local_is_locked)
- rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
}
/*
@@ -379,8 +369,15 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
tim->f = fct;
tim->arg = arg;
+ /* if timer needs to be scheduled on another core, we need to
+ * lock the destination list; if it is on the local core, we need to lock if
+ * we are not called from rte_timer_manage()
+ */
+ if (tim_lcore != lcore_id || !local_is_locked)
+ rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
+
__TIMER_STAT_ADD(pending, 1);
- timer_add(tim, tim_lcore, local_is_locked);
+ timer_add(tim, tim_lcore);
/* update state: as we are in CONFIG state, only us can modify
* the state so we don't need to use cmpset() here */
@@ -389,6 +386,9 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
status.owner = (int16_t)tim_lcore;
tim->status.u32 = status.u32;
+ if (tim_lcore != lcore_id || !local_is_locked)
+ rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
+
return 0;
}
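
The net effect of the rte_timer.c hunks is to hoist the list lock out of timer_add() into __rte_timer_reset(), so the critical section now covers both the skiplist insertion and the status-word update that publishes the timer as pending; before, another core could observe the timer in the list while its status still said CONFIG. A schematic of the widened critical section (pthreads standing in for rte_spinlock, fields heavily simplified):

	#include <pthread.h>

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
	static int timer_in_list;	/* stands in for the skiplist insert */
	static int timer_status;	/* stands in for tim->status.u32 */

	static void timer_reset_sketch(void)
	{
		pthread_mutex_lock(&list_lock);
		timer_in_list = 1;	/* timer_add(): caller holds the lock */
		timer_status = 1;	/* publish PENDING under the same lock */
		pthread_mutex_unlock(&list_lock);
	}

	int main(void)
	{
		timer_reset_sketch();
		return 0;
	}
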
diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
index 38347ab1..55d4856f 100644
--- a/lib/librte_vhost/fd_man.c
+++ b/lib/librte_vhost/fd_man.c
@@ -129,7 +129,9 @@ fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
pthread_mutex_lock(&pfdset->fd_mutex);
i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
if (i == -1) {
+ pthread_mutex_lock(&pfdset->fd_pooling_mutex);
fdset_shrink_nolock(pfdset);
+ pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
if (i == -1) {
pthread_mutex_unlock(&pfdset->fd_mutex);
@@ -246,7 +248,9 @@ fdset_event_dispatch(void *arg)
numfds = pfdset->num;
pthread_mutex_unlock(&pfdset->fd_mutex);
+ pthread_mutex_lock(&pfdset->fd_pooling_mutex);
val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
+ pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
if (val < 0)
continue;
diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
index 3331bcd9..3ab5cfdd 100644
--- a/lib/librte_vhost/fd_man.h
+++ b/lib/librte_vhost/fd_man.h
@@ -24,6 +24,7 @@ struct fdset {
struct pollfd rwfds[MAX_FDS];
struct fdentry fd[MAX_FDS];
pthread_mutex_t fd_mutex;
+ pthread_mutex_t fd_pooling_mutex;
int num; /* current fd number of this fdset */
union pipefds {
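
The new fd_pooling_mutex (identifier as spelled in the patch) serialises fdset_shrink_nolock() against the blocking poll() call: compaction moves entries within rwfds, which must not happen while poll() is scanning that array, yet the main fd_mutex cannot be held across a call that blocks for up to a second. A schematic of the two-lock split (pthread types standing in for the fdset internals):

	#include <poll.h>
	#include <pthread.h>

	#define MAX_FDS 1024

	static pthread_mutex_t fd_mutex = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER;
	static struct pollfd rwfds[MAX_FDS];
	static int num;

	static void dispatch_once(void)
	{
		int numfds;

		pthread_mutex_lock(&fd_mutex);
		numfds = num;		/* snapshot under the table lock */
		pthread_mutex_unlock(&fd_mutex);

		/* compaction cannot run while poll() scans rwfds, but
		 * fd_mutex stays free for adds during the wait */
		pthread_mutex_lock(&fd_pooling_mutex);
		poll(rwfds, numfds, 1000 /* millisecs */);
		pthread_mutex_unlock(&fd_pooling_mutex);
	}

	int main(void)
	{
		dispatch_once();
		return 0;
	}
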
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 01b60ff9..9883b049 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -90,6 +90,7 @@ static struct vhost_user vhost_user = {
.fdset = {
.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
+ .fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
.num = 0
},
.vsocket_cnt = 0,
@@ -960,13 +961,13 @@ rte_vhost_driver_unregister(const char *path)
int count;
struct vhost_user_connection *conn, *next;
+again:
pthread_mutex_lock(&vhost_user.mutex);
for (i = 0; i < vhost_user.vsocket_cnt; i++) {
struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
if (!strcmp(vsocket->path, path)) {
-again:
pthread_mutex_lock(&vsocket->conn_mutex);
for (conn = TAILQ_FIRST(&vsocket->conn_list);
conn != NULL;
@@ -982,6 +983,7 @@ again:
conn->connfd) == -1) {
pthread_mutex_unlock(
&vsocket->conn_mutex);
+ pthread_mutex_unlock(&vhost_user.mutex);
goto again;
}
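
In rte_vhost_driver_unregister(), the retry previously jumped back to re-take vsocket->conn_mutex while still holding vhost_user.mutex, which can deadlock against the dispatch thread that needs the latter from its callbacks. Moving the again: label above the first lock, together with the added unlock, makes the retry drop every lock and start over. The general shape of that pattern, sketched with pthreads (try_remove() is a hypothetical stand-in for the busy-connection test):

	#include <pthread.h>
	#include <stdbool.h>

	static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER;

	static bool try_remove(void) { return true; }	/* hypothetical */

	static void unregister_sketch(void)
	{
	again:
		pthread_mutex_lock(&outer);
		pthread_mutex_lock(&inner);
		if (!try_remove()) {
			pthread_mutex_unlock(&inner);
			/* the fix: also release the outer lock, so the
			 * other thread can make progress before we retry */
			pthread_mutex_unlock(&outer);
			goto again;
		}
		pthread_mutex_unlock(&inner);
		pthread_mutex_unlock(&outer);
	}

	int main(void)
	{
		unregister_sketch();
		return 0;
	}
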
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 5218f1b1..552b9298 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -393,8 +393,10 @@ vq_is_packed(struct virtio_net *dev)
static inline bool
desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
{
- return wrap_counter == !!(desc->flags & VRING_DESC_F_AVAIL) &&
- wrap_counter != !!(desc->flags & VRING_DESC_F_USED);
+ uint16_t flags = *((volatile uint16_t *) &desc->flags);
+
+ return wrap_counter == !!(flags & VRING_DESC_F_AVAIL) &&
+ wrap_counter != !!(flags & VRING_DESC_F_USED);
}
#define VHOST_LOG_PAGE 4096
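
The volatile cast in desc_is_avail() forces a single load of desc->flags so both bit tests see one consistent snapshot; without it the compiler is free to reload the field between the two tests, and a concurrent update by the driver could make the AVAIL and USED checks disagree. A compact illustration (flag bit positions follow the virtio spec as I recall them; verify against the headers):

	#include <stdint.h>

	#define F_AVAIL (1 << 7)	/* VRING_DESC_F_AVAIL */
	#define F_USED  (1 << 15)	/* VRING_DESC_F_USED */

	struct pdesc { uint16_t flags; };

	static int is_avail(struct pdesc *d, int wrap)
	{
		/* one snapshot: both tests below evaluate the same value
		 * even if the other side flips d->flags concurrently */
		uint16_t flags = *(volatile uint16_t *)&d->flags;

		return wrap == !!(flags & F_AVAIL) &&
			wrap != !!(flags & F_USED);
	}

	int main(void)
	{
		struct pdesc d = { .flags = F_AVAIL };
		return !is_avail(&d, 1);	/* exits 0: available */
	}
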
diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c
index dd01afc0..0694c0a7 100644
--- a/lib/librte_vhost/vhost_crypto.c
+++ b/lib/librte_vhost/vhost_crypto.c
@@ -466,12 +466,17 @@ vhost_crypto_msg_post_handler(int vid, void *msg)
}
static __rte_always_inline struct vring_desc *
-find_write_desc(struct vring_desc *head, struct vring_desc *desc)
+find_write_desc(struct vring_desc *head, struct vring_desc *desc,
+ uint32_t *nb_descs, uint32_t vq_size)
{
if (desc->flags & VRING_DESC_F_WRITE)
return desc;
while (desc->flags & VRING_DESC_F_NEXT) {
+ if (unlikely(*nb_descs == 0 || desc->next >= vq_size))
+ return NULL;
+ (*nb_descs)--;
+
desc = &head[desc->next];
if (desc->flags & VRING_DESC_F_WRITE)
return desc;
@@ -481,13 +486,18 @@ find_write_desc(struct vring_desc *head, struct vring_desc *desc)
}
static struct virtio_crypto_inhdr *
-reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc)
+reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc,
+ uint32_t *nb_descs, uint32_t vq_size)
{
uint64_t dlen;
struct virtio_crypto_inhdr *inhdr;
- while (desc->flags & VRING_DESC_F_NEXT)
+ while (desc->flags & VRING_DESC_F_NEXT) {
+ if (unlikely(*nb_descs == 0 || desc->next >= vq_size))
+ return NULL;
+ (*nb_descs)--;
desc = &vc_req->head[desc->next];
+ }
dlen = desc->len;
inhdr = IOVA_TO_VVA(struct virtio_crypto_inhdr *, vc_req, desc->addr,
@@ -500,15 +510,16 @@ reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc)
static __rte_always_inline int
move_desc(struct vring_desc *head, struct vring_desc **cur_desc,
- uint32_t size)
+ uint32_t size, uint32_t *nb_descs, uint32_t vq_size)
{
struct vring_desc *desc = *cur_desc;
- int left = size;
-
- rte_prefetch0(&head[desc->next]);
- left -= desc->len;
+ int left = size - desc->len;
while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) {
+ (*nb_descs)--;
+ if (unlikely(*nb_descs == 0 || desc->next >= vq_size))
+ return -1;
+
desc = &head[desc->next];
rte_prefetch0(&head[desc->next]);
left -= desc->len;
@@ -517,7 +528,14 @@ move_desc(struct vring_desc *head, struct vring_desc **cur_desc,
if (unlikely(left > 0))
return -1;
- *cur_desc = &head[desc->next];
+ if (unlikely(*nb_descs == 0))
+ *cur_desc = NULL;
+ else {
+ if (unlikely(desc->next >= vq_size))
+ return -1;
+ *cur_desc = &head[desc->next];
+ }
+
return 0;
}
@@ -539,7 +557,8 @@ get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc *cur_desc,
static int
copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
- struct vring_desc **cur_desc, uint32_t size)
+ struct vring_desc **cur_desc, uint32_t size,
+ uint32_t *nb_descs, uint32_t vq_size)
{
struct vring_desc *desc = *cur_desc;
uint64_t remain, addr, dlen, len;
@@ -548,7 +567,6 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
uint8_t *src;
int left = size;
- rte_prefetch0(&vc_req->head[desc->next]);
to_copy = RTE_MIN(desc->len, (uint32_t)left);
dlen = to_copy;
src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen,
@@ -582,6 +600,12 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
left -= to_copy;
while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) {
+ if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) {
+ VC_LOG_ERR("Invalid descriptors");
+ return -1;
+ }
+ (*nb_descs)--;
+
desc = &vc_req->head[desc->next];
rte_prefetch0(&vc_req->head[desc->next]);
to_copy = RTE_MIN(desc->len, (uint32_t)left);
@@ -624,7 +648,13 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
return -1;
}
- *cur_desc = &vc_req->head[desc->next];
+ if (unlikely(*nb_descs == 0))
+ *cur_desc = NULL;
+ else {
+ if (unlikely(desc->next >= vq_size))
+ return -1;
+ *cur_desc = &vc_req->head[desc->next];
+ }
return 0;
}
@@ -635,7 +665,6 @@ write_back_data(struct vhost_crypto_data_req *vc_req)
struct vhost_crypto_writeback_data *wb_data = vc_req->wb, *wb_last;
while (wb_data) {
- rte_prefetch0(wb_data->next);
rte_memcpy(wb_data->dst, wb_data->src, wb_data->len);
wb_last = wb_data;
wb_data = wb_data->next;
@@ -684,7 +713,8 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req,
struct vhost_crypto_writeback_data **end_wb_data,
uint8_t *src,
uint32_t offset,
- uint64_t write_back_len)
+ uint64_t write_back_len,
+ uint32_t *nb_descs, uint32_t vq_size)
{
struct vhost_crypto_writeback_data *wb_data, *head;
struct vring_desc *desc = *cur_desc;
@@ -731,6 +761,12 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req,
offset -= desc->len;
while (write_back_len) {
+ if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) {
+ VC_LOG_ERR("Invalid descriptors");
+ goto error_exit;
+ }
+ (*nb_descs)--;
+
desc = &vc_req->head[desc->next];
if (unlikely(!(desc->flags & VRING_DESC_F_WRITE))) {
VC_LOG_ERR("incorrect descriptor");
@@ -770,7 +806,13 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req,
wb_data->next = NULL;
}
- *cur_desc = &vc_req->head[desc->next];
+ if (unlikely(*nb_descs == 0))
+ *cur_desc = NULL;
+ else {
+ if (unlikely(desc->next >= vq_size))
+ goto error_exit;
+ *cur_desc = &vc_req->head[desc->next];
+ }
*end_wb_data = wb_data;
@@ -787,7 +829,8 @@ static uint8_t
prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
struct vhost_crypto_data_req *vc_req,
struct virtio_crypto_cipher_data_req *cipher,
- struct vring_desc *cur_desc)
+ struct vring_desc *cur_desc,
+ uint32_t *nb_descs, uint32_t vq_size)
{
struct vring_desc *desc = cur_desc;
struct vhost_crypto_writeback_data *ewb = NULL;
@@ -797,8 +840,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
/* prepare */
/* iv */
- if (unlikely(copy_data(iv_data, vc_req, &desc,
- cipher->para.iv_len) < 0)) {
+ if (unlikely(copy_data(iv_data, vc_req, &desc, cipher->para.iv_len,
+ nb_descs, vq_size) < 0)) {
ret = VIRTIO_CRYPTO_BADMSG;
goto error_exit;
}
@@ -818,7 +861,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
}
if (unlikely(move_desc(vc_req->head, &desc,
- cipher->para.src_data_len) < 0)) {
+ cipher->para.src_data_len, nb_descs,
+ vq_size) < 0)) {
VC_LOG_ERR("Incorrect descriptor");
ret = VIRTIO_CRYPTO_ERR;
goto error_exit;
@@ -835,8 +879,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
goto error_exit;
}
if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *),
- vc_req, &desc, cipher->para.src_data_len)
- < 0)) {
+ vc_req, &desc, cipher->para.src_data_len,
+ nb_descs, vq_size) < 0)) {
ret = VIRTIO_CRYPTO_BADMSG;
goto error_exit;
}
@@ -847,7 +891,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
}
/* dst */
- desc = find_write_desc(vc_req->head, desc);
+ desc = find_write_desc(vc_req->head, desc, nb_descs, vq_size);
if (unlikely(!desc)) {
VC_LOG_ERR("Cannot find write location");
ret = VIRTIO_CRYPTO_BADMSG;
@@ -866,7 +910,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
}
if (unlikely(move_desc(vc_req->head, &desc,
- cipher->para.dst_data_len) < 0)) {
+ cipher->para.dst_data_len,
+ nb_descs, vq_size) < 0)) {
VC_LOG_ERR("Incorrect descriptor");
ret = VIRTIO_CRYPTO_ERR;
goto error_exit;
@@ -877,7 +922,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
vc_req->wb = prepare_write_back_data(vc_req, &desc, &ewb,
rte_pktmbuf_mtod(m_src, uint8_t *), 0,
- cipher->para.dst_data_len);
+ cipher->para.dst_data_len, nb_descs, vq_size);
if (unlikely(vc_req->wb == NULL)) {
ret = VIRTIO_CRYPTO_ERR;
goto error_exit;
@@ -919,7 +964,8 @@ static uint8_t
prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
struct vhost_crypto_data_req *vc_req,
struct virtio_crypto_alg_chain_data_req *chain,
- struct vring_desc *cur_desc)
+ struct vring_desc *cur_desc,
+ uint32_t *nb_descs, uint32_t vq_size)
{
struct vring_desc *desc = cur_desc, *digest_desc;
struct vhost_crypto_writeback_data *ewb = NULL, *ewb2 = NULL;
@@ -932,7 +978,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
/* prepare */
/* iv */
if (unlikely(copy_data(iv_data, vc_req, &desc,
- chain->para.iv_len) < 0)) {
+ chain->para.iv_len, nb_descs, vq_size) < 0)) {
ret = VIRTIO_CRYPTO_BADMSG;
goto error_exit;
}
@@ -953,7 +999,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
}
if (unlikely(move_desc(vc_req->head, &desc,
- chain->para.src_data_len) < 0)) {
+ chain->para.src_data_len,
+ nb_descs, vq_size) < 0)) {
VC_LOG_ERR("Incorrect descriptor");
ret = VIRTIO_CRYPTO_ERR;
goto error_exit;
@@ -969,7 +1016,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
goto error_exit;
}
if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *),
- vc_req, &desc, chain->para.src_data_len)) < 0) {
+ vc_req, &desc, chain->para.src_data_len,
+				nb_descs, vq_size) < 0)) {
ret = VIRTIO_CRYPTO_BADMSG;
goto error_exit;
}
@@ -981,7 +1029,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
}
/* dst */
- desc = find_write_desc(vc_req->head, desc);
+ desc = find_write_desc(vc_req->head, desc, nb_descs, vq_size);
if (unlikely(!desc)) {
VC_LOG_ERR("Cannot find write location");
ret = VIRTIO_CRYPTO_BADMSG;
@@ -1000,7 +1048,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
}
if (unlikely(move_desc(vc_req->head, &desc,
- chain->para.dst_data_len) < 0)) {
+ chain->para.dst_data_len,
+ nb_descs, vq_size) < 0)) {
VC_LOG_ERR("Incorrect descriptor");
ret = VIRTIO_CRYPTO_ERR;
goto error_exit;
@@ -1017,7 +1066,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
}
if (unlikely(move_desc(vc_req->head, &desc,
- chain->para.hash_result_len) < 0)) {
+ chain->para.hash_result_len,
+ nb_descs, vq_size) < 0)) {
VC_LOG_ERR("Incorrect descriptor");
ret = VIRTIO_CRYPTO_ERR;
goto error_exit;
@@ -1029,7 +1079,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
rte_pktmbuf_mtod(m_src, uint8_t *),
chain->para.cipher_start_src_offset,
chain->para.dst_data_len -
- chain->para.cipher_start_src_offset);
+ chain->para.cipher_start_src_offset,
+ nb_descs, vq_size);
if (unlikely(vc_req->wb == NULL)) {
ret = VIRTIO_CRYPTO_ERR;
goto error_exit;
@@ -1042,14 +1093,16 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
/** create a wb_data for digest */
ewb->next = prepare_write_back_data(vc_req, &desc, &ewb2,
- digest_addr, 0, chain->para.hash_result_len);
+ digest_addr, 0, chain->para.hash_result_len,
+ nb_descs, vq_size);
if (unlikely(ewb->next == NULL)) {
ret = VIRTIO_CRYPTO_ERR;
goto error_exit;
}
if (unlikely(copy_data(digest_addr, vc_req, &digest_desc,
- chain->para.hash_result_len)) < 0) {
+ chain->para.hash_result_len,
+				nb_descs, vq_size) < 0)) {
ret = VIRTIO_CRYPTO_BADMSG;
goto error_exit;
}
@@ -1108,6 +1161,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
struct vring_desc *desc = NULL;
uint64_t session_id;
uint64_t dlen;
+ uint32_t nb_descs = vq->size;
int err = 0;
vc_req->desc_idx = desc_idx;
@@ -1116,6 +1170,10 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
if (likely(head->flags & VRING_DESC_F_INDIRECT)) {
dlen = head->len;
+ nb_descs = dlen / sizeof(struct vring_desc);
+		/* drop the request if the indirect table is oversized */
+ if (unlikely(nb_descs > vq->size))
+ return -1;
desc = IOVA_TO_VVA(struct vring_desc *, vc_req, head->addr,
&dlen, VHOST_ACCESS_RO);
if (unlikely(!desc || dlen != head->len))
@@ -1138,8 +1196,8 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
goto error_exit;
case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
req = &tmp_req;
- if (unlikely(copy_data(req, vc_req, &desc, sizeof(*req))
- < 0)) {
+ if (unlikely(copy_data(req, vc_req, &desc, sizeof(*req),
+ &nb_descs, vq->size) < 0)) {
err = VIRTIO_CRYPTO_BADMSG;
VC_LOG_ERR("Invalid descriptor");
goto error_exit;
@@ -1152,7 +1210,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
}
} else {
if (unlikely(move_desc(vc_req->head, &desc,
- sizeof(*req)) < 0)) {
+ sizeof(*req), &nb_descs, vq->size) < 0)) {
VC_LOG_ERR("Incorrect descriptor");
goto error_exit;
}
@@ -1193,11 +1251,13 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
break;
case VIRTIO_CRYPTO_SYM_OP_CIPHER:
err = prepare_sym_cipher_op(vcrypto, op, vc_req,
- &req->u.sym_req.u.cipher, desc);
+ &req->u.sym_req.u.cipher, desc,
+ &nb_descs, vq->size);
break;
case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING:
err = prepare_sym_chain_op(vcrypto, op, vc_req,
- &req->u.sym_req.u.chain, desc);
+ &req->u.sym_req.u.chain, desc,
+ &nb_descs, vq->size);
break;
}
if (unlikely(err != 0)) {
@@ -1215,7 +1275,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
error_exit:
- inhdr = reach_inhdr(vc_req, desc);
+ inhdr = reach_inhdr(vc_req, desc, &nb_descs, vq->size);
if (likely(inhdr != NULL))
inhdr->status = (uint8_t)err;
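
The common thread through the vhost_crypto.c hunks: every walk of a guest-controlled descriptor chain now carries a hop budget (nb_descs, initialised from the ring size or the indirect-table size) that is decremented per hop, and each next index is bounds-checked before use, so a malicious guest can neither loop the chain indefinitely nor point past the table. A condensed sketch of that validated walk (simplified descriptor layout, not the real vring structs):

	#include <stdint.h>
	#include <stddef.h>

	struct sdesc {			/* simplified vring descriptor */
		uint16_t flags;
		uint16_t next;
	};
	#define F_NEXT 1

	/* follow a chain, refusing more hops than descriptors exist */
	static struct sdesc *
	chain_tail(struct sdesc *tbl, uint16_t idx,
		   uint32_t nb_descs, uint32_t vq_size)
	{
		struct sdesc *d = &tbl[idx];

		while (d->flags & F_NEXT) {
			if (nb_descs == 0 || d->next >= vq_size)
				return NULL;	/* loop or bad next index */
			nb_descs--;
			d = &tbl[d->next];
		}
		return d;
	}

	int main(void)
	{
		struct sdesc ring[2] = {
			{ .flags = F_NEXT, .next = 1 },
			{ .flags = F_NEXT, .next = 0 },	/* deliberate cycle */
		};
		/* budget of 2 hops: the cycle is detected, NULL returned */
		return chain_tail(ring, 0, 2, 2) == NULL ? 0 : 1;
	}
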
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 3ea64eba..19e04c95 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -489,6 +489,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len)
struct rte_vhost_mem_region *r;
uint32_t i;
+ if (unlikely(!dev || !dev->mem))
+ goto out_error;
+
/* Find the region where the address lives. */
for (i = 0; i < dev->mem->nregions; i++) {
r = &dev->mem->regions[i];
@@ -503,6 +506,7 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len)
r->host_user_addr;
}
}
+out_error:
*len = 0;
return 0;
@@ -537,7 +541,7 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
{
struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
struct vhost_vring_addr *addr = &vq->ring_addrs;
- uint64_t len;
+ uint64_t len, expected_len;
if (vq_is_packed(dev)) {
len = sizeof(struct vring_packed_desc) * vq->size;
@@ -603,11 +607,12 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
addr = &vq->ring_addrs;
len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size;
+ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+ len += sizeof(uint16_t);
+ expected_len = len;
vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
vq, addr->avail_user_addr, &len);
- if (vq->avail == 0 ||
- len != sizeof(struct vring_avail) +
- sizeof(uint16_t) * vq->size) {
+ if (vq->avail == 0 || len != expected_len) {
RTE_LOG(DEBUG, VHOST_CONFIG,
"(%d) failed to map avail ring.\n",
dev->vid);
@@ -616,10 +621,12 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
len = sizeof(struct vring_used) +
sizeof(struct vring_used_elem) * vq->size;
+ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+ len += sizeof(uint16_t);
+ expected_len = len;
vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
vq, addr->used_user_addr, &len);
- if (vq->used == 0 || len != sizeof(struct vring_used) +
- sizeof(struct vring_used_elem) * vq->size) {
+ if (vq->used == 0 || len != expected_len) {
RTE_LOG(DEBUG, VHOST_CONFIG,
"(%d) failed to map used ring.\n",
dev->vid);
@@ -726,13 +733,16 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
uint64_t host_phys_addr, uint64_t size)
{
struct guest_page *page, *last_page;
+ struct guest_page *old_pages;
if (dev->nr_guest_pages == dev->max_guest_pages) {
dev->max_guest_pages *= 2;
+ old_pages = dev->guest_pages;
dev->guest_pages = realloc(dev->guest_pages,
dev->max_guest_pages * sizeof(*page));
if (!dev->guest_pages) {
RTE_LOG(ERR, VHOST_CONFIG, "cannot realloc guest_pages\n");
+ free(old_pages);
return -1;
}
}
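
The add_one_guest_page() change fixes a classic realloc() leak: on failure realloc() returns NULL but leaves the old block allocated, so writing the result straight into dev->guest_pages discarded the only pointer to it. Saving old_pages first lets the error path free it. The generic leak-safe shape of that pattern:

	#include <stdlib.h>

	/* never overwrite the only pointer to the old block with
	 * realloc()'s return value before checking it */
	static int grow(int **arr, size_t new_count)
	{
		int *tmp = realloc(*arr, new_count * sizeof(**arr));

		if (tmp == NULL) {
			free(*arr);	/* old block is still ours to free */
			*arr = NULL;
			return -1;
		}
		*arr = tmp;
		return 0;
	}

	int main(void)
	{
		int *data = NULL;
		return grow(&data, 16) == 0 ? (free(data), 0) : 1;
	}
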
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 5e1a1a72..15d682c3 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -335,13 +335,22 @@ fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint16_t vec_id = *vec_idx;
uint32_t len = 0;
uint64_t dlen;
+ uint32_t nr_descs = vq->size;
+ uint32_t cnt = 0;
struct vring_desc *descs = vq->desc;
struct vring_desc *idesc = NULL;
+ if (unlikely(idx >= vq->size))
+ return -1;
+
*desc_chain_head = idx;
if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
dlen = vq->desc[idx].len;
+ nr_descs = dlen / sizeof(struct vring_desc);
+ if (unlikely(nr_descs > vq->size))
+ return -1;
+
descs = (struct vring_desc *)(uintptr_t)
vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
&dlen,
@@ -366,7 +375,7 @@ fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
while (1) {
- if (unlikely(idx >= vq->size)) {
+ if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) {
free_ind_table(idesc);
return -1;
}
@@ -520,6 +529,12 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
return -1;
+ /*
+ * The ordering between desc flags and desc
+	 * content reads needs to be enforced.
+ */
+ rte_smp_rmb();
+
*desc_count = 0;
*len = 0;
@@ -527,6 +542,9 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
if (unlikely(vec_id >= BUF_VECTOR_MAX))
return -1;
+ if (unlikely(*desc_count >= vq->size))
+ return -1;
+
*desc_count += 1;
*buf_id = descs[avail_idx].id;
@@ -791,6 +809,12 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
avail_head = *((volatile uint16_t *)&vq->avail->idx);
+ /*
+ * The ordering between avail index and
+ * desc reads needs to be enforced.
+ */
+ rte_smp_rmb();
+
for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
uint16_t nr_vec = 0;
@@ -1373,6 +1397,12 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
if (free_entries == 0)
return 0;
+ /*
+ * The ordering between avail index and
+ * desc reads needs to be enforced.
+ */
+ rte_smp_rmb();
+
VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
count = RTE_MIN(count, MAX_PKT_BURST);
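
The rte_smp_rmb() calls added in virtio_net.c matter on weakly ordered CPUs (Arm, POWER), where the load of the avail index can be reordered with the subsequent descriptor loads; the read barrier guarantees the ring entries read afterwards are at least as fresh as the index that advertised them (on x86 it reduces to a compiler barrier, to the best of my knowledge). A portable analogue of the same intent using C11 acquire semantics, not DPDK's actual implementation:

	#include <stdatomic.h>
	#include <stdint.h>

	static uint16_t ring[256];
	static _Atomic uint16_t avail_idx;

	static uint16_t read_next(uint16_t last)
	{
		/* acquire-ordered index load: the ring read below cannot
		 * be hoisted above it by compiler or CPU */
		uint16_t head = atomic_load_explicit(&avail_idx,
					memory_order_acquire);
		if (last == head)
			return 0;	/* nothing new advertised */
		return ring[last & 255];
	}

	int main(void)
	{
		return read_next(0);
	}
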
diff --git a/lib/meson.build b/lib/meson.build
index bb7f443f..df4226c5 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -79,7 +79,7 @@ foreach l:libraries
foreach d:deps
if not is_variable('shared_rte_' + d)
error('Missing dependency ' + d +
- ' for library ' + lib_name)
+ ' for library ' + libname)
endif
shared_deps += [get_variable('shared_rte_' + d)]
static_deps += [get_variable('static_rte_' + d)]