aboutsummaryrefslogtreecommitdiffstats
path: root/lib/librte_eal/linuxapp/eal
diff options
context:
space:
mode:
Diffstat (limited to 'lib/librte_eal/linuxapp/eal')
-rw-r--r--lib/librte_eal/linuxapp/eal/eal.c150
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_memalloc.c50
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_memory.c2
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_vfio.c91
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_vfio.h12
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c16
6 files changed, 293 insertions, 28 deletions
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 361744d4..30138b63 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -13,7 +13,9 @@
#include <syslog.h>
#include <getopt.h>
#include <sys/file.h>
+#include <dirent.h>
#include <fcntl.h>
+#include <fnmatch.h>
#include <stddef.h>
#include <errno.h>
#include <limits.h>
@@ -123,7 +125,7 @@ eal_create_runtime_dir(void)
/* create prefix-specific subdirectory under DPDK runtime dir */
ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
- tmp, internal_config.hugefile_prefix);
+ tmp, eal_get_hugefile_prefix());
if (ret < 0 || ret == sizeof(runtime_dir)) {
RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
return -1;
@@ -149,6 +151,91 @@ eal_create_runtime_dir(void)
return 0;
}
+int
+eal_clean_runtime_dir(void)
+{
+ DIR *dir;
+ struct dirent *dirent;
+ int dir_fd, fd, lck_result;
+ static const char * const filters[] = {
+ "fbarray_*",
+ "mp_socket_*"
+ };
+
+ /* open directory */
+ dir = opendir(runtime_dir);
+ if (!dir) {
+ RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n",
+ runtime_dir);
+ goto error;
+ }
+ dir_fd = dirfd(dir);
+
+ /* lock the directory before doing anything, to avoid races */
+ if (flock(dir_fd, LOCK_EX) < 0) {
+ RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n",
+ runtime_dir);
+ goto error;
+ }
+
+ dirent = readdir(dir);
+ if (!dirent) {
+ RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n",
+ runtime_dir);
+ goto error;
+ }
+
+ while (dirent != NULL) {
+ unsigned int f_idx;
+ bool skip = true;
+
+ /* skip files that don't match the patterns */
+ for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) {
+ const char *filter = filters[f_idx];
+
+ if (fnmatch(filter, dirent->d_name, 0) == 0) {
+ skip = false;
+ break;
+ }
+ }
+ if (skip) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* try and lock the file */
+ fd = openat(dir_fd, dirent->d_name, O_RDONLY);
+
+ /* skip to next file */
+ if (fd == -1) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* non-blocking lock */
+ lck_result = flock(fd, LOCK_EX | LOCK_NB);
+
+ /* if lock succeeds, remove the file */
+ if (lck_result != -1)
+ unlinkat(dir_fd, dirent->d_name, 0);
+ close(fd);
+ dirent = readdir(dir);
+ }
+
+ /* closedir closes dir_fd and drops the lock */
+ closedir(dir);
+ return 0;
+
+error:
+ if (dir)
+ closedir(dir);
+
+ RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n",
+ strerror(errno));
+
+ return -1;
+}
+
const char *
rte_eal_get_runtime_dir(void)
{
@@ -494,10 +581,6 @@ eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
socket_arg[i] = val;
}
- /* check if we have a positive amount of total memory */
- if (total_mem == 0)
- return -1;
-
return 0;
}
@@ -639,13 +722,31 @@ eal_parse_args(int argc, char **argv)
exit(EXIT_SUCCESS);
case OPT_HUGE_DIR_NUM:
- internal_config.hugepage_dir = strdup(optarg);
+ {
+ char *hdir = strdup(optarg);
+ if (hdir == NULL)
+ RTE_LOG(ERR, EAL, "Could not store hugepage directory\n");
+ else {
+ /* free old hugepage dir */
+ if (internal_config.hugepage_dir != NULL)
+ free(internal_config.hugepage_dir);
+ internal_config.hugepage_dir = hdir;
+ }
break;
-
+ }
case OPT_FILE_PREFIX_NUM:
- internal_config.hugefile_prefix = strdup(optarg);
+ {
+ char *prefix = strdup(optarg);
+ if (prefix == NULL)
+ RTE_LOG(ERR, EAL, "Could not store file prefix\n");
+ else {
+ /* free old prefix */
+ if (internal_config.hugefile_prefix != NULL)
+ free(internal_config.hugefile_prefix);
+ internal_config.hugefile_prefix = prefix;
+ }
break;
-
+ }
case OPT_SOCKET_MEM_NUM:
if (eal_parse_socket_arg(optarg,
internal_config.socket_mem) < 0) {
@@ -695,10 +796,21 @@ eal_parse_args(int argc, char **argv)
break;
case OPT_MBUF_POOL_OPS_NAME_NUM:
- internal_config.user_mbuf_pool_ops_name =
- strdup(optarg);
+ {
+ char *ops_name = strdup(optarg);
+ if (ops_name == NULL)
+ RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
+ else {
+ /* free old ops name */
+ if (internal_config.user_mbuf_pool_ops_name !=
+ NULL)
+ free(internal_config.user_mbuf_pool_ops_name);
+
+ internal_config.user_mbuf_pool_ops_name =
+ ops_name;
+ }
break;
-
+ }
default:
if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
RTE_LOG(ERR, EAL, "Option %c is not supported "
@@ -1096,6 +1208,18 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ /*
+ * Clean up unused files in runtime directory. We do this at the end of
+ * init and not at the beginning because we want to clean stuff up
+ * whether we are primary or secondary process, but we cannot remove
+ * primary process' files because secondary should be able to run even
+ * if primary process is dead.
+ */
+ if (eal_clean_runtime_dir() < 0) {
+ rte_eal_init_alert("Cannot clear runtime directory\n");
+ return -1;
+ }
+
rte_eal_mcfg_complete();
/* Call each registered callback, if enabled */
@@ -1130,6 +1254,8 @@ rte_eal_cleanup(void)
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
rte_memseg_walk(mark_freeable, NULL);
rte_service_finalize();
+ rte_mp_channel_cleanup();
+ eal_cleanup_config(&internal_config);
return 0;
}
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 78493956..f63d9ca6 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -23,6 +23,10 @@
#include <sys/time.h>
#include <signal.h>
#include <setjmp.h>
+#ifdef F_ADD_SEALS /* if file sealing is supported, so is memfd */
+#include <linux/memfd.h>
+#define MEMFD_SUPPORTED
+#endif
#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
#include <numa.h>
#include <numaif.h>
@@ -53,8 +57,8 @@ const int anonymous_hugepages_supported =
#endif
/*
- * we don't actually care if memfd itself is supported - we only need to check
- * if memfd supports hugetlbfs, as that already implies memfd support.
+ * we've already checked memfd support at compile-time, but we also need to
+ * check if we can create hugepage files with memfd.
*
* also, this is not a constant, because while we may be *compiled* with memfd
* hugetlbfs support, we might not be *running* on a system that supports memfd
@@ -63,10 +67,11 @@ const int anonymous_hugepages_supported =
*/
static int memfd_create_supported =
#ifdef MFD_HUGETLB
-#define MEMFD_SUPPORTED
1;
+#define RTE_MFD_HUGETLB MFD_HUGETLB
#else
0;
+#define RTE_MFD_HUGETLB 4U
#endif
/*
@@ -171,7 +176,7 @@ prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id)
RTE_LOG(ERR, EAL,
"Failed to get current mempolicy: %s. "
"Assuming MPOL_DEFAULT.\n", strerror(errno));
- oldpolicy = MPOL_DEFAULT;
+ *oldpolicy = MPOL_DEFAULT;
}
RTE_LOG(DEBUG, EAL,
"Setting policy MPOL_PREFERRED for socket %d\n",
@@ -338,12 +343,12 @@ get_seg_memfd(struct hugepage_info *hi __rte_unused,
int fd;
char segname[250]; /* as per manpage, limit is 249 bytes plus null */
+ int flags = RTE_MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
+
if (internal_config.single_file_segments) {
fd = fd_list[list_idx].memseg_list_fd;
if (fd < 0) {
- int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
-
snprintf(segname, sizeof(segname), "seg_%i", list_idx);
fd = memfd_create(segname, flags);
if (fd < 0) {
@@ -357,8 +362,6 @@ get_seg_memfd(struct hugepage_info *hi __rte_unused,
fd = fd_list[list_idx].fds[seg_idx];
if (fd < 0) {
- int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
-
snprintf(segname, sizeof(segname), "seg_%i-%i",
list_idx, seg_idx);
fd = memfd_create(segname, flags);
@@ -633,13 +636,13 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
int mmap_flags;
if (internal_config.in_memory && !memfd_create_supported) {
- int pagesz_flag, flags;
+ const int in_memory_flags = MAP_HUGETLB | MAP_FIXED |
+ MAP_PRIVATE | MAP_ANONYMOUS;
+ int pagesz_flag;
pagesz_flag = pagesz_flags(alloc_sz);
- flags = pagesz_flag | MAP_HUGETLB | MAP_FIXED |
- MAP_PRIVATE | MAP_ANONYMOUS;
fd = -1;
- mmap_flags = flags;
+ mmap_flags = in_memory_flags | pagesz_flag;
/* single-file segments codepath will never be active
* here because in-memory mode is incompatible with the
@@ -1542,6 +1545,17 @@ int
eal_memalloc_get_seg_fd(int list_idx, int seg_idx)
{
int fd;
+
+ if (internal_config.in_memory || internal_config.no_hugetlbfs) {
+#ifndef MEMFD_SUPPORTED
+ /* in in-memory or no-huge mode, we rely on memfd support */
+ return -ENOTSUP;
+#endif
+ /* memfd supported, but hugetlbfs memfd may not be */
+ if (!internal_config.no_hugetlbfs && !memfd_create_supported)
+ return -ENOTSUP;
+ }
+
if (internal_config.single_file_segments) {
fd = fd_list[list_idx].memseg_list_fd;
} else if (fd_list[list_idx].len == 0) {
@@ -1565,7 +1579,7 @@ test_memfd_create(void)
int pagesz_flag = pagesz_flags(pagesz);
int flags;
- flags = pagesz_flag | MFD_HUGETLB;
+ flags = pagesz_flag | RTE_MFD_HUGETLB;
int fd = memfd_create("test", flags);
if (fd < 0) {
/* we failed - let memalloc know this isn't working */
@@ -1589,6 +1603,16 @@ eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ if (internal_config.in_memory || internal_config.no_hugetlbfs) {
+#ifndef MEMFD_SUPPORTED
+ /* in in-memory or no-huge mode, we rely on memfd support */
+ return -ENOTSUP;
+#endif
+ /* memfd supported, but hugetlbfs memfd may not be */
+ if (!internal_config.no_hugetlbfs && !memfd_create_supported)
+ return -ENOTSUP;
+ }
+
/* fd_list not initialized? */
if (fd_list[list_idx].len == 0)
return -ENODEV;
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 32feb415..e05da74c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -434,7 +434,7 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
}
snprintf(hugedir_str, sizeof(hugedir_str),
- "%s/%s", hpi->hugedir, internal_config.hugefile_prefix);
+ "%s/%s", hpi->hugedir, eal_get_hugefile_prefix());
/* parse numa map */
while (fgets(buf, sizeof(buf), f) != NULL) {
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 0516b159..c821e838 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -549,6 +549,65 @@ next:
}
}
+static int
+vfio_sync_default_container(void)
+{
+ struct rte_mp_msg mp_req, *mp_rep;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+ struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+ int iommu_type_id;
+ unsigned int i;
+
+ /* cannot be called from primary */
+ if (rte_eal_process_type() != RTE_PROC_SECONDARY)
+ return -1;
+
+ /* default container fd should have been opened in rte_vfio_enable() */
+ if (!default_vfio_cfg->vfio_enabled ||
+ default_vfio_cfg->vfio_container_fd < 0) {
+ RTE_LOG(ERR, EAL, "VFIO support is not initialized\n");
+ return -1;
+ }
+
+ /* find default container's IOMMU type */
+ p->req = SOCKET_REQ_IOMMU_TYPE;
+ strcpy(mp_req.name, EAL_VFIO_MP);
+ mp_req.len_param = sizeof(*p);
+ mp_req.num_fds = 0;
+
+ iommu_type_id = -1;
+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+ mp_reply.nb_received == 1) {
+ mp_rep = &mp_reply.msgs[0];
+ p = (struct vfio_mp_param *)mp_rep->param;
+ if (p->result == SOCKET_OK)
+ iommu_type_id = p->iommu_type_id;
+ free(mp_reply.msgs);
+ }
+ if (iommu_type_id < 0) {
+ RTE_LOG(ERR, EAL, "Could not get IOMMU type for default container\n");
+ return -1;
+ }
+
+ /* we now have an fd for default container, as well as its IOMMU type.
+ * now, set up default VFIO container config to match.
+ */
+ for (i = 0; i < RTE_DIM(iommu_types); i++) {
+ const struct vfio_iommu_type *t = &iommu_types[i];
+ if (t->type_id != iommu_type_id)
+ continue;
+
+ /* we found our IOMMU type */
+ default_vfio_cfg->vfio_iommu_type = t;
+
+ return 0;
+ }
+ RTE_LOG(ERR, EAL, "Could not find IOMMU type id (%i)\n",
+ iommu_type_id);
+ return -1;
+}
+
int
rte_vfio_clear_group(int vfio_group_fd)
{
@@ -745,6 +804,26 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
else
RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n");
}
+ } else if (rte_eal_process_type() != RTE_PROC_PRIMARY &&
+ vfio_cfg == default_vfio_cfg &&
+ vfio_cfg->vfio_iommu_type == NULL) {
+ /* if we're not a primary process, we do not set up the VFIO
+ * container because it's already been set up by the primary
+ * process. instead, we simply ask the primary about VFIO type
+ * we are using, and set the VFIO config up appropriately.
+ */
+ ret = vfio_sync_default_container();
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Could not sync default VFIO container\n");
+ close(vfio_group_fd);
+ rte_vfio_clear_group(vfio_group_fd);
+ return -1;
+ }
+ /* we have successfully initialized VFIO, notify user */
+ const struct vfio_iommu_type *t =
+ default_vfio_cfg->vfio_iommu_type;
+ RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n",
+ t->type_id, t->name);
}
/* get a file descriptor for the device */
@@ -857,7 +936,8 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
/* if there are no active device groups, unregister the callback to
* avoid spurious attempts to map/unmap memory from VFIO.
*/
- if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0)
+ if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0 &&
+ rte_eal_process_type() != RTE_PROC_SECONDARY)
rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME,
NULL);
@@ -977,6 +1057,15 @@ vfio_get_default_container_fd(void)
return -1;
}
+int
+vfio_get_iommu_type(void)
+{
+ if (default_vfio_cfg->vfio_iommu_type == NULL)
+ return -1;
+
+ return default_vfio_cfg->vfio_iommu_type->type_id;
+}
+
const struct vfio_iommu_type *
vfio_set_iommu_type(int vfio_container_fd)
{
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 63ae115c..cb2d35fb 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -5,6 +5,8 @@
#ifndef EAL_VFIO_H_
#define EAL_VFIO_H_
+#include <rte_common.h>
+
/*
* determine if VFIO is present on the system
*/
@@ -122,6 +124,9 @@ int vfio_get_default_container_fd(void);
const struct vfio_iommu_type *
vfio_set_iommu_type(int vfio_container_fd);
+int
+vfio_get_iommu_type(void);
+
/* check if we have any supported extensions */
int
vfio_has_supported_extensions(int vfio_container_fd);
@@ -133,6 +138,7 @@ int vfio_mp_sync_setup(void);
#define SOCKET_REQ_CONTAINER 0x100
#define SOCKET_REQ_GROUP 0x200
#define SOCKET_REQ_DEFAULT_CONTAINER 0x400
+#define SOCKET_REQ_IOMMU_TYPE 0x800
#define SOCKET_OK 0x0
#define SOCKET_NO_FD 0x1
#define SOCKET_ERR 0xFF
@@ -140,7 +146,11 @@ int vfio_mp_sync_setup(void);
struct vfio_mp_param {
int req;
int result;
- int group_num;
+ RTE_STD_C11
+ union {
+ int group_num;
+ int iommu_type_id;
+ };
};
#endif /* VFIO_PRESENT */
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index a1e8c834..2a47f29d 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -77,6 +77,22 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
reply.fds[0] = fd;
}
break;
+ case SOCKET_REQ_IOMMU_TYPE:
+ {
+ int iommu_type_id;
+
+ r->req = SOCKET_REQ_IOMMU_TYPE;
+
+ iommu_type_id = vfio_get_iommu_type();
+
+ if (iommu_type_id < 0)
+ r->result = SOCKET_ERR;
+ else {
+ r->iommu_type_id = iommu_type_id;
+ r->result = SOCKET_OK;
+ }
+ break;
+ }
default:
RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
return -1;