diff options
author | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2019-04-15 14:36:48 +0200 |
---|---|---|
committer | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2019-04-16 08:38:15 +0200 |
commit | 0b6b37f6a2ee1764e8912fe9f57dd4ed2baadecd (patch) | |
tree | e9c4dd16e978e7b5c4f59efb8a63fe110786592b /lib/librte_eal/linuxapp | |
parent | ba7d9829e24a32d31b31f5816e8b9a3a8799ba68 (diff) |
New upstream version 18.11.1
Change-Id: I2394f61ba94cc575bf2c55186f14e5d6fba9eec7
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Diffstat (limited to 'lib/librte_eal/linuxapp')
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal.c | 150 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_memalloc.c | 50 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_memory.c | 2 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_vfio.c | 91 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_vfio.h | 12 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 16 |
6 files changed, 293 insertions, 28 deletions
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 361744d4..30138b63 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -13,7 +13,9 @@ #include <syslog.h> #include <getopt.h> #include <sys/file.h> +#include <dirent.h> #include <fcntl.h> +#include <fnmatch.h> #include <stddef.h> #include <errno.h> #include <limits.h> @@ -123,7 +125,7 @@ eal_create_runtime_dir(void) /* create prefix-specific subdirectory under DPDK runtime dir */ ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s", - tmp, internal_config.hugefile_prefix); + tmp, eal_get_hugefile_prefix()); if (ret < 0 || ret == sizeof(runtime_dir)) { RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n"); return -1; @@ -149,6 +151,91 @@ eal_create_runtime_dir(void) return 0; } +int +eal_clean_runtime_dir(void) +{ + DIR *dir; + struct dirent *dirent; + int dir_fd, fd, lck_result; + static const char * const filters[] = { + "fbarray_*", + "mp_socket_*" + }; + + /* open directory */ + dir = opendir(runtime_dir); + if (!dir) { + RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n", + runtime_dir); + goto error; + } + dir_fd = dirfd(dir); + + /* lock the directory before doing anything, to avoid races */ + if (flock(dir_fd, LOCK_EX) < 0) { + RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n", + runtime_dir); + goto error; + } + + dirent = readdir(dir); + if (!dirent) { + RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n", + runtime_dir); + goto error; + } + + while (dirent != NULL) { + unsigned int f_idx; + bool skip = true; + + /* skip files that don't match the patterns */ + for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) { + const char *filter = filters[f_idx]; + + if (fnmatch(filter, dirent->d_name, 0) == 0) { + skip = false; + break; + } + } + if (skip) { + dirent = readdir(dir); + continue; + } + + /* try and lock the file */ + fd = openat(dir_fd, dirent->d_name, O_RDONLY); + + /* skip to next file */ + if (fd == -1) { + dirent = readdir(dir); + continue; + } + + /* non-blocking lock */ + lck_result = flock(fd, LOCK_EX | LOCK_NB); + + /* if lock succeeds, remove the file */ + if (lck_result != -1) + unlinkat(dir_fd, dirent->d_name, 0); + close(fd); + dirent = readdir(dir); + } + + /* closedir closes dir_fd and drops the lock */ + closedir(dir); + return 0; + +error: + if (dir) + closedir(dir); + + RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n", + strerror(errno)); + + return -1; +} + const char * rte_eal_get_runtime_dir(void) { @@ -494,10 +581,6 @@ eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg) socket_arg[i] = val; } - /* check if we have a positive amount of total memory */ - if (total_mem == 0) - return -1; - return 0; } @@ -639,13 +722,31 @@ eal_parse_args(int argc, char **argv) exit(EXIT_SUCCESS); case OPT_HUGE_DIR_NUM: - internal_config.hugepage_dir = strdup(optarg); + { + char *hdir = strdup(optarg); + if (hdir == NULL) + RTE_LOG(ERR, EAL, "Could not store hugepage directory\n"); + else { + /* free old hugepage dir */ + if (internal_config.hugepage_dir != NULL) + free(internal_config.hugepage_dir); + internal_config.hugepage_dir = hdir; + } break; - + } case OPT_FILE_PREFIX_NUM: - internal_config.hugefile_prefix = strdup(optarg); + { + char *prefix = strdup(optarg); + if (prefix == NULL) + RTE_LOG(ERR, EAL, "Could not store file prefix\n"); + else { + /* free old prefix */ + if (internal_config.hugefile_prefix != NULL) + free(internal_config.hugefile_prefix); + internal_config.hugefile_prefix = prefix; + } break; - + } case OPT_SOCKET_MEM_NUM: if (eal_parse_socket_arg(optarg, internal_config.socket_mem) < 0) { @@ -695,10 +796,21 @@ eal_parse_args(int argc, char **argv) break; case OPT_MBUF_POOL_OPS_NAME_NUM: - internal_config.user_mbuf_pool_ops_name = - strdup(optarg); + { + char *ops_name = strdup(optarg); + if (ops_name == NULL) + RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n"); + else { + /* free old ops name */ + if (internal_config.user_mbuf_pool_ops_name != + NULL) + free(internal_config.user_mbuf_pool_ops_name); + + internal_config.user_mbuf_pool_ops_name = + ops_name; + } break; - + } default: if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { RTE_LOG(ERR, EAL, "Option %c is not supported " @@ -1096,6 +1208,18 @@ rte_eal_init(int argc, char **argv) return -1; } + /* + * Clean up unused files in runtime directory. We do this at the end of + * init and not at the beginning because we want to clean stuff up + * whether we are primary or secondary process, but we cannot remove + * primary process' files because secondary should be able to run even + * if primary process is dead. + */ + if (eal_clean_runtime_dir() < 0) { + rte_eal_init_alert("Cannot clear runtime directory\n"); + return -1; + } + rte_eal_mcfg_complete(); /* Call each registered callback, if enabled */ @@ -1130,6 +1254,8 @@ rte_eal_cleanup(void) if (rte_eal_process_type() == RTE_PROC_PRIMARY) rte_memseg_walk(mark_freeable, NULL); rte_service_finalize(); + rte_mp_channel_cleanup(); + eal_cleanup_config(&internal_config); return 0; } diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c index 78493956..f63d9ca6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c +++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c @@ -23,6 +23,10 @@ #include <sys/time.h> #include <signal.h> #include <setjmp.h> +#ifdef F_ADD_SEALS /* if file sealing is supported, so is memfd */ +#include <linux/memfd.h> +#define MEMFD_SUPPORTED +#endif #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES #include <numa.h> #include <numaif.h> @@ -53,8 +57,8 @@ const int anonymous_hugepages_supported = #endif /* - * we don't actually care if memfd itself is supported - we only need to check - * if memfd supports hugetlbfs, as that already implies memfd support. + * we've already checked memfd support at compile-time, but we also need to + * check if we can create hugepage files with memfd. * * also, this is not a constant, because while we may be *compiled* with memfd * hugetlbfs support, we might not be *running* on a system that supports memfd @@ -63,10 +67,11 @@ const int anonymous_hugepages_supported = */ static int memfd_create_supported = #ifdef MFD_HUGETLB -#define MEMFD_SUPPORTED 1; +#define RTE_MFD_HUGETLB MFD_HUGETLB #else 0; +#define RTE_MFD_HUGETLB 4U #endif /* @@ -171,7 +176,7 @@ prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id) RTE_LOG(ERR, EAL, "Failed to get current mempolicy: %s. " "Assuming MPOL_DEFAULT.\n", strerror(errno)); - oldpolicy = MPOL_DEFAULT; + *oldpolicy = MPOL_DEFAULT; } RTE_LOG(DEBUG, EAL, "Setting policy MPOL_PREFERRED for socket %d\n", @@ -338,12 +343,12 @@ get_seg_memfd(struct hugepage_info *hi __rte_unused, int fd; char segname[250]; /* as per manpage, limit is 249 bytes plus null */ + int flags = RTE_MFD_HUGETLB | pagesz_flags(hi->hugepage_sz); + if (internal_config.single_file_segments) { fd = fd_list[list_idx].memseg_list_fd; if (fd < 0) { - int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz); - snprintf(segname, sizeof(segname), "seg_%i", list_idx); fd = memfd_create(segname, flags); if (fd < 0) { @@ -357,8 +362,6 @@ get_seg_memfd(struct hugepage_info *hi __rte_unused, fd = fd_list[list_idx].fds[seg_idx]; if (fd < 0) { - int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz); - snprintf(segname, sizeof(segname), "seg_%i-%i", list_idx, seg_idx); fd = memfd_create(segname, flags); @@ -633,13 +636,13 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, int mmap_flags; if (internal_config.in_memory && !memfd_create_supported) { - int pagesz_flag, flags; + const int in_memory_flags = MAP_HUGETLB | MAP_FIXED | + MAP_PRIVATE | MAP_ANONYMOUS; + int pagesz_flag; pagesz_flag = pagesz_flags(alloc_sz); - flags = pagesz_flag | MAP_HUGETLB | MAP_FIXED | - MAP_PRIVATE | MAP_ANONYMOUS; fd = -1; - mmap_flags = flags; + mmap_flags = in_memory_flags | pagesz_flag; /* single-file segments codepath will never be active * here because in-memory mode is incompatible with the @@ -1542,6 +1545,17 @@ int eal_memalloc_get_seg_fd(int list_idx, int seg_idx) { int fd; + + if (internal_config.in_memory || internal_config.no_hugetlbfs) { +#ifndef MEMFD_SUPPORTED + /* in in-memory or no-huge mode, we rely on memfd support */ + return -ENOTSUP; +#endif + /* memfd supported, but hugetlbfs memfd may not be */ + if (!internal_config.no_hugetlbfs && !memfd_create_supported) + return -ENOTSUP; + } + if (internal_config.single_file_segments) { fd = fd_list[list_idx].memseg_list_fd; } else if (fd_list[list_idx].len == 0) { @@ -1565,7 +1579,7 @@ test_memfd_create(void) int pagesz_flag = pagesz_flags(pagesz); int flags; - flags = pagesz_flag | MFD_HUGETLB; + flags = pagesz_flag | RTE_MFD_HUGETLB; int fd = memfd_create("test", flags); if (fd < 0) { /* we failed - let memalloc know this isn't working */ @@ -1589,6 +1603,16 @@ eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + if (internal_config.in_memory || internal_config.no_hugetlbfs) { +#ifndef MEMFD_SUPPORTED + /* in in-memory or no-huge mode, we rely on memfd support */ + return -ENOTSUP; +#endif + /* memfd supported, but hugetlbfs memfd may not be */ + if (!internal_config.no_hugetlbfs && !memfd_create_supported) + return -ENOTSUP; + } + /* fd_list not initialized? */ if (fd_list[list_idx].len == 0) return -ENODEV; diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 32feb415..e05da74c 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -434,7 +434,7 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) } snprintf(hugedir_str, sizeof(hugedir_str), - "%s/%s", hpi->hugedir, internal_config.hugefile_prefix); + "%s/%s", hpi->hugedir, eal_get_hugefile_prefix()); /* parse numa map */ while (fgets(buf, sizeof(buf), f) != NULL) { diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 0516b159..c821e838 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -549,6 +549,65 @@ next: } } +static int +vfio_sync_default_container(void) +{ + struct rte_mp_msg mp_req, *mp_rep; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; + int iommu_type_id; + unsigned int i; + + /* cannot be called from primary */ + if (rte_eal_process_type() != RTE_PROC_SECONDARY) + return -1; + + /* default container fd should have been opened in rte_vfio_enable() */ + if (!default_vfio_cfg->vfio_enabled || + default_vfio_cfg->vfio_container_fd < 0) { + RTE_LOG(ERR, EAL, "VFIO support is not initialized\n"); + return -1; + } + + /* find default container's IOMMU type */ + p->req = SOCKET_REQ_IOMMU_TYPE; + strcpy(mp_req.name, EAL_VFIO_MP); + mp_req.len_param = sizeof(*p); + mp_req.num_fds = 0; + + iommu_type_id = -1; + if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && + mp_reply.nb_received == 1) { + mp_rep = &mp_reply.msgs[0]; + p = (struct vfio_mp_param *)mp_rep->param; + if (p->result == SOCKET_OK) + iommu_type_id = p->iommu_type_id; + free(mp_reply.msgs); + } + if (iommu_type_id < 0) { + RTE_LOG(ERR, EAL, "Could not get IOMMU type for default container\n"); + return -1; + } + + /* we now have an fd for default container, as well as its IOMMU type. + * now, set up default VFIO container config to match. + */ + for (i = 0; i < RTE_DIM(iommu_types); i++) { + const struct vfio_iommu_type *t = &iommu_types[i]; + if (t->type_id != iommu_type_id) + continue; + + /* we found our IOMMU type */ + default_vfio_cfg->vfio_iommu_type = t; + + return 0; + } + RTE_LOG(ERR, EAL, "Could not find IOMMU type id (%i)\n", + iommu_type_id); + return -1; +} + int rte_vfio_clear_group(int vfio_group_fd) { @@ -745,6 +804,26 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, else RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n"); } + } else if (rte_eal_process_type() != RTE_PROC_PRIMARY && + vfio_cfg == default_vfio_cfg && + vfio_cfg->vfio_iommu_type == NULL) { + /* if we're not a primary process, we do not set up the VFIO + * container because it's already been set up by the primary + * process. instead, we simply ask the primary about VFIO type + * we are using, and set the VFIO config up appropriately. + */ + ret = vfio_sync_default_container(); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Could not sync default VFIO container\n"); + close(vfio_group_fd); + rte_vfio_clear_group(vfio_group_fd); + return -1; + } + /* we have successfully initialized VFIO, notify user */ + const struct vfio_iommu_type *t = + default_vfio_cfg->vfio_iommu_type; + RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n", + t->type_id, t->name); } /* get a file descriptor for the device */ @@ -857,7 +936,8 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, /* if there are no active device groups, unregister the callback to * avoid spurious attempts to map/unmap memory from VFIO. */ - if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0) + if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0 && + rte_eal_process_type() != RTE_PROC_SECONDARY) rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME, NULL); @@ -977,6 +1057,15 @@ vfio_get_default_container_fd(void) return -1; } +int +vfio_get_iommu_type(void) +{ + if (default_vfio_cfg->vfio_iommu_type == NULL) + return -1; + + return default_vfio_cfg->vfio_iommu_type->type_id; +} + const struct vfio_iommu_type * vfio_set_iommu_type(int vfio_container_fd) { diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 63ae115c..cb2d35fb 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -5,6 +5,8 @@ #ifndef EAL_VFIO_H_ #define EAL_VFIO_H_ +#include <rte_common.h> + /* * determine if VFIO is present on the system */ @@ -122,6 +124,9 @@ int vfio_get_default_container_fd(void); const struct vfio_iommu_type * vfio_set_iommu_type(int vfio_container_fd); +int +vfio_get_iommu_type(void); + /* check if we have any supported extensions */ int vfio_has_supported_extensions(int vfio_container_fd); @@ -133,6 +138,7 @@ int vfio_mp_sync_setup(void); #define SOCKET_REQ_CONTAINER 0x100 #define SOCKET_REQ_GROUP 0x200 #define SOCKET_REQ_DEFAULT_CONTAINER 0x400 +#define SOCKET_REQ_IOMMU_TYPE 0x800 #define SOCKET_OK 0x0 #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF @@ -140,7 +146,11 @@ int vfio_mp_sync_setup(void); struct vfio_mp_param { int req; int result; - int group_num; + RTE_STD_C11 + union { + int group_num; + int iommu_type_id; + }; }; #endif /* VFIO_PRESENT */ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c index a1e8c834..2a47f29d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c @@ -77,6 +77,22 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer) reply.fds[0] = fd; } break; + case SOCKET_REQ_IOMMU_TYPE: + { + int iommu_type_id; + + r->req = SOCKET_REQ_IOMMU_TYPE; + + iommu_type_id = vfio_get_iommu_type(); + + if (iommu_type_id < 0) + r->result = SOCKET_ERR; + else { + r->iommu_type_id = iommu_type_id; + r->result = SOCKET_OK; + } + break; + } default: RTE_LOG(ERR, EAL, "vfio received invalid message!\n"); return -1; |