diff options
author | Luca Boccassi <luca.boccassi@gmail.com> | 2018-08-14 18:52:30 +0100 |
---|---|---|
committer | Luca Boccassi <luca.boccassi@gmail.com> | 2018-08-14 18:53:17 +0100 |
commit | b63264c8342e6a1b6971c79550d2af2024b6a4de (patch) | |
tree | 83114aac64286fe616506c0b3dfaec2ab86ef835 /lib/librte_eal/bsdapp/eal | |
parent | ca33590b6af032bff57d9cc70455660466a654b2 (diff) |
New upstream version 18.08upstream/18.08
Change-Id: I32fdf5e5016556d9c0a6d88ddaf1fc468961790a
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib/librte_eal/bsdapp/eal')
-rw-r--r-- | lib/librte_eal/bsdapp/eal/Makefile | 11 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/eal.c | 290 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/eal_alarm.c | 299 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/eal_alarm_private.h | 19 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/eal_cpuflags.c | 21 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/eal_dev.c | 21 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/eal_hugepage_info.c | 69 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/eal_interrupts.c | 464 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/eal_memalloc.c | 54 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/eal_memory.c | 471 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/eal_thread.c | 2 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/meson.build | 5 |
12 files changed, 1558 insertions, 168 deletions
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index dd455e67..d27da3d1 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -18,21 +18,25 @@ CFLAGS += $(WERROR_FLAGS) -O3 LDLIBS += -lexecinfo LDLIBS += -lpthread LDLIBS += -lgcc_s +LDLIBS += -lrte_kvargs EXPORT_MAP := ../../rte_eal_version.map -LIBABIVER := 6 +LIBABIVER := 8 # specific to bsdapp exec-env SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_cpuflags.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memalloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_interrupts.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_alarm.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_dev.c # from common dir SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_lcore.c @@ -40,6 +44,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memalloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c @@ -48,14 +53,18 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hypervisor.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_class.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_bus.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_fbarray.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_uuid.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_mp.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_reciprocal.c diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 4eafcb5a..d7ae9d68 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -18,6 +18,7 @@ #include <limits.h> #include <sys/mman.h> #include <sys/queue.h> +#include <sys/stat.h> #include <rte_compat.h> #include <rte_common.h> @@ -40,6 +41,7 @@ #include <rte_dev.h> #include <rte_devargs.h> #include <rte_version.h> +#include <rte_vfio.h> #include <rte_atomic.h> #include <malloc_heap.h> @@ -64,8 +66,8 @@ static int mem_cfg_fd = -1; static struct flock wr_lock = { .l_type = F_WRLCK, .l_whence = SEEK_SET, - .l_start = offsetof(struct rte_mem_config, memseg), - .l_len = sizeof(early_mem_config.memseg), + .l_start = offsetof(struct rte_mem_config, memsegs), + .l_len = sizeof(early_mem_config.memsegs), }; /* Address of global and public configuration */ @@ -82,20 +84,72 @@ struct internal_config internal_config; /* used by rte_rdtsc() */ int rte_cycles_vmware_tsc_map; -/* Return user provided mbuf pool ops name */ -const char * __rte_experimental -rte_eal_mbuf_user_pool_ops(void) -{ - return internal_config.user_mbuf_pool_ops_name; +/* platform-specific runtime dir */ +static char runtime_dir[PATH_MAX]; + +static const char *default_runtime_dir = "/var/run"; + +int +eal_create_runtime_dir(void) +{ + const char *directory = default_runtime_dir; + const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR"); + const char *fallback = "/tmp"; + char tmp[PATH_MAX]; + int ret; + + if (getuid() != 0) { + /* try XDG path first, fall back to /tmp */ + if (xdg_runtime_dir != NULL) + directory = xdg_runtime_dir; + else + directory = fallback; + } + /* create DPDK subdirectory under runtime dir */ + ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory); + if (ret < 0 || ret == sizeof(tmp)) { + RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n"); + return -1; + } + + /* create prefix-specific subdirectory under DPDK runtime dir */ + ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s", + tmp, internal_config.hugefile_prefix); + if (ret < 0 || ret == sizeof(runtime_dir)) { + RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n"); + return -1; + } + + /* create the path if it doesn't exist. no "mkdir -p" here, so do it + * step by step. + */ + ret = mkdir(tmp, 0700); + if (ret < 0 && errno != EEXIST) { + RTE_LOG(ERR, EAL, "Error creating '%s': %s\n", + tmp, strerror(errno)); + return -1; + } + + ret = mkdir(runtime_dir, 0700); + if (ret < 0 && errno != EEXIST) { + RTE_LOG(ERR, EAL, "Error creating '%s': %s\n", + runtime_dir, strerror(errno)); + return -1; + } + + return 0; } -/* Return mbuf pool ops name */ const char * -rte_eal_mbuf_default_mempool_ops(void) +eal_get_runtime_dir(void) { - if (internal_config.user_mbuf_pool_ops_name == NULL) - return RTE_MBUF_DEFAULT_MEMPOOL_OPS; + return runtime_dir; +} +/* Return user provided mbuf pool ops name */ +const char * +rte_eal_mbuf_user_pool_ops(void) +{ return internal_config.user_mbuf_pool_ops_name; } @@ -222,12 +276,17 @@ eal_proc_type_detect(void) enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; const char *pathname = eal_runtime_config_path(); - /* if we can open the file but not get a write-lock we are a secondary - * process. NOTE: if we get a file handle back, we keep that open - * and don't close it to prevent a race condition between multiple opens */ - if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && - (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) - ptype = RTE_PROC_SECONDARY; + /* if there no shared config, there can be no secondary processes */ + if (!internal_config.no_shconf) { + /* if we can open the file but not get a write-lock we are a + * secondary process. NOTE: if we get a file handle back, we + * keep that open and don't close it to prevent a race condition + * between multiple opens. + */ + if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && + (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) + ptype = RTE_PROC_SECONDARY; + } RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY"); @@ -289,7 +348,7 @@ eal_get_hugepage_mem_size(void) for (i = 0; i < internal_config.num_hugepage_sizes; i++) { struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) { + if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) { for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { size += hpi->hugepage_sz * hpi->num_pages[j]; } @@ -379,7 +438,8 @@ eal_parse_args(int argc, char **argv) switch (opt) { case OPT_MBUF_POOL_OPS_NAME_NUM: - internal_config.user_mbuf_pool_ops_name = optarg; + internal_config.user_mbuf_pool_ops_name = + strdup(optarg); break; case 'h': eal_usage(prgname); @@ -403,6 +463,14 @@ eal_parse_args(int argc, char **argv) } } + /* create runtime data directory */ + if (internal_config.no_shconf == 0 && + eal_create_runtime_dir() < 0) { + RTE_LOG(ERR, EAL, "Cannot create runtime directory\n"); + ret = -1; + goto out; + } + if (eal_adjust_config(&internal_config) != 0) { ret = -1; goto out; @@ -429,25 +497,29 @@ out: return ret; } +static int +check_socket(const struct rte_memseg_list *msl, void *arg) +{ + int *socket_id = arg; + + if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0) + return 1; + + return 0; +} + static void eal_check_mem_on_local_socket(void) { - const struct rte_memseg *ms; - int i, socket_id; + int socket_id; socket_id = rte_lcore_to_socket_id(rte_config.master_lcore); - ms = rte_eal_get_physmem_layout(); - - for (i = 0; i < RTE_MAX_MEMSEG; i++) - if (ms[i].socket_id == socket_id && - ms[i].len > 0) - return; - - RTE_LOG(WARNING, EAL, "WARNING: Master core has no " - "memory on local socket!\n"); + if (rte_memseg_list_walk(check_socket, &socket_id) == 0) + RTE_LOG(WARNING, EAL, "WARNING: Master core has no memory on local socket!\n"); } + static int sync_func(__attribute__((unused)) void *arg) { @@ -531,6 +603,9 @@ rte_eal_init(int argc, char **argv) return -1; } + /* FreeBSD always uses legacy memory model */ + internal_config.legacy_mem = true; + if (eal_plugins_init() < 0) { rte_eal_init_alert("Cannot init plugins\n"); rte_errno = EINVAL; @@ -544,6 +619,24 @@ rte_eal_init(int argc, char **argv) return -1; } + rte_config_init(); + + if (rte_eal_intr_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + return -1; + } + + /* Put mp channel init before bus scan so that we can init the vdev + * bus through mp channel in the secondary process before the bus scan. + */ + if (rte_mp_channel_init() < 0) { + rte_eal_init_alert("failed to init mp channel\n"); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_errno = EFAULT; + return -1; + } + } + if (rte_bus_scan()) { rte_eal_init_alert("Cannot scan the buses for devices\n"); rte_errno = ENODEV; @@ -554,13 +647,17 @@ rte_eal_init(int argc, char **argv) /* autodetect the iova mapping mode (default is iova_pa) */ rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class(); - if (internal_config.no_hugetlbfs == 0 && - internal_config.process_type != RTE_PROC_SECONDARY && - eal_hugepage_info_init() < 0) { - rte_eal_init_alert("Cannot get hugepage information."); - rte_errno = EACCES; - rte_atomic32_clear(&run_once); - return -1; + if (internal_config.no_hugetlbfs == 0) { + /* rte_config isn't initialized yet */ + ret = internal_config.process_type == RTE_PROC_PRIMARY ? + eal_hugepage_info_init() : + eal_hugepage_info_read(); + if (ret < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { @@ -583,14 +680,14 @@ rte_eal_init(int argc, char **argv) rte_srand(rte_rdtsc()); - rte_config_init(); - - if (rte_mp_channel_init() < 0) { - rte_eal_init_alert("failed to init mp channel\n"); - if (rte_eal_process_type() == RTE_PROC_PRIMARY) { - rte_errno = EFAULT; - return -1; - } + /* in secondary processes, memory init may allocate additional fbarrays + * not present in primary processes, so to avoid any potential issues, + * initialize memzones first. + */ + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone\n"); + rte_errno = ENODEV; + return -1; } if (rte_eal_memory_init() < 0) { @@ -599,8 +696,8 @@ rte_eal_init(int argc, char **argv) return -1; } - if (rte_eal_memzone_init() < 0) { - rte_eal_init_alert("Cannot init memzone\n"); + if (rte_eal_malloc_heap_init() < 0) { + rte_eal_init_alert("Cannot init malloc heap\n"); rte_errno = ENODEV; return -1; } @@ -617,11 +714,6 @@ rte_eal_init(int argc, char **argv) return -1; } - if (rte_eal_intr_init() < 0) { - rte_eal_init_alert("Cannot init interrupt-handling thread\n"); - return -1; - } - if (rte_eal_timer_init() < 0) { rte_eal_init_alert("Cannot init HPET or TSC timers\n"); rte_errno = ENOTSUP; @@ -632,7 +724,7 @@ rte_eal_init(int argc, char **argv) eal_thread_init_master(rte_config.master_lcore); - ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset)); RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n", rte_config.master_lcore, thread_id, cpuset, @@ -658,7 +750,7 @@ rte_eal_init(int argc, char **argv) rte_panic("Cannot create thread\n"); /* Set thread_name for aid in debugging. */ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + snprintf(thread_name, sizeof(thread_name), "lcore-slave-%d", i); rte_thread_setname(lcore_config[i].thread_id, thread_name); } @@ -735,18 +827,6 @@ rte_eal_vfio_intr_mode(void) return RTE_INTR_MODE_NONE; } -/* dummy forward declaration. */ -struct vfio_device_info; - -/* dummy prototypes. */ -int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, - int *vfio_dev_fd, struct vfio_device_info *device_info); -int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); -int rte_vfio_enable(const char *modname); -int rte_vfio_is_enabled(const char *modname); -int rte_vfio_noiommu_is_enabled(void); -int rte_vfio_clear_group(int vfio_group_fd); - int rte_vfio_setup_device(__rte_unused const char *sysfs_base, __rte_unused const char *dev_addr, __rte_unused int *vfio_dev_fd, @@ -781,3 +861,81 @@ int rte_vfio_clear_group(__rte_unused int vfio_group_fd) { return 0; } + +int +rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} + +int +rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova, + __rte_unused uint64_t len) +{ + return -1; +} + +int +rte_vfio_get_group_num(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int *iommu_group_num) +{ + return -1; +} + +int +rte_vfio_get_container_fd(void) +{ + return -1; +} + +int +rte_vfio_get_group_fd(__rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_create(void) +{ + return -1; +} + +int +rte_vfio_container_destroy(__rte_unused int container_fd) +{ + return -1; +} + +int +rte_vfio_container_group_bind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_group_unbind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_dma_map(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} + +int +rte_vfio_container_dma_unmap(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c index eb3913c9..51ea4b8c 100644 --- a/lib/librte_eal/bsdapp/eal/eal_alarm.c +++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c @@ -1,31 +1,314 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> #include <stdlib.h> +#include <string.h> +#include <time.h> #include <errno.h> #include <rte_alarm.h> +#include <rte_cycles.h> #include <rte_common.h> +#include <rte_errno.h> +#include <rte_interrupts.h> +#include <rte_spinlock.h> + #include "eal_private.h" +#include "eal_alarm_private.h" + +#define NS_PER_US 1000 + +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW +#else +#define CLOCK_TYPE_ID CLOCK_MONOTONIC +#endif + +struct alarm_entry { + LIST_ENTRY(alarm_entry) next; + struct rte_intr_handle handle; + struct timespec time; + rte_eal_alarm_callback cb_fn; + void *cb_arg; + volatile uint8_t executing; + volatile pthread_t executing_id; +}; + +static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER(); +static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER; + +static struct rte_intr_handle intr_handle = {.fd = -1 }; +static void eal_alarm_callback(void *arg); int rte_eal_alarm_init(void) { + intr_handle.type = RTE_INTR_HANDLE_ALARM; + + /* on FreeBSD, timers don't use fd's, and their identifiers are stored + * in separate namespace from fd's, so using any value is OK. however, + * EAL interrupts handler expects fd's to be unique, so use an actual fd + * to guarantee unique timer identifier. + */ + intr_handle.fd = open("/dev/zero", O_RDONLY); + + return 0; +} + +static inline int +timespec_cmp(const struct timespec *now, const struct timespec *at) +{ + if (now->tv_sec < at->tv_sec) + return -1; + if (now->tv_sec > at->tv_sec) + return 1; + if (now->tv_nsec < at->tv_nsec) + return -1; + if (now->tv_nsec > at->tv_nsec) + return 1; return 0; } +static inline uint64_t +diff_ns(struct timespec *now, struct timespec *at) +{ + uint64_t now_ns, at_ns; + + if (timespec_cmp(now, at) >= 0) + return 0; + + now_ns = now->tv_sec * NS_PER_S + now->tv_nsec; + at_ns = at->tv_sec * NS_PER_S + at->tv_nsec; + + return at_ns - now_ns; +} int -rte_eal_alarm_set(uint64_t us __rte_unused, - rte_eal_alarm_callback cb_fn __rte_unused, - void *cb_arg __rte_unused) +eal_alarm_get_timeout_ns(uint64_t *val) { - return -ENOTSUP; + struct alarm_entry *ap; + struct timespec now; + + if (clock_gettime(CLOCK_TYPE_ID, &now) < 0) + return -1; + + if (LIST_EMPTY(&alarm_list)) + return -1; + + ap = LIST_FIRST(&alarm_list); + + *val = diff_ns(&now, &ap->time); + + return 0; +} + +static int +unregister_current_callback(void) +{ + struct alarm_entry *ap; + int ret = 0; + + if (!LIST_EMPTY(&alarm_list)) { + ap = LIST_FIRST(&alarm_list); + + do { + ret = rte_intr_callback_unregister(&intr_handle, + eal_alarm_callback, &ap->time); + } while (ret == -EAGAIN); + } + + return ret; } +static int +register_first_callback(void) +{ + struct alarm_entry *ap; + int ret = 0; + + if (!LIST_EMPTY(&alarm_list)) { + ap = LIST_FIRST(&alarm_list); + + /* register a new callback */ + ret = rte_intr_callback_register(&intr_handle, + eal_alarm_callback, &ap->time); + } + return ret; +} + +static void +eal_alarm_callback(void *arg __rte_unused) +{ + struct timespec now; + struct alarm_entry *ap; + + rte_spinlock_lock(&alarm_list_lk); + ap = LIST_FIRST(&alarm_list); + + if (clock_gettime(CLOCK_TYPE_ID, &now) < 0) + return; + + while (ap != NULL && timespec_cmp(&now, &ap->time) >= 0) { + ap->executing = 1; + ap->executing_id = pthread_self(); + rte_spinlock_unlock(&alarm_list_lk); + + ap->cb_fn(ap->cb_arg); + + rte_spinlock_lock(&alarm_list_lk); + + LIST_REMOVE(ap, next); + free(ap); + + ap = LIST_FIRST(&alarm_list); + } + + /* timer has been deleted from the kqueue, so recreate it if needed */ + register_first_callback(); + + rte_spinlock_unlock(&alarm_list_lk); +} + + int -rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused, - void *cb_arg __rte_unused) +rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg) { - return -ENOTSUP; + struct alarm_entry *ap, *new_alarm; + struct timespec now; + uint64_t ns; + int ret = 0; + + /* check parameters, also ensure us won't cause a uint64_t overflow */ + if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL) + return -EINVAL; + + new_alarm = calloc(1, sizeof(*new_alarm)); + if (new_alarm == NULL) + return -ENOMEM; + + /* use current time to calculate absolute time of alarm */ + clock_gettime(CLOCK_TYPE_ID, &now); + + ns = us * NS_PER_US; + + new_alarm->cb_fn = cb_fn; + new_alarm->cb_arg = cb_arg; + new_alarm->time.tv_nsec = (now.tv_nsec + ns) % NS_PER_S; + new_alarm->time.tv_sec = now.tv_sec + ((now.tv_nsec + ns) / NS_PER_S); + + rte_spinlock_lock(&alarm_list_lk); + + if (LIST_EMPTY(&alarm_list)) + LIST_INSERT_HEAD(&alarm_list, new_alarm, next); + else { + LIST_FOREACH(ap, &alarm_list, next) { + if (timespec_cmp(&new_alarm->time, &ap->time) < 0) { + LIST_INSERT_BEFORE(ap, new_alarm, next); + break; + } + if (LIST_NEXT(ap, next) == NULL) { + LIST_INSERT_AFTER(ap, new_alarm, next); + break; + } + } + } + + /* re-register first callback just in case */ + register_first_callback(); + + rte_spinlock_unlock(&alarm_list_lk); + + return ret; +} + +int +rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) +{ + struct alarm_entry *ap, *ap_prev; + int count = 0; + int err = 0; + int executing; + + if (!cb_fn) { + rte_errno = EINVAL; + return -1; + } + + do { + executing = 0; + rte_spinlock_lock(&alarm_list_lk); + /* remove any matches at the start of the list */ + while (1) { + ap = LIST_FIRST(&alarm_list); + if (ap == NULL) + break; + if (cb_fn != ap->cb_fn) + break; + if (cb_arg != ap->cb_arg && cb_arg != (void *) -1) + break; + if (ap->executing == 0) { + LIST_REMOVE(ap, next); + free(ap); + count++; + } else { + /* If calling from other context, mark that + * alarm is executing so loop can spin till it + * finish. Otherwise we are trying to cancel + * ourselves - mark it by EINPROGRESS. + */ + if (pthread_equal(ap->executing_id, + pthread_self()) == 0) + executing++; + else + err = EINPROGRESS; + + break; + } + } + ap_prev = ap; + + /* now go through list, removing entries not at start */ + LIST_FOREACH(ap, &alarm_list, next) { + /* this won't be true first time through */ + if (cb_fn == ap->cb_fn && + (cb_arg == (void *)-1 || + cb_arg == ap->cb_arg)) { + if (ap->executing == 0) { + LIST_REMOVE(ap, next); + free(ap); + count++; + ap = ap_prev; + } else if (pthread_equal(ap->executing_id, + pthread_self()) == 0) { + executing++; + } else { + err = EINPROGRESS; + } + } + ap_prev = ap; + } + rte_spinlock_unlock(&alarm_list_lk); + } while (executing != 0); + + if (count == 0 && err == 0) + rte_errno = ENOENT; + else if (err) + rte_errno = err; + + rte_spinlock_lock(&alarm_list_lk); + + /* unregister if no alarms left, otherwise re-register first */ + if (LIST_EMPTY(&alarm_list)) + unregister_current_callback(); + else + register_first_callback(); + + rte_spinlock_unlock(&alarm_list_lk); + + return count; } diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm_private.h b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h new file mode 100644 index 00000000..65c71151 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef EAL_ALARM_PRIVATE_H +#define EAL_ALARM_PRIVATE_H + +#include <inttypes.h> + +/* + * FreeBSD needs a back-channel communication mechanism between interrupt and + * alarm thread, because on FreeBSD, timer period is set up inside the interrupt + * API and not inside alarm API like on Linux. + */ + +int +eal_alarm_get_timeout_ns(uint64_t *val); + +#endif // EAL_ALARM_PRIVATE_H diff --git a/lib/librte_eal/bsdapp/eal/eal_cpuflags.c b/lib/librte_eal/bsdapp/eal/eal_cpuflags.c new file mode 100644 index 00000000..69b161ea --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_cpuflags.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 Mellanox Technologies, Ltd + */ + +#include <rte_common.h> +#include <rte_cpuflags.h> + +unsigned long +rte_cpu_getauxval(unsigned long type __rte_unused) +{ + /* not implemented */ + return 0; +} + +int +rte_cpu_strcmp_auxval(unsigned long type __rte_unused, + const char *str __rte_unused) +{ + /* not implemented */ + return -1; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_dev.c b/lib/librte_eal/bsdapp/eal/eal_dev.c new file mode 100644 index 00000000..1c6c51bd --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_dev.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <rte_log.h> +#include <rte_compat.h> +#include <rte_dev.h> + +int __rte_experimental +rte_dev_event_monitor_start(void) +{ + RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n"); + return -1; +} + +int __rte_experimental +rte_dev_event_monitor_stop(void) +{ + RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n"); + return -1; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c index be2dbf0e..1e8f5df2 100644 --- a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c @@ -19,10 +19,10 @@ * Used in this file to store the hugepage file map on disk */ static void * -create_shared_memory(const char *filename, const size_t mem_size) +map_shared_memory(const char *filename, const size_t mem_size, int flags) { void *retval; - int fd = open(filename, O_CREAT | O_RDWR, 0666); + int fd = open(filename, flags, 0666); if (fd < 0) return NULL; if (ftruncate(fd, mem_size) < 0) { @@ -34,6 +34,18 @@ create_shared_memory(const char *filename, const size_t mem_size) return retval; } +static void * +open_shared_memory(const char *filename, const size_t mem_size) +{ + return map_shared_memory(filename, mem_size, O_RDWR); +} + +static void * +create_shared_memory(const char *filename, const size_t mem_size) +{ + return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT); +} + /* * No hugepage support on freebsd, but we dummy it, using contigmem driver */ @@ -46,13 +58,16 @@ eal_hugepage_info_init(void) /* re-use the linux "internal config" structure for our memory data */ struct hugepage_info *hpi = &internal_config.hugepage_info[0]; struct hugepage_info *tmp_hpi; + unsigned int i; + + internal_config.num_hugepage_sizes = 1; sysctl_size = sizeof(num_buffers); error = sysctlbyname("hw.contigmem.num_buffers", &num_buffers, &sysctl_size, NULL, 0); if (error != 0) { - RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers"); + RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers\n"); return -1; } @@ -61,7 +76,7 @@ eal_hugepage_info_init(void) &sysctl_size, NULL, 0); if (error != 0) { - RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size"); + RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size\n"); return -1; } @@ -81,25 +96,61 @@ eal_hugepage_info_init(void) RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n", num_buffers, (int)(buffer_size>>10)); - internal_config.num_hugepage_sizes = 1; - hpi->hugedir = CONTIGMEM_DEV; + strlcpy(hpi->hugedir, CONTIGMEM_DEV, sizeof(hpi->hugedir)); hpi->hugepage_sz = buffer_size; hpi->num_pages[0] = num_buffers; hpi->lock_descriptor = fd; + /* for no shared files mode, do not create shared memory config */ + if (internal_config.no_shconf) + return 0; + tmp_hpi = create_shared_memory(eal_hugepage_info_path(), - sizeof(struct hugepage_info)); + sizeof(internal_config.hugepage_info)); if (tmp_hpi == NULL ) { RTE_LOG(ERR, EAL, "Failed to create shared memory!\n"); return -1; } - memcpy(tmp_hpi, hpi, sizeof(struct hugepage_info)); + memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info)); + + /* we've copied file descriptors along with everything else, but they + * will be invalid in secondary process, so overwrite them + */ + for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) { + struct hugepage_info *tmp = &tmp_hpi[i]; + tmp->lock_descriptor = -1; + } - if ( munmap(tmp_hpi, sizeof(struct hugepage_info)) < 0) { + if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) { RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); return -1; } return 0; } + +/* copy stuff from shared info into internal config */ +int +eal_hugepage_info_read(void) +{ + struct hugepage_info *hpi = &internal_config.hugepage_info[0]; + struct hugepage_info *tmp_hpi; + + internal_config.num_hugepage_sizes = 1; + + tmp_hpi = open_shared_memory(eal_hugepage_info_path(), + sizeof(internal_config.hugepage_info)); + if (tmp_hpi == NULL) { + RTE_LOG(ERR, EAL, "Failed to open shared memory!\n"); + return -1; + } + + memcpy(hpi, tmp_hpi, sizeof(internal_config.hugepage_info)); + + if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) { + RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); + return -1; + } + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c index 290d53ab..2feee2d5 100644 --- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c +++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c @@ -1,51 +1,479 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ +#include <string.h> +#include <sys/types.h> +#include <sys/event.h> +#include <sys/queue.h> +#include <unistd.h> + +#include <rte_errno.h> +#include <rte_lcore.h> +#include <rte_spinlock.h> #include <rte_common.h> #include <rte_interrupts.h> + #include "eal_private.h" +#include "eal_alarm_private.h" + +#define MAX_INTR_EVENTS 16 + +/** + * union buffer for reading on different devices + */ +union rte_intr_read_buffer { + char charbuf[16]; /* for others */ +}; + +TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback); +TAILQ_HEAD(rte_intr_source_list, rte_intr_source); + +struct rte_intr_callback { + TAILQ_ENTRY(rte_intr_callback) next; + rte_intr_callback_fn cb_fn; /**< callback address */ + void *cb_arg; /**< parameter for callback */ +}; + +struct rte_intr_source { + TAILQ_ENTRY(rte_intr_source) next; + struct rte_intr_handle intr_handle; /**< interrupt handle */ + struct rte_intr_cb_list callbacks; /**< user callbacks */ + uint32_t active; +}; + +/* global spinlock for interrupt data operation */ +static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER; + +/* interrupt sources list */ +static struct rte_intr_source_list intr_sources; + +/* interrupt handling thread */ +static pthread_t intr_thread; + +static volatile int kq = -1; + +static int +intr_source_to_kevent(const struct rte_intr_handle *ih, struct kevent *ke) +{ + /* alarm callbacks are special case */ + if (ih->type == RTE_INTR_HANDLE_ALARM) { + uint64_t timeout_ns; + + /* get soonest alarm timeout */ + if (eal_alarm_get_timeout_ns(&timeout_ns) < 0) + return -1; + + ke->filter = EVFILT_TIMER; + /* timers are one shot */ + ke->flags |= EV_ONESHOT; + ke->fflags = NOTE_NSECONDS; + ke->data = timeout_ns; + } else { + ke->filter = EVFILT_READ; + } + ke->ident = ih->fd; + + return 0; +} int rte_intr_callback_register(const struct rte_intr_handle *intr_handle, - rte_intr_callback_fn cb, - void *cb_arg) + rte_intr_callback_fn cb, void *cb_arg) { - RTE_SET_USED(intr_handle); - RTE_SET_USED(cb); - RTE_SET_USED(cb_arg); + struct rte_intr_callback *callback = NULL; + struct rte_intr_source *src = NULL; + int ret, add_event; - return -ENOTSUP; + /* first do parameter checking */ + if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) { + RTE_LOG(ERR, EAL, + "Registering with invalid input parameter\n"); + return -EINVAL; + } + if (kq < 0) { + RTE_LOG(ERR, EAL, "Kqueue is not active: %d\n", kq); + return -ENODEV; + } + + /* allocate a new interrupt callback entity */ + callback = calloc(1, sizeof(*callback)); + if (callback == NULL) { + RTE_LOG(ERR, EAL, "Can not allocate memory\n"); + return -ENOMEM; + } + callback->cb_fn = cb; + callback->cb_arg = cb_arg; + + rte_spinlock_lock(&intr_lock); + + /* check if there is at least one callback registered for the fd */ + TAILQ_FOREACH(src, &intr_sources, next) { + if (src->intr_handle.fd == intr_handle->fd) { + /* we had no interrupts for this */ + if (TAILQ_EMPTY(&src->callbacks)) + add_event = 1; + + TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); + ret = 0; + break; + } + } + + /* no existing callbacks for this - add new source */ + if (src == NULL) { + src = calloc(1, sizeof(*src)); + if (src == NULL) { + RTE_LOG(ERR, EAL, "Can not allocate memory\n"); + ret = -ENOMEM; + goto fail; + } else { + src->intr_handle = *intr_handle; + TAILQ_INIT(&src->callbacks); + TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); + TAILQ_INSERT_TAIL(&intr_sources, src, next); + add_event = 1; + ret = 0; + } + } + + /* add events to the queue. timer events are special as we need to + * re-set the timer. + */ + if (add_event || src->intr_handle.type == RTE_INTR_HANDLE_ALARM) { + struct kevent ke; + + memset(&ke, 0, sizeof(ke)); + ke.flags = EV_ADD; /* mark for addition to the queue */ + + if (intr_source_to_kevent(intr_handle, &ke) < 0) { + RTE_LOG(ERR, EAL, "Cannot convert interrupt handle to kevent\n"); + ret = -ENODEV; + goto fail; + } + + /** + * add the intr file descriptor into wait list. + */ + if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) { + /* currently, nic_uio does not support interrupts, so + * this error will always be triggered and output to the + * user. so, don't output it unless debug log level set. + */ + if (errno == ENODEV) + RTE_LOG(DEBUG, EAL, "Interrupt handle %d not supported\n", + src->intr_handle.fd); + else + RTE_LOG(ERR, EAL, "Error adding fd %d " + "kevent, %s\n", + src->intr_handle.fd, + strerror(errno)); + ret = -errno; + goto fail; + } + } + rte_spinlock_unlock(&intr_lock); + + return ret; +fail: + /* clean up */ + if (src != NULL) { + TAILQ_REMOVE(&(src->callbacks), callback, next); + if (TAILQ_EMPTY(&(src->callbacks))) { + TAILQ_REMOVE(&intr_sources, src, next); + free(src); + } + } + free(callback); + rte_spinlock_unlock(&intr_lock); + return ret; } int rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, - rte_intr_callback_fn cb, - void *cb_arg) + rte_intr_callback_fn cb_fn, void *cb_arg) { - RTE_SET_USED(intr_handle); - RTE_SET_USED(cb); - RTE_SET_USED(cb_arg); + int ret; + struct rte_intr_source *src; + struct rte_intr_callback *cb, *next; - return -ENOTSUP; + /* do parameter checking first */ + if (intr_handle == NULL || intr_handle->fd < 0) { + RTE_LOG(ERR, EAL, + "Unregistering with invalid input parameter\n"); + return -EINVAL; + } + if (kq < 0) { + RTE_LOG(ERR, EAL, "Kqueue is not active\n"); + return -ENODEV; + } + + rte_spinlock_lock(&intr_lock); + + /* check if the insterrupt source for the fd is existent */ + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == intr_handle->fd) + break; + + /* No interrupt source registered for the fd */ + if (src == NULL) { + ret = -ENOENT; + + /* interrupt source has some active callbacks right now. */ + } else if (src->active != 0) { + ret = -EAGAIN; + + /* ok to remove. */ + } else { + struct kevent ke; + + ret = 0; + + /* remove it from the kqueue */ + memset(&ke, 0, sizeof(ke)); + ke.flags = EV_DELETE; /* mark for deletion from the queue */ + + if (intr_source_to_kevent(intr_handle, &ke) < 0) { + RTE_LOG(ERR, EAL, "Cannot convert to kevent\n"); + ret = -ENODEV; + goto out; + } + + /** + * remove intr file descriptor from wait list. + */ + if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) { + RTE_LOG(ERR, EAL, "Error removing fd %d kevent, %s\n", + src->intr_handle.fd, strerror(errno)); + /* removing non-existent even is an expected condition + * in some circumstances (e.g. oneshot events). + */ + } + + /*walk through the callbacks and remove all that match. */ + for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) { + next = TAILQ_NEXT(cb, next); + if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 || + cb->cb_arg == cb_arg)) { + TAILQ_REMOVE(&src->callbacks, cb, next); + free(cb); + ret++; + } + } + + /* all callbacks for that source are removed. */ + if (TAILQ_EMPTY(&src->callbacks)) { + TAILQ_REMOVE(&intr_sources, src, next); + free(src); + } + } +out: + rte_spinlock_unlock(&intr_lock); + + return ret; } int -rte_intr_enable(const struct rte_intr_handle *intr_handle __rte_unused) +rte_intr_enable(const struct rte_intr_handle *intr_handle) { - return -ENOTSUP; + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type) { + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; } int -rte_intr_disable(const struct rte_intr_handle *intr_handle __rte_unused) +rte_intr_disable(const struct rte_intr_handle *intr_handle) { - return -ENOTSUP; + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type) { + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; + /* not used at this moment */ + case RTE_INTR_HANDLE_DEV_EVENT: + return -1; + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +static void +eal_intr_process_interrupts(struct kevent *events, int nfds) +{ + struct rte_intr_callback active_cb; + union rte_intr_read_buffer buf; + struct rte_intr_callback *cb; + struct rte_intr_source *src; + bool call = false; + int n, bytes_read; + + for (n = 0; n < nfds; n++) { + int event_fd = events[n].ident; + + rte_spinlock_lock(&intr_lock); + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == event_fd) + break; + if (src == NULL) { + rte_spinlock_unlock(&intr_lock); + continue; + } + + /* mark this interrupt source as active and release the lock. */ + src->active = 1; + rte_spinlock_unlock(&intr_lock); + + /* set the length to be read dor different handle type */ + switch (src->intr_handle.type) { + case RTE_INTR_HANDLE_ALARM: + bytes_read = 0; + call = true; + break; + case RTE_INTR_HANDLE_VDEV: + case RTE_INTR_HANDLE_EXT: + bytes_read = 0; + call = true; + break; + case RTE_INTR_HANDLE_DEV_EVENT: + bytes_read = 0; + call = true; + break; + default: + bytes_read = 1; + break; + } + + if (bytes_read > 0) { + /** + * read out to clear the ready-to-be-read flag + * for epoll_wait. + */ + bytes_read = read(event_fd, &buf, bytes_read); + if (bytes_read < 0) { + if (errno == EINTR || errno == EWOULDBLOCK) + continue; + + RTE_LOG(ERR, EAL, "Error reading from file " + "descriptor %d: %s\n", + event_fd, + strerror(errno)); + } else if (bytes_read == 0) + RTE_LOG(ERR, EAL, "Read nothing from file " + "descriptor %d\n", event_fd); + else + call = true; + } + + /* grab a lock, again to call callbacks and update status. */ + rte_spinlock_lock(&intr_lock); + + if (call) { + /* Finally, call all callbacks. */ + TAILQ_FOREACH(cb, &src->callbacks, next) { + + /* make a copy and unlock. */ + active_cb = *cb; + rte_spinlock_unlock(&intr_lock); + + /* call the actual callback */ + active_cb.cb_fn(active_cb.cb_arg); + + /*get the lock back. */ + rte_spinlock_lock(&intr_lock); + } + } + + /* we done with that interrupt source, release it. */ + src->active = 0; + rte_spinlock_unlock(&intr_lock); + } +} + +static void * +eal_intr_thread_main(void *arg __rte_unused) +{ + struct kevent events[MAX_INTR_EVENTS]; + int nfds; + + /* host thread, never break out */ + for (;;) { + /* do not change anything, just wait */ + nfds = kevent(kq, NULL, 0, events, MAX_INTR_EVENTS, NULL); + + /* kevent fail */ + if (nfds < 0) { + if (errno == EINTR) + continue; + RTE_LOG(ERR, EAL, + "kevent returns with fail\n"); + break; + } + /* kevent timeout, will never happen here */ + else if (nfds == 0) + continue; + + /* kevent has at least one fd ready to read */ + eal_intr_process_interrupts(events, nfds); + } + close(kq); + kq = -1; + return NULL; } int rte_eal_intr_init(void) { - return 0; + int ret = 0; + + /* init the global interrupt source head */ + TAILQ_INIT(&intr_sources); + + kq = kqueue(); + if (kq < 0) { + RTE_LOG(ERR, EAL, "Cannot create kqueue instance\n"); + return -1; + } + + /* create the host thread to wait/handle the interrupt */ + ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL, + eal_intr_thread_main, NULL); + if (ret != 0) { + rte_errno = -ret; + RTE_LOG(ERR, EAL, + "Failed to create thread for interrupt handling\n"); + } + + return ret; } int diff --git a/lib/librte_eal/bsdapp/eal/eal_memalloc.c b/lib/librte_eal/bsdapp/eal/eal_memalloc.c new file mode 100644 index 00000000..f7f07abd --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_memalloc.c @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#include <inttypes.h> + +#include <rte_log.h> +#include <rte_memory.h> + +#include "eal_memalloc.h" + +int +eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms __rte_unused, + int __rte_unused n_segs, size_t __rte_unused page_sz, + int __rte_unused socket, bool __rte_unused exact) +{ + RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n"); + return -1; +} + +struct rte_memseg * +eal_memalloc_alloc_seg(size_t __rte_unused page_sz, int __rte_unused socket) +{ + RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n"); + return NULL; +} + +int +eal_memalloc_free_seg(struct rte_memseg *ms __rte_unused) +{ + RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n"); + return -1; +} + +int +eal_memalloc_free_seg_bulk(struct rte_memseg **ms __rte_unused, + int n_segs __rte_unused) +{ + RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n"); + return -1; +} + +int +eal_memalloc_sync_with_primary(void) +{ + RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n"); + return -1; +} + +int +eal_memalloc_init(void) +{ + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c index bdfb8828..16d2bc7c 100644 --- a/lib/librte_eal/bsdapp/eal/eal_memory.c +++ b/lib/librte_eal/bsdapp/eal/eal_memory.c @@ -6,10 +6,13 @@ #include <sys/types.h> #include <sys/sysctl.h> #include <inttypes.h> +#include <errno.h> +#include <string.h> #include <fcntl.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> +#include <rte_errno.h> #include <rte_log.h> #include <rte_string_fns.h> #include "eal_private.h" @@ -41,129 +44,253 @@ rte_eal_hugepage_init(void) struct rte_mem_config *mcfg; uint64_t total_mem = 0; void *addr; - unsigned i, j, seg_idx = 0; + unsigned int i, j, seg_idx = 0; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; /* for debug purposes, hugetlbfs can be disabled */ if (internal_config.no_hugetlbfs) { - addr = malloc(internal_config.memory); - mcfg->memseg[0].iova = (rte_iova_t)(uintptr_t)addr; - mcfg->memseg[0].addr = addr; - mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; - mcfg->memseg[0].len = internal_config.memory; - mcfg->memseg[0].socket_id = 0; + struct rte_memseg_list *msl; + struct rte_fbarray *arr; + struct rte_memseg *ms; + uint64_t page_sz; + int n_segs, cur_seg; + + /* create a memseg list */ + msl = &mcfg->memsegs[0]; + + page_sz = RTE_PGSIZE_4K; + n_segs = internal_config.memory / page_sz; + + if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs, + sizeof(struct rte_memseg))) { + RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n"); + return -1; + } + + addr = mmap(NULL, internal_config.memory, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, + strerror(errno)); + return -1; + } + msl->base_va = addr; + msl->page_sz = page_sz; + msl->socket_id = 0; + + /* populate memsegs. each memseg is 1 page long */ + for (cur_seg = 0; cur_seg < n_segs; cur_seg++) { + arr = &msl->memseg_arr; + + ms = rte_fbarray_get(arr, cur_seg); + if (rte_eal_iova_mode() == RTE_IOVA_VA) + ms->iova = (uintptr_t)addr; + else + ms->iova = RTE_BAD_IOVA; + ms->addr = addr; + ms->hugepage_sz = page_sz; + ms->len = page_sz; + ms->socket_id = 0; + + rte_fbarray_set_used(arr, cur_seg); + + addr = RTE_PTR_ADD(addr, page_sz); + } return 0; } /* map all hugepages and sort them */ for (i = 0; i < internal_config.num_hugepage_sizes; i ++){ struct hugepage_info *hpi; + rte_iova_t prev_end = 0; + int prev_ms_idx = -1; + uint64_t page_sz, mem_needed; + unsigned int n_pages, max_pages; hpi = &internal_config.hugepage_info[i]; - for (j = 0; j < hpi->num_pages[0]; j++) { + page_sz = hpi->hugepage_sz; + max_pages = hpi->num_pages[0]; + mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem, + page_sz); + + n_pages = RTE_MIN(mem_needed / page_sz, max_pages); + + for (j = 0; j < n_pages; j++) { + struct rte_memseg_list *msl; + struct rte_fbarray *arr; struct rte_memseg *seg; + int msl_idx, ms_idx; rte_iova_t physaddr; int error; size_t sysctl_size = sizeof(physaddr); char physaddr_str[64]; + bool is_adjacent; - addr = mmap(NULL, hpi->hugepage_sz, PROT_READ|PROT_WRITE, - MAP_SHARED, hpi->lock_descriptor, - j * EAL_PAGE_SIZE); - if (addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", - j, hpi->hugedir); - return -1; - } - - snprintf(physaddr_str, sizeof(physaddr_str), "hw.contigmem" - ".physaddr.%d", j); - error = sysctlbyname(physaddr_str, &physaddr, &sysctl_size, - NULL, 0); + /* first, check if this segment is IOVA-adjacent to + * the previous one. + */ + snprintf(physaddr_str, sizeof(physaddr_str), + "hw.contigmem.physaddr.%d", j); + error = sysctlbyname(physaddr_str, &physaddr, + &sysctl_size, NULL, 0); if (error < 0) { RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u " "from %s\n", j, hpi->hugedir); return -1; } - seg = &mcfg->memseg[seg_idx++]; + is_adjacent = prev_end != 0 && physaddr == prev_end; + prev_end = physaddr + hpi->hugepage_sz; + + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; + msl_idx++) { + bool empty, need_hole; + msl = &mcfg->memsegs[msl_idx]; + arr = &msl->memseg_arr; + + if (msl->page_sz != page_sz) + continue; + + empty = arr->count == 0; + + /* we need a hole if this isn't an empty memseg + * list, and if previous segment was not + * adjacent to current one. + */ + need_hole = !empty && !is_adjacent; + + /* we need 1, plus hole if not adjacent */ + ms_idx = rte_fbarray_find_next_n_free(arr, + 0, 1 + (need_hole ? 1 : 0)); + + /* memseg list is full? */ + if (ms_idx < 0) + continue; + + if (need_hole && prev_ms_idx == ms_idx - 1) + ms_idx++; + prev_ms_idx = ms_idx; + + break; + } + if (msl_idx == RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n", + RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE), + RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE)); + return -1; + } + arr = &msl->memseg_arr; + seg = rte_fbarray_get(arr, ms_idx); + + addr = RTE_PTR_ADD(msl->base_va, + (size_t)msl->page_sz * ms_idx); + + /* address is already mapped in memseg list, so using + * MAP_FIXED here is safe. + */ + addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE, + MAP_SHARED | MAP_FIXED, + hpi->lock_descriptor, + j * EAL_PAGE_SIZE); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", + j, hpi->hugedir); + return -1; + } + seg->addr = addr; seg->iova = physaddr; - seg->hugepage_sz = hpi->hugepage_sz; - seg->len = hpi->hugepage_sz; + seg->hugepage_sz = page_sz; + seg->len = page_sz; seg->nchannel = mcfg->nchannel; seg->nrank = mcfg->nrank; seg->socket_id = 0; + rte_fbarray_set_used(arr, ms_idx); + RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%" PRIx64", len %zu\n", - seg_idx, addr, physaddr, hpi->hugepage_sz); - if (total_mem >= internal_config.memory || - seg_idx >= RTE_MAX_MEMSEG) - break; + seg_idx++, addr, physaddr, page_sz); + + total_mem += seg->len; } + if (total_mem >= internal_config.memory) + break; + } + if (total_mem < internal_config.memory) { + RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, " + "requested: %" PRIu64 "M " + "available: %" PRIu64 "M\n", + internal_config.memory >> 20, total_mem >> 20); + return -1; } return 0; } +struct attach_walk_args { + int fd_hugepage; + int seg_idx; +}; +static int +attach_segment(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, void *arg) +{ + struct attach_walk_args *wa = arg; + void *addr; + + addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, wa->fd_hugepage, + wa->seg_idx * EAL_PAGE_SIZE); + if (addr == MAP_FAILED || addr != ms->addr) + return -1; + wa->seg_idx++; + + return 0; +} + int rte_eal_hugepage_attach(void) { const struct hugepage_info *hpi; - int fd_hugepage_info, fd_hugepage = -1; - unsigned i = 0; - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int fd_hugepage = -1; + unsigned int i; - /* Obtain a file descriptor for hugepage_info */ - fd_hugepage_info = open(eal_hugepage_info_path(), O_RDONLY); - if (fd_hugepage_info < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path()); - return -1; - } + hpi = &internal_config.hugepage_info[0]; - /* Map the shared hugepage_info into the process address spaces */ - hpi = mmap(NULL, sizeof(struct hugepage_info), PROT_READ, MAP_PRIVATE, - fd_hugepage_info, 0); - if (hpi == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); - goto error; - } - - /* Obtain a file descriptor for contiguous memory */ - fd_hugepage = open(hpi->hugedir, O_RDWR); - if (fd_hugepage < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", hpi->hugedir); - goto error; - } + for (i = 0; i < internal_config.num_hugepage_sizes; i++) { + const struct hugepage_info *cur_hpi = &hpi[i]; + struct attach_walk_args wa; - /* Map the contiguous memory into each memory segment */ - for (i = 0; i < hpi->num_pages[0]; i++) { + memset(&wa, 0, sizeof(wa)); - void *addr; - struct rte_memseg *seg = &mcfg->memseg[i]; + /* Obtain a file descriptor for contiguous memory */ + fd_hugepage = open(cur_hpi->hugedir, O_RDWR); + if (fd_hugepage < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", + cur_hpi->hugedir); + goto error; + } + wa.fd_hugepage = fd_hugepage; + wa.seg_idx = 0; - addr = mmap(seg->addr, hpi->hugepage_sz, PROT_READ|PROT_WRITE, - MAP_SHARED|MAP_FIXED, fd_hugepage, - i * EAL_PAGE_SIZE); - if (addr == MAP_FAILED || addr != seg->addr) { + /* Map the contiguous memory into each memory segment */ + if (rte_memseg_walk(attach_segment, &wa) < 0) { RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", - i, hpi->hugedir); + wa.seg_idx, cur_hpi->hugedir); goto error; } + close(fd_hugepage); + fd_hugepage = -1; } /* hugepage_info is no longer required */ - munmap((void *)(uintptr_t)hpi, sizeof(struct hugepage_info)); - close(fd_hugepage_info); - close(fd_hugepage); return 0; error: - if (fd_hugepage_info >= 0) - close(fd_hugepage_info); if (fd_hugepage >= 0) close(fd_hugepage); return -1; @@ -174,3 +301,217 @@ rte_eal_using_phys_addrs(void) { return 0; } + +static uint64_t +get_mem_amount(uint64_t page_sz, uint64_t max_mem) +{ + uint64_t area_sz, max_pages; + + /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */ + max_pages = RTE_MAX_MEMSEG_PER_LIST; + max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem); + + area_sz = RTE_MIN(page_sz * max_pages, max_mem); + + /* make sure the list isn't smaller than the page size */ + area_sz = RTE_MAX(area_sz, page_sz); + + return RTE_ALIGN(area_sz, page_sz); +} + +#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i" +static int +alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz, + int n_segs, int socket_id, int type_msl_idx) +{ + char name[RTE_FBARRAY_NAME_LEN]; + + snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id, + type_msl_idx); + if (rte_fbarray_init(&msl->memseg_arr, name, n_segs, + sizeof(struct rte_memseg))) { + RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n", + rte_strerror(rte_errno)); + return -1; + } + + msl->page_sz = page_sz; + msl->socket_id = socket_id; + msl->base_va = NULL; + + RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n", + (size_t)page_sz >> 10, socket_id); + + return 0; +} + +static int +alloc_va_space(struct rte_memseg_list *msl) +{ + uint64_t page_sz; + size_t mem_sz; + void *addr; + int flags = 0; + +#ifdef RTE_ARCH_PPC_64 + flags |= MAP_HUGETLB; +#endif + + page_sz = msl->page_sz; + mem_sz = page_sz * msl->memseg_arr.len; + + addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags); + if (addr == NULL) { + if (rte_errno == EADDRNOTAVAIL) + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n", + (unsigned long long)mem_sz, msl->base_va); + else + RTE_LOG(ERR, EAL, "Cannot reserve memory\n"); + return -1; + } + msl->base_va = addr; + + return 0; +} + + +static int +memseg_primary_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int hpi_idx, msl_idx = 0; + struct rte_memseg_list *msl; + uint64_t max_mem, total_mem; + + /* no-huge does not need this at all */ + if (internal_config.no_hugetlbfs) + return 0; + + /* FreeBSD has an issue where core dump will dump the entire memory + * contents, including anonymous zero-page memory. Therefore, while we + * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will + * also be further limiting total memory amount to whatever memory is + * available to us through contigmem driver (plus spacing blocks). + * + * so, at each stage, we will be checking how much memory we are + * preallocating, and adjust all the values accordingly. + */ + + max_mem = (uint64_t)RTE_MAX_MEM_MB << 20; + total_mem = 0; + + /* create memseg lists */ + for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes; + hpi_idx++) { + uint64_t max_type_mem, total_type_mem = 0; + uint64_t avail_mem; + int type_msl_idx, max_segs, avail_segs, total_segs = 0; + struct hugepage_info *hpi; + uint64_t hugepage_sz; + + hpi = &internal_config.hugepage_info[hpi_idx]; + hugepage_sz = hpi->hugepage_sz; + + /* no NUMA support on FreeBSD */ + + /* check if we've already exceeded total memory amount */ + if (total_mem >= max_mem) + break; + + /* first, calculate theoretical limits according to config */ + max_type_mem = RTE_MIN(max_mem - total_mem, + (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20); + max_segs = RTE_MAX_MEMSEG_PER_TYPE; + + /* now, limit all of that to whatever will actually be + * available to us, because without dynamic allocation support, + * all of that extra memory will be sitting there being useless + * and slowing down core dumps in case of a crash. + * + * we need (N*2)-1 segments because we cannot guarantee that + * each segment will be IOVA-contiguous with the previous one, + * so we will allocate more and put spaces inbetween segments + * that are non-contiguous. + */ + avail_segs = (hpi->num_pages[0] * 2) - 1; + avail_mem = avail_segs * hugepage_sz; + + max_type_mem = RTE_MIN(avail_mem, max_type_mem); + max_segs = RTE_MIN(avail_segs, max_segs); + + type_msl_idx = 0; + while (total_type_mem < max_type_mem && + total_segs < max_segs) { + uint64_t cur_max_mem, cur_mem; + unsigned int n_segs; + + if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, + "No more space in memseg lists, please increase %s\n", + RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); + return -1; + } + + msl = &mcfg->memsegs[msl_idx++]; + + cur_max_mem = max_type_mem - total_type_mem; + + cur_mem = get_mem_amount(hugepage_sz, + cur_max_mem); + n_segs = cur_mem / hugepage_sz; + + if (alloc_memseg_list(msl, hugepage_sz, n_segs, + 0, type_msl_idx)) + return -1; + + total_segs += msl->memseg_arr.len; + total_type_mem = total_segs * hugepage_sz; + type_msl_idx++; + + if (alloc_va_space(msl)) { + RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n"); + return -1; + } + } + total_mem += total_type_mem; + } + return 0; +} + +static int +memseg_secondary_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int msl_idx = 0; + struct rte_memseg_list *msl; + + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { + + msl = &mcfg->memsegs[msl_idx]; + + /* skip empty memseg lists */ + if (msl->memseg_arr.len == 0) + continue; + + if (rte_fbarray_attach(&msl->memseg_arr)) { + RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n"); + return -1; + } + + /* preallocate VA space */ + if (alloc_va_space(msl)) { + RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n"); + return -1; + } + } + + return 0; +} + +int +rte_eal_memseg_init(void) +{ + return rte_eal_process_type() == RTE_PROC_PRIMARY ? + memseg_primary_init() : + memseg_secondary_init(); +} diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c index d602daf8..309b5872 100644 --- a/lib/librte_eal/bsdapp/eal/eal_thread.c +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c @@ -119,7 +119,7 @@ eal_thread_loop(__attribute__((unused)) void *arg) if (eal_thread_set_affinity() < 0) rte_panic("cannot set affinity\n"); - ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset)); RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n", lcore_id, thread_id, cpuset, ret == 0 ? "" : "..."); diff --git a/lib/librte_eal/bsdapp/eal/meson.build b/lib/librte_eal/bsdapp/eal/meson.build index e83fc919..3945b529 100644 --- a/lib/librte_eal/bsdapp/eal/meson.build +++ b/lib/librte_eal/bsdapp/eal/meson.build @@ -4,12 +4,17 @@ env_objs = [] env_headers = [] env_sources = files('eal_alarm.c', + 'eal_cpuflags.c', 'eal_debug.c', 'eal_hugepage_info.c', 'eal_interrupts.c', 'eal_lcore.c', + 'eal_memalloc.c', 'eal_thread.c', 'eal_timer.c', 'eal.c', 'eal_memory.c', + 'eal_dev.c' ) + +deps += ['kvargs'] |