From dc56569b146966c17433fef70d4eb5456871dcc6 Mon Sep 17 00:00:00 2001
From: Ido Barnea
Date: Mon, 8 Aug 2016 13:07:20 +0300
Subject: removed dpdk22 files

---
 src/dpdk22/lib/librte_eal/linuxapp/eal/eal.c       |  927 ------------
 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_alarm.c |  273 ----
 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_debug.c |  119 --
 .../librte_eal/linuxapp/eal/eal_hugepage_info.c    |  365 -----
 .../lib/librte_eal/linuxapp/eal/eal_interrupts.c   | 1236 ---------------
 .../lib/librte_eal/linuxapp/eal/eal_ivshmem.c      |  958 ------------
 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_lcore.c |  110 --
 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_log.c   |  146 --
 .../lib/librte_eal/linuxapp/eal/eal_memory.c       | 1599 --------------------
 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci.c   |  656 --------
 .../lib/librte_eal/linuxapp/eal/eal_pci_init.h     |  111 --
 .../lib/librte_eal/linuxapp/eal/eal_pci_uio.c      |  365 -----
 .../lib/librte_eal/linuxapp/eal/eal_pci_vfio.c     |  928 ------------
 .../librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c |  405 -----
 .../lib/librte_eal/linuxapp/eal/eal_thread.c       |  199 ---
 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_timer.c |  304 ----
 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_vfio.h  |   59 -
 .../eal/include/exec-env/rte_dom0_common.h         |  108 --
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |  228 ---
 .../linuxapp/eal/include/exec-env/rte_kni_common.h |  174 ---
 .../lib/librte_eal/linuxapp/igb_uio/compat.h       |  116 --
 .../lib/librte_eal/linuxapp/xen_dom0/compat.h      |   15 -
 .../lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h |  107 --
 23 files changed, 9508 deletions(-)
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_alarm.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_debug.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_interrupts.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_lcore.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_log.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_memory.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_init.h
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_thread.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_timer.c
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/eal_vfio.h
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/igb_uio/compat.h
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/compat.h
 delete mode 100644 src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h

(limited to 'src/dpdk22/lib/librte_eal/linuxapp')

diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal.c
deleted file mode 100644
index 635ec363..00000000
---
a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal.c +++ /dev/null @@ -1,927 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * Copyright(c) 2012-2014 6WIND S.A. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "eal_private.h" -#include "eal_thread.h" -#include "eal_internal_cfg.h" -#include "eal_filesystem.h" -#include "eal_hugepages.h" -#include "eal_options.h" - -#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) - -#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10) - -/* Allow the application to print its usage message too if set */ -static rte_usage_hook_t rte_application_usage_hook = NULL; - -/* early configuration structure, when memory config is not mmapped */ -static struct rte_mem_config early_mem_config; - -/* define fd variable here, because file needs to be kept open for the - * duration of the program, as we hold a write lock on it in the primary proc */ -static int mem_cfg_fd = -1; - -static struct flock wr_lock = { - .l_type = F_WRLCK, - .l_whence = SEEK_SET, - .l_start = offsetof(struct rte_mem_config, memseg), - .l_len = sizeof(early_mem_config.memseg), -}; - -/* Address of global and public configuration */ -static struct rte_config rte_config = { - .mem_config = &early_mem_config, -}; - -/* internal configuration (per-core) */ -struct lcore_config lcore_config[RTE_MAX_LCORE]; - -/* internal configuration */ -struct internal_config internal_config; - -/* used by rte_rdtsc() */ -int rte_cycles_vmware_tsc_map; - -/* Return a pointer to the configuration structure */ -struct 
rte_config * -rte_eal_get_configuration(void) -{ - return &rte_config; -} - -/* parse a sysfs (or other) file containing one integer value */ -int -eal_parse_sysfs_value(const char *filename, unsigned long *val) -{ - FILE *f; - char buf[BUFSIZ]; - char *end = NULL; - - if ((f = fopen(filename, "r")) == NULL) { - RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n", - __func__, filename); - return -1; - } - - if (fgets(buf, sizeof(buf), f) == NULL) { - RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", - __func__, filename); - fclose(f); - return -1; - } - *val = strtoul(buf, &end, 0); - if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) { - RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n", - __func__, filename); - fclose(f); - return -1; - } - fclose(f); - return 0; -} - - -/* create memory configuration in shared/mmap memory. Take out - * a write lock on the memsegs, so we can auto-detect primary/secondary. - * This means we never close the file while running (auto-close on exit). - * We also don't lock the whole file, so that in future we can use read-locks - * on other parts, e.g. memzones, to detect if there are running secondary - * processes. */ -static void -rte_eal_config_create(void) -{ - void *rte_mem_cfg_addr; - int retval; - - const char *pathname = eal_runtime_config_path(); - - if (internal_config.no_shconf) - return; - - /* map the config before hugepage address so that we don't waste a page */ - if (internal_config.base_virtaddr != 0) - rte_mem_cfg_addr = (void *) - RTE_ALIGN_FLOOR(internal_config.base_virtaddr - - sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE)); - else - rte_mem_cfg_addr = NULL; - - if (mem_cfg_fd < 0){ - mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660); - if (mem_cfg_fd < 0) - rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); - } - - retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); - if (retval < 0){ - close(mem_cfg_fd); - rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname); - } - - retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock); - if (retval < 0){ - close(mem_cfg_fd); - rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. 
Is another primary " - "process running?\n", pathname); - } - - rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); - - if (rte_mem_cfg_addr == MAP_FAILED){ - rte_panic("Cannot mmap memory for rte_config\n"); - } - memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); - rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; - - /* store address of the config in the config itself so that secondary - * processes could later map the config into this exact location */ - rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr; - -} - -/* attach to an existing shared memory config */ -static void -rte_eal_config_attach(void) -{ - struct rte_mem_config *mem_config; - - const char *pathname = eal_runtime_config_path(); - - if (internal_config.no_shconf) - return; - - if (mem_cfg_fd < 0){ - mem_cfg_fd = open(pathname, O_RDWR); - if (mem_cfg_fd < 0) - rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); - } - - /* map it as read-only first */ - mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), - PROT_READ, MAP_SHARED, mem_cfg_fd, 0); - if (mem_config == MAP_FAILED) - rte_panic("Cannot mmap memory for rte_config\n"); - - rte_config.mem_config = mem_config; -} - -/* reattach the shared config at exact memory location primary process has it */ -static void -rte_eal_config_reattach(void) -{ - struct rte_mem_config *mem_config; - void *rte_mem_cfg_addr; - - if (internal_config.no_shconf) - return; - - /* save the address primary process has mapped shared config to */ - rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr; - - /* unmap original config */ - munmap(rte_config.mem_config, sizeof(struct rte_mem_config)); - - /* remap the config at proper address */ - mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr, - sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED, - mem_cfg_fd, 0); - close(mem_cfg_fd); - if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) - rte_panic("Cannot mmap memory for rte_config\n"); - - rte_config.mem_config = mem_config; -} - -/* Detect if we are a primary or a secondary process */ -enum rte_proc_type_t -eal_proc_type_detect(void) -{ - enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; - const char *pathname = eal_runtime_config_path(); - - /* if we can open the file but not get a write-lock we are a secondary - * process. NOTE: if we get a file handle back, we keep that open - * and don't close it to prevent a race condition between multiple opens */ - if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && - (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) - ptype = RTE_PROC_SECONDARY; - - RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", - ptype == RTE_PROC_PRIMARY ? 
"PRIMARY" : "SECONDARY"); - - return ptype; -} - -/* Sets up rte_config structure with the pointer to shared memory config.*/ -static void -rte_config_init(void) -{ - rte_config.process_type = internal_config.process_type; - - switch (rte_config.process_type){ - case RTE_PROC_PRIMARY: - rte_eal_config_create(); - break; - case RTE_PROC_SECONDARY: - rte_eal_config_attach(); - rte_eal_mcfg_wait_complete(rte_config.mem_config); - rte_eal_config_reattach(); - break; - case RTE_PROC_AUTO: - case RTE_PROC_INVALID: - rte_panic("Invalid process type\n"); - } -} - -/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */ -static void -eal_hugedirs_unlock(void) -{ - int i; - - for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) - { - /* skip uninitialized */ - if (internal_config.hugepage_info[i].lock_descriptor < 0) - continue; - /* unlock hugepage file */ - flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN); - close(internal_config.hugepage_info[i].lock_descriptor); - /* reset the field */ - internal_config.hugepage_info[i].lock_descriptor = -1; - } -} - -/* display usage */ -static void -eal_usage(const char *prgname) -{ - printf("\nUsage: %s ", prgname); - eal_common_usage(); - printf("EAL Linux options:\n" - " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n" - " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n" - " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n" - " --"OPT_BASE_VIRTADDR" Base virtual address\n" - " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" - " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" - " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n" - "\n"); - /* Allow the application to print its usage message too if hook is set */ - if ( rte_application_usage_hook ) { - printf("===== Application Usage =====\n\n"); - rte_application_usage_hook(prgname); - } -} - -/* Set a per-application usage message */ -rte_usage_hook_t -rte_set_application_usage_hook( rte_usage_hook_t usage_func ) -{ - rte_usage_hook_t old_func; - - /* Will be NULL on the first call to denote the last usage routine. 
*/ - old_func = rte_application_usage_hook; - rte_application_usage_hook = usage_func; - - return old_func; -} - -static int -eal_parse_socket_mem(char *socket_mem) -{ - char * arg[RTE_MAX_NUMA_NODES]; - char *end; - int arg_num, i, len; - uint64_t total_mem = 0; - - len = strnlen(socket_mem, SOCKET_MEM_STRLEN); - if (len == SOCKET_MEM_STRLEN) { - RTE_LOG(ERR, EAL, "--socket-mem is too long\n"); - return -1; - } - - /* all other error cases will be caught later */ - if (!isdigit(socket_mem[len-1])) - return -1; - - /* split the optarg into separate socket values */ - arg_num = rte_strsplit(socket_mem, len, - arg, RTE_MAX_NUMA_NODES, ','); - - /* if split failed, or 0 arguments */ - if (arg_num <= 0) - return -1; - - internal_config.force_sockets = 1; - - /* parse each defined socket option */ - errno = 0; - for (i = 0; i < arg_num; i++) { - end = NULL; - internal_config.socket_mem[i] = strtoull(arg[i], &end, 10); - - /* check for invalid input */ - if ((errno != 0) || - (arg[i][0] == '\0') || (end == NULL) || (*end != '\0')) - return -1; - internal_config.socket_mem[i] *= 1024ULL; - internal_config.socket_mem[i] *= 1024ULL; - total_mem += internal_config.socket_mem[i]; - } - - /* check if we have a positive amount of total memory */ - if (total_mem == 0) - return -1; - - return 0; -} - -static int -eal_parse_base_virtaddr(const char *arg) -{ - char *end; - uint64_t addr; - - errno = 0; - addr = strtoull(arg, &end, 16); - - /* check for errors */ - if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0')) - return -1; - - /* make sure we don't exceed 32-bit boundary on 32-bit target */ -#ifndef RTE_ARCH_64 - if (addr >= UINTPTR_MAX) - return -1; -#endif - - /* align the addr on 16M boundary, 16MB is the minimum huge page - * size on IBM Power architecture. If the addr is aligned to 16MB, - * it can align to 2MB for x86. So this alignment can also be used - * on x86 */ - internal_config.base_virtaddr = - RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M); - - return 0; -} - -static int -eal_parse_vfio_intr(const char *mode) -{ - unsigned i; - static struct { - const char *name; - enum rte_intr_mode value; - } map[] = { - { "legacy", RTE_INTR_MODE_LEGACY }, - { "msi", RTE_INTR_MODE_MSI }, - { "msix", RTE_INTR_MODE_MSIX }, - }; - - for (i = 0; i < RTE_DIM(map); i++) { - if (!strcmp(mode, map[i].name)) { - internal_config.vfio_intr_mode = map[i].value; - return 0; - } - } - return -1; -} - -static inline size_t -eal_get_hugepage_mem_size(void) -{ - uint64_t size = 0; - unsigned i, j; - - for (i = 0; i < internal_config.num_hugepage_sizes; i++) { - struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) { - for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { - size += hpi->hugepage_sz * hpi->num_pages[j]; - } - } - } - - return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX; -} - -/* Parse the arguments for --log-level only */ -static void -eal_log_level_parse(int argc, char **argv) -{ - int opt; - char **argvopt; - int option_index; - const int old_optind = optind; - const int old_optopt = optopt; - char * const old_optarg = optarg; - - argvopt = argv; - optind = 1; - - eal_reset_internal_config(&internal_config); - - while ((opt = getopt_long(argc, argvopt, eal_short_options, - eal_long_options, &option_index)) != EOF) { - - int ret; - - /* getopt is not happy, stop right now */ - if (opt == '?') - break; - - ret = (opt == OPT_LOG_LEVEL_NUM) ? 
- eal_parse_common_option(opt, optarg, &internal_config) : 0; - - /* common parser is not happy */ - if (ret < 0) - break; - } - - /* restore getopt lib */ - optind = old_optind; - optopt = old_optopt; - optarg = old_optarg; -} - -/* Parse the argument given in the command line of the application */ -static int -eal_parse_args(int argc, char **argv) -{ - int opt, ret; - char **argvopt; - int option_index; - char *prgname = argv[0]; - const int old_optind = optind; - const int old_optopt = optopt; - char * const old_optarg = optarg; - - argvopt = argv; - optind = 1; - - while ((opt = getopt_long(argc, argvopt, eal_short_options, - eal_long_options, &option_index)) != EOF) { - - /* getopt is not happy, stop right now */ - if (opt == '?') { - eal_usage(prgname); - ret = -1; - goto out; - } - - ret = eal_parse_common_option(opt, optarg, &internal_config); - /* common parser is not happy */ - if (ret < 0) { - eal_usage(prgname); - ret = -1; - goto out; - } - /* common parser handled this option */ - if (ret == 0) - continue; - - switch (opt) { - case 'h': - eal_usage(prgname); - exit(EXIT_SUCCESS); - - /* long options */ - case OPT_XEN_DOM0_NUM: -#ifdef RTE_LIBRTE_XEN_DOM0 - internal_config.xen_dom0_support = 1; -#else - RTE_LOG(ERR, EAL, "Can't support DPDK app " - "running on Dom0, please configure" - " RTE_LIBRTE_XEN_DOM0=y\n"); - ret = -1; - goto out; -#endif - break; - - case OPT_HUGE_DIR_NUM: - internal_config.hugepage_dir = optarg; - break; - - case OPT_FILE_PREFIX_NUM: - internal_config.hugefile_prefix = optarg; - break; - - case OPT_SOCKET_MEM_NUM: - if (eal_parse_socket_mem(optarg) < 0) { - RTE_LOG(ERR, EAL, "invalid parameters for --" - OPT_SOCKET_MEM "\n"); - eal_usage(prgname); - ret = -1; - goto out; - } - break; - - case OPT_BASE_VIRTADDR_NUM: - if (eal_parse_base_virtaddr(optarg) < 0) { - RTE_LOG(ERR, EAL, "invalid parameter for --" - OPT_BASE_VIRTADDR "\n"); - eal_usage(prgname); - ret = -1; - goto out; - } - break; - - case OPT_VFIO_INTR_NUM: - if (eal_parse_vfio_intr(optarg) < 0) { - RTE_LOG(ERR, EAL, "invalid parameters for --" - OPT_VFIO_INTR "\n"); - eal_usage(prgname); - ret = -1; - goto out; - } - break; - - case OPT_CREATE_UIO_DEV_NUM: - internal_config.create_uio_dev = 1; - break; - - default: - if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { - RTE_LOG(ERR, EAL, "Option %c is not supported " - "on Linux\n", opt); - } else if (opt >= OPT_LONG_MIN_NUM && - opt < OPT_LONG_MAX_NUM) { - RTE_LOG(ERR, EAL, "Option %s is not supported " - "on Linux\n", - eal_long_options[option_index].name); - } else { - RTE_LOG(ERR, EAL, "Option %d is not supported " - "on Linux\n", opt); - } - eal_usage(prgname); - ret = -1; - goto out; - } - } - - if (eal_adjust_config(&internal_config) != 0) { - ret = -1; - goto out; - } - - /* sanity checks */ - if (eal_check_common_options(&internal_config) != 0) { - eal_usage(prgname); - ret = -1; - goto out; - } - - /* --xen-dom0 doesn't make sense with --socket-mem */ - if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) { - RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified " - "together with --"OPT_XEN_DOM0"\n"); - eal_usage(prgname); - ret = -1; - goto out; - } - - if (optind >= 0) - argv[optind-1] = prgname; - ret = optind-1; - -out: - /* restore getopt lib */ - optind = old_optind; - optopt = old_optopt; - optarg = old_optarg; - - return ret; -} - -static void -eal_check_mem_on_local_socket(void) -{ - const struct rte_memseg *ms; - int i, socket_id; - - socket_id = 
rte_lcore_to_socket_id(rte_config.master_lcore); - - ms = rte_eal_get_physmem_layout(); - - for (i = 0; i < RTE_MAX_MEMSEG; i++) - if (ms[i].socket_id == socket_id && - ms[i].len > 0) - return; - - RTE_LOG(WARNING, EAL, "WARNING: Master core has no " - "memory on local socket!\n"); -} - -static int -sync_func(__attribute__((unused)) void *arg) -{ - return 0; -} - -inline static void -rte_eal_mcfg_complete(void) -{ - /* ALL shared mem_config related INIT DONE */ - if (rte_config.process_type == RTE_PROC_PRIMARY) - rte_config.mem_config->magic = RTE_MAGIC; -} - -/* - * Request iopl privilege for all RPL, returns 0 on success - * iopl() call is mostly for the i386 architecture. For other architectures, - * return -1 to indicate IO privilege can't be changed in this way. - */ -int -rte_eal_iopl_init(void) -{ -#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) - if (iopl(3) != 0) - return -1; - return 0; -#else - return -1; -#endif -} - -/* Launch threads, called at application init(). */ -int -rte_eal_init(int argc, char **argv) -{ - int i, fctret, ret; - pthread_t thread_id; - static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0); - const char *logid; - char cpuset[RTE_CPU_AFFINITY_STR_LEN]; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; - - if (!rte_atomic32_test_and_set(&run_once)) - return -1; - - logid = strrchr(argv[0], '/'); - logid = strdup(logid ? logid + 1: argv[0]); - - thread_id = pthread_self(); - - if (rte_eal_log_early_init() < 0) - rte_panic("Cannot init early logs\n"); - - eal_log_level_parse(argc, argv); - - /* set log level as early as possible */ - rte_set_log_level(internal_config.log_level); - - if (rte_eal_cpu_init() < 0) - rte_panic("Cannot detect lcores\n"); - - fctret = eal_parse_args(argc, argv); - if (fctret < 0) - exit(1); - - if (internal_config.no_hugetlbfs == 0 && - internal_config.process_type != RTE_PROC_SECONDARY && - internal_config.xen_dom0_support == 0 && - eal_hugepage_info_init() < 0) - rte_panic("Cannot get hugepage information\n"); - - if (internal_config.memory == 0 && internal_config.force_sockets == 0) { - if (internal_config.no_hugetlbfs) - internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE; - else - internal_config.memory = eal_get_hugepage_mem_size(); - } - - if (internal_config.vmware_tsc_map == 1) { -#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT - rte_cycles_vmware_tsc_map = 1; - RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, " - "you must have monitor_control.pseudo_perfctr = TRUE\n"); -#else - RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because " - "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n"); -#endif - } - - rte_srand(rte_rdtsc()); - - rte_config_init(); - - if (rte_eal_pci_init() < 0) - rte_panic("Cannot init PCI\n"); - -#ifdef RTE_LIBRTE_IVSHMEM - if (rte_eal_ivshmem_init() < 0) - rte_panic("Cannot init IVSHMEM\n"); -#endif - - if (rte_eal_memory_init() < 0) - rte_panic("Cannot init memory\n"); - - /* the directories are locked during eal_hugepage_info_init */ - eal_hugedirs_unlock(); - - if (rte_eal_memzone_init() < 0) - rte_panic("Cannot init memzone\n"); - - if (rte_eal_tailqs_init() < 0) - rte_panic("Cannot init tail queues for objects\n"); - -#ifdef RTE_LIBRTE_IVSHMEM - if (rte_eal_ivshmem_obj_init() < 0) - rte_panic("Cannot init IVSHMEM objects\n"); -#endif - - if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) - rte_panic("Cannot init logs\n"); - - if (rte_eal_alarm_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); - - if (rte_eal_timer_init() < 0) - rte_panic("Cannot init HPET or TSC 
timers\n"); - - eal_check_mem_on_local_socket(); - - rte_eal_mcfg_complete(); - - if (eal_plugins_init() < 0) - rte_panic("Cannot init plugins\n"); - - eal_thread_init_master(rte_config.master_lcore); - - ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); - - RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n", - rte_config.master_lcore, (int)thread_id, cpuset, - ret == 0 ? "" : "..."); - - if (rte_eal_dev_init() < 0) - rte_panic("Cannot init pmd devices\n"); - - if (rte_eal_intr_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); - - RTE_LCORE_FOREACH_SLAVE(i) { - - /* - * create communication pipes between master thread - * and children - */ - if (pipe(lcore_config[i].pipe_master2slave) < 0) - rte_panic("Cannot create pipe\n"); - if (pipe(lcore_config[i].pipe_slave2master) < 0) - rte_panic("Cannot create pipe\n"); - - lcore_config[i].state = WAIT; - - /* create a thread for each lcore */ - ret = pthread_create(&lcore_config[i].thread_id, NULL, - eal_thread_loop, NULL); - if (ret != 0) - rte_panic("Cannot create thread\n"); - - /* Set thread_name for aid in debugging. */ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, - "lcore-slave-%d", i); - ret = rte_thread_setname(lcore_config[i].thread_id, - thread_name); - if (ret != 0) - RTE_LOG(ERR, EAL, - "Cannot set name for lcore thread\n"); - } - - /* - * Launch a dummy function on all slave lcores, so that master lcore - * knows they are all ready when this function returns. - */ - rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); - rte_eal_mp_wait_lcore(); - - /* Probe & Initialize PCI devices */ - if (rte_eal_pci_probe()) - rte_panic("Cannot probe PCI\n"); - - return fctret; -} - -/* get core role */ -enum rte_lcore_role_t -rte_eal_lcore_role(unsigned lcore_id) -{ - return rte_config.lcore_role[lcore_id]; -} - -enum rte_proc_type_t -rte_eal_process_type(void) -{ - return rte_config.process_type; -} - -int rte_eal_has_hugepages(void) -{ - return ! internal_config.no_hugetlbfs; -} - -int -rte_eal_check_module(const char *module_name) -{ - char mod_name[30]; /* Any module names can be longer than 30 bytes? */ - int ret = 0; - int n; - - if (NULL == module_name) - return -1; - - FILE *fd = fopen("/proc/modules", "r"); - if (NULL == fd) { - RTE_LOG(ERR, EAL, "Open /proc/modules failed!" - " error %i (%s)\n", errno, strerror(errno)); - return -1; - } - while (!feof(fd)) { - n = fscanf(fd, "%29s %*[^\n]", mod_name); - if ((n == 1) && !strcmp(mod_name, module_name)) { - ret = 1; - break; - } - } - fclose(fd); - - return ret; -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_alarm.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_alarm.c deleted file mode 100644 index 8b042abc..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_alarm.c +++ /dev/null @@ -1,273 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef TFD_NONBLOCK -#include -#define TFD_NONBLOCK O_NONBLOCK -#endif - -#define NS_PER_US 1000 -#define US_PER_MS 1000 -#define MS_PER_S 1000 -#define US_PER_S (US_PER_MS * MS_PER_S) - -#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ -#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW -#else -#define CLOCK_TYPE_ID CLOCK_MONOTONIC -#endif - -struct alarm_entry { - LIST_ENTRY(alarm_entry) next; - struct timeval time; - rte_eal_alarm_callback cb_fn; - void *cb_arg; - volatile uint8_t executing; - volatile pthread_t executing_id; -}; - -static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER(); -static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER; - -static struct rte_intr_handle intr_handle = {.fd = -1 }; -static int handler_registered = 0; -static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg); - -int -rte_eal_alarm_init(void) -{ - intr_handle.type = RTE_INTR_HANDLE_ALARM; - /* create a timerfd file descriptor */ - intr_handle.fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK); - if (intr_handle.fd == -1) - goto error; - - return 0; - -error: - rte_errno = errno; - return -1; -} - -static void -eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused, - void *arg __rte_unused) -{ - struct timespec now; - struct alarm_entry *ap; - - rte_spinlock_lock(&alarm_list_lk); - while ((ap = LIST_FIRST(&alarm_list)) !=NULL && - clock_gettime(CLOCK_TYPE_ID, &now) == 0 && - (ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec && - (ap->time.tv_usec * NS_PER_US) <= now.tv_nsec))) { - ap->executing = 1; - ap->executing_id = pthread_self(); - rte_spinlock_unlock(&alarm_list_lk); - - ap->cb_fn(ap->cb_arg); - - rte_spinlock_lock(&alarm_list_lk); - - LIST_REMOVE(ap, next); - rte_free(ap); - } - - if (!LIST_EMPTY(&alarm_list)) { - struct itimerspec atime = { .it_interval = { 0, 0 } }; - - ap = LIST_FIRST(&alarm_list); - atime.it_value.tv_sec = ap->time.tv_sec; - atime.it_value.tv_nsec = ap->time.tv_usec * NS_PER_US; - /* perform borrow for subtraction if necessary */ - if (now.tv_nsec > (ap->time.tv_usec * NS_PER_US)) - atime.it_value.tv_sec--, atime.it_value.tv_nsec += US_PER_S * NS_PER_US; - - atime.it_value.tv_sec -= now.tv_sec; - atime.it_value.tv_nsec -= now.tv_nsec; - timerfd_settime(intr_handle.fd, 0, &atime, NULL); - } - 
rte_spinlock_unlock(&alarm_list_lk); -} - -int -rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg) -{ - struct timespec now; - int ret = 0; - struct alarm_entry *ap, *new_alarm; - - /* Check parameters, including that us won't cause a uint64_t overflow */ - if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL) - return -EINVAL; - - new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0); - if (new_alarm == NULL) - return -ENOMEM; - - /* use current time to calculate absolute time of alarm */ - clock_gettime(CLOCK_TYPE_ID, &now); - - new_alarm->cb_fn = cb_fn; - new_alarm->cb_arg = cb_arg; - new_alarm->time.tv_usec = ((now.tv_nsec / NS_PER_US) + us) % US_PER_S; - new_alarm->time.tv_sec = now.tv_sec + (((now.tv_nsec / NS_PER_US) + us) / US_PER_S); - - rte_spinlock_lock(&alarm_list_lk); - if (!handler_registered) { - ret |= rte_intr_callback_register(&intr_handle, - eal_alarm_callback, NULL); - handler_registered = (ret == 0) ? 1 : 0; - } - - if (LIST_EMPTY(&alarm_list)) - LIST_INSERT_HEAD(&alarm_list, new_alarm, next); - else { - LIST_FOREACH(ap, &alarm_list, next) { - if (ap->time.tv_sec > new_alarm->time.tv_sec || - (ap->time.tv_sec == new_alarm->time.tv_sec && - ap->time.tv_usec > new_alarm->time.tv_usec)){ - LIST_INSERT_BEFORE(ap, new_alarm, next); - break; - } - if (LIST_NEXT(ap, next) == NULL) { - LIST_INSERT_AFTER(ap, new_alarm, next); - break; - } - } - } - - if (LIST_FIRST(&alarm_list) == new_alarm) { - struct itimerspec alarm_time = { - .it_interval = {0, 0}, - .it_value = { - .tv_sec = us / US_PER_S, - .tv_nsec = (us % US_PER_S) * NS_PER_US, - }, - }; - ret |= timerfd_settime(intr_handle.fd, 0, &alarm_time, NULL); - } - rte_spinlock_unlock(&alarm_list_lk); - - return ret; -} - -int -rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) -{ - struct alarm_entry *ap, *ap_prev; - int count = 0; - int err = 0; - int executing; - - if (!cb_fn) { - rte_errno = EINVAL; - return -1; - } - - do { - executing = 0; - rte_spinlock_lock(&alarm_list_lk); - /* remove any matches at the start of the list */ - while ((ap = LIST_FIRST(&alarm_list)) != NULL && - cb_fn == ap->cb_fn && - (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) { - - if (ap->executing == 0) { - LIST_REMOVE(ap, next); - rte_free(ap); - count++; - } else { - /* If calling from other context, mark that alarm is executing - * so loop can spin till it finish. 
Otherwise we are trying to - * cancel our self - mark it by EINPROGRESS */ - if (pthread_equal(ap->executing_id, pthread_self()) == 0) - executing++; - else - err = EINPROGRESS; - - break; - } - } - ap_prev = ap; - - /* now go through list, removing entries not at start */ - LIST_FOREACH(ap, &alarm_list, next) { - /* this won't be true first time through */ - if (cb_fn == ap->cb_fn && - (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) { - - if (ap->executing == 0) { - LIST_REMOVE(ap, next); - rte_free(ap); - count++; - ap = ap_prev; - } else if (pthread_equal(ap->executing_id, pthread_self()) == 0) - executing++; - else - err = EINPROGRESS; - } - ap_prev = ap; - } - rte_spinlock_unlock(&alarm_list_lk); - } while (executing != 0); - - if (count == 0 && err == 0) - rte_errno = ENOENT; - else if (err) - rte_errno = err; - - return count; -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_debug.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_debug.c deleted file mode 100644 index 907fbfa7..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_debug.c +++ /dev/null @@ -1,119 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define BACKTRACE_SIZE 256 - -/* dump the stack of the calling core */ -void rte_dump_stack(void) -{ - void *func[BACKTRACE_SIZE]; - char **symb = NULL; - int size; - - size = backtrace(func, BACKTRACE_SIZE); - symb = backtrace_symbols(func, size); - - if (symb == NULL) - return; - - while (size > 0) { - rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL, - "%d: [%s]\n", size, symb[size - 1]); - size --; - } - - free(symb); -} - -/* not implemented in this environment */ -void rte_dump_registers(void) -{ - return; -} - -/* call abort(), it will generate a coredump if enabled */ -void __rte_panic(const char *funcname, const char *format, ...) 
-{ - va_list ap; - - /* disable history */ - rte_log_set_history(0); - - rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname); - va_start(ap, format); - rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); - va_end(ap); - rte_dump_stack(); - rte_dump_registers(); - abort(); -} - -/* - * Like rte_panic this terminates the application. However, no traceback is - * provided and no core-dump is generated. - */ -void -rte_exit(int exit_code, const char *format, ...) -{ - va_list ap; - - /* disable history */ - rte_log_set_history(0); - - if (exit_code != 0) - RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n" - " Cause: ", exit_code); - - va_start(ap, format); - rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); - va_end(ap); - -#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR - exit(exit_code); -#else - rte_dump_stack(); - rte_dump_registers(); - abort(); -#endif -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c deleted file mode 100644 index 18858e2d..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ /dev/null @@ -1,365 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "rte_string_fns.h" -#include "eal_internal_cfg.h" -#include "eal_hugepages.h" -#include "eal_filesystem.h" - -static const char sys_dir_path[] = "/sys/kernel/mm/hugepages"; - -/* this function is only called from eal_hugepage_info_init which itself - * is only called from a primary process */ -static uint32_t -get_num_hugepages(const char *subdir) -{ - char path[PATH_MAX]; - long unsigned resv_pages, num_pages = 0; - const char *nr_hp_file = "free_hugepages"; - const char *nr_rsvd_file = "resv_hugepages"; - - /* first, check how many reserved pages kernel reports */ - snprintf(path, sizeof(path), "%s/%s/%s", - sys_dir_path, subdir, nr_rsvd_file); - if (eal_parse_sysfs_value(path, &resv_pages) < 0) - return 0; - - snprintf(path, sizeof(path), "%s/%s/%s", - sys_dir_path, subdir, nr_hp_file); - if (eal_parse_sysfs_value(path, &num_pages) < 0) - return 0; - - if (num_pages == 0) - RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n", - subdir); - - /* adjust num_pages */ - if (num_pages >= resv_pages) - num_pages -= resv_pages; - else if (resv_pages) - num_pages = 0; - - /* we want to return a uint32_t and more than this looks suspicious - * anyway ... */ - if (num_pages > UINT32_MAX) - num_pages = UINT32_MAX; - - return num_pages; -} - -static uint64_t -get_default_hp_size(void) -{ - const char proc_meminfo[] = "/proc/meminfo"; - const char str_hugepagesz[] = "Hugepagesize:"; - unsigned hugepagesz_len = sizeof(str_hugepagesz) - 1; - char buffer[256]; - unsigned long long size = 0; - - FILE *fd = fopen(proc_meminfo, "r"); - if (fd == NULL) - rte_panic("Cannot open %s\n", proc_meminfo); - while(fgets(buffer, sizeof(buffer), fd)){ - if (strncmp(buffer, str_hugepagesz, hugepagesz_len) == 0){ - size = rte_str_to_size(&buffer[hugepagesz_len]); - break; - } - } - fclose(fd); - if (size == 0) - rte_panic("Cannot get default hugepage size from %s\n", proc_meminfo); - return size; -} - -static const char * -get_hugepage_dir(uint64_t hugepage_sz) -{ - enum proc_mount_fieldnames { - DEVICE = 0, - MOUNTPT, - FSTYPE, - OPTIONS, - _FIELDNAME_MAX - }; - static uint64_t default_size = 0; - const char proc_mounts[] = "/proc/mounts"; - const char hugetlbfs_str[] = "hugetlbfs"; - const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1; - const char pagesize_opt[] = "pagesize="; - const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1; - const char split_tok = ' '; - char *splitstr[_FIELDNAME_MAX]; - char buf[BUFSIZ]; - char *retval = NULL; - - FILE *fd = fopen(proc_mounts, "r"); - if (fd == NULL) - rte_panic("Cannot open %s\n", proc_mounts); - - if (default_size == 0) - default_size = get_default_hp_size(); - - while (fgets(buf, sizeof(buf), fd)){ - if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX, - split_tok) != _FIELDNAME_MAX) { - RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts); - break; /* return NULL */ - } - - /* we have a specified --huge-dir option, only examine that dir */ - if (internal_config.hugepage_dir != NULL && - strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0) - continue; - - if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){ - const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt); - - /* if no explicit page size, the default page size is compared */ - if (pagesz_str == NULL){ - if (hugepage_sz == 
default_size){ - retval = strdup(splitstr[MOUNTPT]); - break; - } - } - /* there is an explicit page size, so check it */ - else { - uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]); - if (pagesz == hugepage_sz) { - retval = strdup(splitstr[MOUNTPT]); - break; - } - } - } /* end if strncmp hugetlbfs */ - } /* end while fgets */ - - fclose(fd); - return retval; -} - -/* - * Clear the hugepage directory of whatever hugepage files - * there are. Checks if the file is locked (i.e. - * if it's in use by another DPDK process). - */ -static int -clear_hugedir(const char * hugedir) -{ - DIR *dir; - struct dirent *dirent; - int dir_fd, fd, lck_result; - const char filter[] = "*map_*"; /* matches hugepage files */ - - /* open directory */ - dir = opendir(hugedir); - if (!dir) { - RTE_LOG(ERR, EAL, "Unable to open hugepage directory %s\n", - hugedir); - goto error; - } - dir_fd = dirfd(dir); - - dirent = readdir(dir); - if (!dirent) { - RTE_LOG(ERR, EAL, "Unable to read hugepage directory %s\n", - hugedir); - goto error; - } - - while(dirent != NULL){ - /* skip files that don't match the hugepage pattern */ - if (fnmatch(filter, dirent->d_name, 0) > 0) { - dirent = readdir(dir); - continue; - } - - /* try and lock the file */ - fd = openat(dir_fd, dirent->d_name, O_RDONLY); - - /* skip to next file */ - if (fd == -1) { - dirent = readdir(dir); - continue; - } - - /* non-blocking lock */ - lck_result = flock(fd, LOCK_EX | LOCK_NB); - - /* if lock succeeds, unlock and remove the file */ - if (lck_result != -1) { - flock(fd, LOCK_UN); - unlinkat(dir_fd, dirent->d_name, 0); - } - close (fd); - dirent = readdir(dir); - } - - closedir(dir); - return 0; - -error: - if (dir) - closedir(dir); - - RTE_LOG(ERR, EAL, "Error while clearing hugepage dir: %s\n", - strerror(errno)); - - return -1; -} - -static int -compare_hpi(const void *a, const void *b) -{ - const struct hugepage_info *hpi_a = a; - const struct hugepage_info *hpi_b = b; - - return hpi_b->hugepage_sz - hpi_a->hugepage_sz; -} - -/* - * when we initialize the hugepage info, everything goes - * to socket 0 by default. it will later get sorted by memory - * initialization procedure. 
- */ -int -eal_hugepage_info_init(void) -{ - const char dirent_start_text[] = "hugepages-"; - const size_t dirent_start_len = sizeof(dirent_start_text) - 1; - unsigned i, num_sizes = 0; - DIR *dir; - struct dirent *dirent; - - dir = opendir(sys_dir_path); - if (dir == NULL) - rte_panic("Cannot open directory %s to read system hugepage " - "info\n", sys_dir_path); - - for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { - struct hugepage_info *hpi; - - if (strncmp(dirent->d_name, dirent_start_text, - dirent_start_len) != 0) - continue; - - if (num_sizes >= MAX_HUGEPAGE_SIZES) - break; - - hpi = &internal_config.hugepage_info[num_sizes]; - hpi->hugepage_sz = - rte_str_to_size(&dirent->d_name[dirent_start_len]); - hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz); - - /* first, check if we have a mountpoint */ - if (hpi->hugedir == NULL) { - uint32_t num_pages; - - num_pages = get_num_hugepages(dirent->d_name); - if (num_pages > 0) - RTE_LOG(NOTICE, EAL, - "%" PRIu32 " hugepages of size " - "%" PRIu64 " reserved, but no mounted " - "hugetlbfs found for that size\n", - num_pages, hpi->hugepage_sz); - continue; - } - - /* try to obtain a writelock */ - hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY); - - /* if blocking lock failed */ - if (flock(hpi->lock_descriptor, LOCK_EX) == -1) { - RTE_LOG(CRIT, EAL, - "Failed to lock hugepage directory!\n"); - break; - } - /* clear out the hugepages dir from unused pages */ - if (clear_hugedir(hpi->hugedir) == -1) - break; - - /* for now, put all pages into socket 0, - * later they will be sorted */ - hpi->num_pages[0] = get_num_hugepages(dirent->d_name); - -#ifndef RTE_ARCH_64 - /* for 32-bit systems, limit number of hugepages to - * 1GB per page size */ - hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0], - RTE_PGSIZE_1G / hpi->hugepage_sz); -#endif - - num_sizes++; - } - closedir(dir); - - /* something went wrong, and we broke from the for loop above */ - if (dirent != NULL) - return -1; - - internal_config.num_hugepage_sizes = num_sizes; - - /* sort the page directory entries by size, largest to smallest */ - qsort(&internal_config.hugepage_info[0], num_sizes, - sizeof(internal_config.hugepage_info[0]), compare_hpi); - - /* now we have all info, check we have at least one valid size */ - for (i = 0; i < num_sizes; i++) - if (internal_config.hugepage_info[i].hugedir != NULL && - internal_config.hugepage_info[i].num_pages[0] > 0) - return 0; - - /* no valid hugepage mounts available, return error */ - return -1; -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_interrupts.c deleted file mode 100644 index 5d3128e3..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ /dev/null @@ -1,1236 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "eal_private.h" -#include "eal_vfio.h" -#include "eal_thread.h" - -#define EAL_INTR_EPOLL_WAIT_FOREVER (-1) -#define NB_OTHER_INTR 1 - -static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */ - -// TREX_PATCH -int eal_err_read_from_file_is_error = 1; - -/** - * union for pipe fds. - */ -union intr_pipefds{ - struct { - int pipefd[2]; - }; - struct { - int readfd; - int writefd; - }; -}; - -/** - * union buffer for reading on different devices - */ -union rte_intr_read_buffer { - int uio_intr_count; /* for uio device */ -#ifdef VFIO_PRESENT - uint64_t vfio_intr_count; /* for vfio device */ -#endif - uint64_t timerfd_num; /* for timerfd */ - char charbuf[16]; /* for others */ -}; - -TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback); -TAILQ_HEAD(rte_intr_source_list, rte_intr_source); - -struct rte_intr_callback { - TAILQ_ENTRY(rte_intr_callback) next; - rte_intr_callback_fn cb_fn; /**< callback address */ - void *cb_arg; /**< parameter for callback */ -}; - -struct rte_intr_source { - TAILQ_ENTRY(rte_intr_source) next; - struct rte_intr_handle intr_handle; /**< interrupt handle */ - struct rte_intr_cb_list callbacks; /**< user callbacks */ - uint32_t active; -}; - -/* global spinlock for interrupt data operation */ -static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER; - -/* union buffer for pipe read/write */ -static union intr_pipefds intr_pipe; - -/* interrupt sources list */ -static struct rte_intr_source_list intr_sources; - -/* interrupt handling thread */ -static pthread_t intr_thread; - -/* VFIO interrupts */ -#ifdef VFIO_PRESENT - -#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int)) -/* irq set buffer length for queue interrupts and LSC interrupt */ -#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ - sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1)) - -/* enable legacy (INTx) interrupts */ -static int -vfio_enable_intx(struct rte_intr_handle *intr_handle) { - struct vfio_irq_set *irq_set; - char irq_set_buf[IRQ_SET_BUF_LEN]; - int len, ret; - int *fd_ptr; - - len = sizeof(irq_set_buf); - - /* enable INTx */ - irq_set = (struct vfio_irq_set *) irq_set_buf; - irq_set->argsz = len; - irq_set->count = 1; - 
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set->start = 0; - fd_ptr = (int *) &irq_set->data; - *fd_ptr = intr_handle->fd; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) { - RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", - intr_handle->fd); - return -1; - } - - /* unmask INTx after enabling */ - memset(irq_set, 0, len); - len = sizeof(struct vfio_irq_set); - irq_set->argsz = len; - irq_set->count = 1; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set->start = 0; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) { - RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", - intr_handle->fd); - return -1; - } - return 0; -} - -/* disable legacy (INTx) interrupts */ -static int -vfio_disable_intx(struct rte_intr_handle *intr_handle) { - struct vfio_irq_set *irq_set; - char irq_set_buf[IRQ_SET_BUF_LEN]; - int len, ret; - - len = sizeof(struct vfio_irq_set); - - /* mask interrupts before disabling */ - irq_set = (struct vfio_irq_set *) irq_set_buf; - irq_set->argsz = len; - irq_set->count = 1; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set->start = 0; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) { - RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", - intr_handle->fd); - return -1; - } - - /* disable INTx*/ - memset(irq_set, 0, len); - irq_set->argsz = len; - irq_set->count = 0; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; - irq_set->start = 0; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) { - RTE_LOG(ERR, EAL, - "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); - return -1; - } - return 0; -} - -/* enable MSI interrupts */ -static int -vfio_enable_msi(struct rte_intr_handle *intr_handle) { - int len, ret; - char irq_set_buf[IRQ_SET_BUF_LEN]; - struct vfio_irq_set *irq_set; - int *fd_ptr; - - len = sizeof(irq_set_buf); - - irq_set = (struct vfio_irq_set *) irq_set_buf; - irq_set->argsz = len; - irq_set->count = 1; - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; - irq_set->start = 0; - fd_ptr = (int *) &irq_set->data; - *fd_ptr = intr_handle->fd; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) { - RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", - intr_handle->fd); - return -1; - } - return 0; -} - -/* disable MSI interrupts */ -static int -vfio_disable_msi(struct rte_intr_handle *intr_handle) { - struct vfio_irq_set *irq_set; - char irq_set_buf[IRQ_SET_BUF_LEN]; - int len, ret; - - len = sizeof(struct vfio_irq_set); - - irq_set = (struct vfio_irq_set *) irq_set_buf; - irq_set->argsz = len; - irq_set->count = 0; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; - irq_set->start = 0; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) - RTE_LOG(ERR, EAL, - "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); - - return ret; -} - -/* enable MSI-X interrupts */ -static int -vfio_enable_msix(struct rte_intr_handle *intr_handle) { - int len, ret; - char 
irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; - struct vfio_irq_set *irq_set; - int *fd_ptr; - - len = sizeof(irq_set_buf); - - irq_set = (struct vfio_irq_set *) irq_set_buf; - irq_set->argsz = len; - if (!intr_handle->max_intr) - intr_handle->max_intr = 1; - else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID) - intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1; - - irq_set->count = intr_handle->max_intr; - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set->start = 0; - fd_ptr = (int *) &irq_set->data; - /* INTR vector offset 0 reserve for non-efds mapping */ - fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; - memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, - sizeof(*intr_handle->efds) * intr_handle->nb_efd); - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) { - RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", - intr_handle->fd); - return -1; - } - - return 0; -} - -/* disable MSI-X interrupts */ -static int -vfio_disable_msix(struct rte_intr_handle *intr_handle) { - struct vfio_irq_set *irq_set; - char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; - int len, ret; - - len = sizeof(struct vfio_irq_set); - - irq_set = (struct vfio_irq_set *) irq_set_buf; - irq_set->argsz = len; - irq_set->count = 0; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; - irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; - irq_set->start = 0; - - ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); - - if (ret) - RTE_LOG(ERR, EAL, - "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); - - return ret; -} -#endif - -static int -uio_intx_intr_disable(struct rte_intr_handle *intr_handle) -{ - unsigned char command_high; - - /* use UIO config file descriptor for uio_pci_generic */ - if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { - RTE_LOG(ERR, EAL, - "Error reading interrupts status for fd %d\n", - intr_handle->uio_cfg_fd); - return -1; - } - /* disable interrupts */ - command_high |= 0x4; - if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { - RTE_LOG(ERR, EAL, - "Error disabling interrupts for fd %d\n", - intr_handle->uio_cfg_fd); - return -1; - } - - return 0; -} - -static int -uio_intx_intr_enable(struct rte_intr_handle *intr_handle) -{ - unsigned char command_high; - - /* use UIO config file descriptor for uio_pci_generic */ - if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { - RTE_LOG(ERR, EAL, - "Error reading interrupts status for fd %d\n", - intr_handle->uio_cfg_fd); - return -1; - } - /* enable interrupts */ - command_high &= ~0x4; - if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { - RTE_LOG(ERR, EAL, - "Error enabling interrupts for fd %d\n", - intr_handle->uio_cfg_fd); - return -1; - } - - return 0; -} - -static int -uio_intr_disable(struct rte_intr_handle *intr_handle) -{ - const int value = 0; - - if (write(intr_handle->fd, &value, sizeof(value)) < 0) { - RTE_LOG(ERR, EAL, - "Error disabling interrupts for fd %d (%s)\n", - intr_handle->fd, strerror(errno)); - return -1; - } - return 0; -} - -static int -uio_intr_enable(struct rte_intr_handle *intr_handle) -{ - const int value = 1; - - if (write(intr_handle->fd, &value, sizeof(value)) < 0) { - RTE_LOG(ERR, EAL, - "Error enabling interrupts for fd %d (%s)\n", - intr_handle->fd, strerror(errno)); - return -1; - } - return 0; -} - -int -rte_intr_callback_register(struct rte_intr_handle *intr_handle, - rte_intr_callback_fn 
cb, void *cb_arg) -{ - int ret, wake_thread; - struct rte_intr_source *src; - struct rte_intr_callback *callback; - - wake_thread = 0; - - /* first do parameter checking */ - if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) { - RTE_LOG(ERR, EAL, - "Registering with invalid input parameter\n"); - return -EINVAL; - } - - /* allocate a new interrupt callback entity */ - callback = rte_zmalloc("interrupt callback list", - sizeof(*callback), 0); - if (callback == NULL) { - RTE_LOG(ERR, EAL, "Can not allocate memory\n"); - return -ENOMEM; - } - callback->cb_fn = cb; - callback->cb_arg = cb_arg; - - rte_spinlock_lock(&intr_lock); - - /* check if there is at least one callback registered for the fd */ - TAILQ_FOREACH(src, &intr_sources, next) { - if (src->intr_handle.fd == intr_handle->fd) { - /* we had no interrupts for this */ - if TAILQ_EMPTY(&src->callbacks) - wake_thread = 1; - - TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); - ret = 0; - break; - } - } - - /* no existing callbacks for this - add new source */ - if (src == NULL) { - if ((src = rte_zmalloc("interrupt source list", - sizeof(*src), 0)) == NULL) { - RTE_LOG(ERR, EAL, "Can not allocate memory\n"); - rte_free(callback); - ret = -ENOMEM; - } else { - src->intr_handle = *intr_handle; - TAILQ_INIT(&src->callbacks); - TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); - TAILQ_INSERT_TAIL(&intr_sources, src, next); - wake_thread = 1; - ret = 0; - } - } - - rte_spinlock_unlock(&intr_lock); - - /** - * check if need to notify the pipe fd waited by epoll_wait to - * rebuild the wait list. - */ - if (wake_thread) - if (write(intr_pipe.writefd, "1", 1) < 0) - return -EPIPE; - - return ret; -} - -int -rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, - rte_intr_callback_fn cb_fn, void *cb_arg) -{ - int ret; - struct rte_intr_source *src; - struct rte_intr_callback *cb, *next; - - /* do parameter checking first */ - if (intr_handle == NULL || intr_handle->fd < 0) { - RTE_LOG(ERR, EAL, - "Unregistering with invalid input parameter\n"); - return -EINVAL; - } - - rte_spinlock_lock(&intr_lock); - - /* check if the insterrupt source for the fd is existent */ - TAILQ_FOREACH(src, &intr_sources, next) - if (src->intr_handle.fd == intr_handle->fd) - break; - - /* No interrupt source registered for the fd */ - if (src == NULL) { - ret = -ENOENT; - - /* interrupt source has some active callbacks right now. */ - } else if (src->active != 0) { - ret = -EAGAIN; - - /* ok to remove. */ - } else { - ret = 0; - - /*walk through the callbacks and remove all that match. */ - for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) { - - next = TAILQ_NEXT(cb, next); - - if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 || - cb->cb_arg == cb_arg)) { - TAILQ_REMOVE(&src->callbacks, cb, next); - rte_free(cb); - ret++; - } - } - - /* all callbacks for that source are removed. 
*/ - if (TAILQ_EMPTY(&src->callbacks)) { - TAILQ_REMOVE(&intr_sources, src, next); - rte_free(src); - } - } - - rte_spinlock_unlock(&intr_lock); - - /* notify the pipe fd waited by epoll_wait to rebuild the wait list */ - if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) { - ret = -EPIPE; - } - - return ret; -} - -int -rte_intr_enable(struct rte_intr_handle *intr_handle) -{ - if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) - return -1; - - switch (intr_handle->type){ - /* write to the uio fd to enable the interrupt */ - case RTE_INTR_HANDLE_UIO: - if (uio_intr_enable(intr_handle)) - return -1; - break; - case RTE_INTR_HANDLE_UIO_INTX: - if (uio_intx_intr_enable(intr_handle)) - return -1; - break; - /* not used at this moment */ - case RTE_INTR_HANDLE_ALARM: - return -1; -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - if (vfio_enable_msix(intr_handle)) - return -1; - break; - case RTE_INTR_HANDLE_VFIO_MSI: - if (vfio_enable_msi(intr_handle)) - return -1; - break; - case RTE_INTR_HANDLE_VFIO_LEGACY: - if (vfio_enable_intx(intr_handle)) - return -1; - break; -#endif - /* unknown handle type */ - default: - RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); - return -1; - } - - return 0; -} - -int -rte_intr_disable(struct rte_intr_handle *intr_handle) -{ - if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) - return -1; - - switch (intr_handle->type){ - /* write to the uio fd to disable the interrupt */ - case RTE_INTR_HANDLE_UIO: - if (uio_intr_disable(intr_handle)) - return -1; - break; - case RTE_INTR_HANDLE_UIO_INTX: - if (uio_intx_intr_disable(intr_handle)) - return -1; - break; - /* not used at this moment */ - case RTE_INTR_HANDLE_ALARM: - return -1; -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - if (vfio_disable_msix(intr_handle)) - return -1; - break; - case RTE_INTR_HANDLE_VFIO_MSI: - if (vfio_disable_msi(intr_handle)) - return -1; - break; - case RTE_INTR_HANDLE_VFIO_LEGACY: - if (vfio_disable_intx(intr_handle)) - return -1; - break; -#endif - /* unknown handle type */ - default: - RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); - return -1; - } - - return 0; -} - -static int -eal_intr_process_interrupts(struct epoll_event *events, int nfds) -{ - int n, bytes_read; - struct rte_intr_source *src; - struct rte_intr_callback *cb; - union rte_intr_read_buffer buf; - struct rte_intr_callback active_cb; - - for (n = 0; n < nfds; n++) { - - /** - * if the pipe fd is ready to read, return out to - * rebuild the wait list. - */ - if (events[n].data.fd == intr_pipe.readfd){ - int r = read(intr_pipe.readfd, buf.charbuf, - sizeof(buf.charbuf)); - RTE_SET_USED(r); - return -1; - } - rte_spinlock_lock(&intr_lock); - TAILQ_FOREACH(src, &intr_sources, next) - if (src->intr_handle.fd == - events[n].data.fd) - break; - if (src == NULL){ - rte_spinlock_unlock(&intr_lock); - continue; - } - - /* mark this interrupt source as active and release the lock. 
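/*
 * Illustrative sketch only, not part of the EAL sources above: how a driver
 * would typically pair the register/enable calls implemented in this file.
 * The example_* names are hypothetical; the API signatures match the code
 * shown above.
 */
#include <rte_interrupts.h>

static void
example_intr_callback(struct rte_intr_handle *handle, void *cb_arg)
{
        /* runs in the eal-intr-thread; cb_arg is whatever was passed to
         * rte_intr_callback_register() below */
        (void)handle;
        (void)cb_arg;
}

static int
example_intr_setup(struct rte_intr_handle *handle, void *dev)
{
        int ret;

        /* attach the callback first, then unmask the interrupt source */
        ret = rte_intr_callback_register(handle, example_intr_callback, dev);
        if (ret < 0)
                return ret;
        return rte_intr_enable(handle);
}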
*/ - src->active = 1; - rte_spinlock_unlock(&intr_lock); - - /* set the length to be read dor different handle type */ - switch (src->intr_handle.type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: - bytes_read = sizeof(buf.uio_intr_count); - break; - case RTE_INTR_HANDLE_ALARM: - bytes_read = sizeof(buf.timerfd_num); - break; -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: - bytes_read = sizeof(buf.vfio_intr_count); - break; -#endif - case RTE_INTR_HANDLE_EXT: - default: - bytes_read = 1; - break; - } - - if (src->intr_handle.type != RTE_INTR_HANDLE_EXT) { - /** - * read out to clear the ready-to-be-read flag - * for epoll_wait. - */ - bytes_read = read(events[n].data.fd, &buf, bytes_read); - if (bytes_read < 0) { - if (errno == EINTR || errno == EWOULDBLOCK) - continue; - - // TREX_PATCH. Because of issues with e1000, we want this message to - // have lower priority only if running on e1000 card - if (eal_err_read_from_file_is_error) { - RTE_LOG(ERR, EAL, "Error reading from file " - "descriptor %d: %s\n", - events[n].data.fd, - strerror(errno)); - } else { - RTE_LOG(INFO, EAL, "Error reading from file " - "descriptor %d: %s\n", - events[n].data.fd, - strerror(errno)); - } - } else if (bytes_read == 0) - RTE_LOG(ERR, EAL, "Read nothing from file " - "descriptor %d\n", events[n].data.fd); - } - - /* grab a lock, again to call callbacks and update status. */ - rte_spinlock_lock(&intr_lock); - - if (bytes_read > 0) { - - /* Finally, call all callbacks. */ - TAILQ_FOREACH(cb, &src->callbacks, next) { - - /* make a copy and unlock. */ - active_cb = *cb; - rte_spinlock_unlock(&intr_lock); - - /* call the actual callback */ - active_cb.cb_fn(&src->intr_handle, - active_cb.cb_arg); - - /*get the lock back. */ - rte_spinlock_lock(&intr_lock); - } - } - - /* we done with that interrupt source, release it. */ - src->active = 0; - rte_spinlock_unlock(&intr_lock); - } - - return 0; -} - -/** - * It handles all the interrupts. - * - * @param pfd - * epoll file descriptor. - * @param totalfds - * The number of file descriptors added in epoll. - * - * @return - * void - */ -static void -eal_intr_handle_interrupts(int pfd, unsigned totalfds) -{ - struct epoll_event events[totalfds]; - int nfds = 0; - - for(;;) { - nfds = epoll_wait(pfd, events, totalfds, - EAL_INTR_EPOLL_WAIT_FOREVER); - /* epoll_wait fail */ - if (nfds < 0) { - if (errno == EINTR) - continue; - RTE_LOG(ERR, EAL, - "epoll_wait returns with fail\n"); - return; - } - /* epoll_wait timeout, will never happens here */ - else if (nfds == 0) - continue; - /* epoll_wait has at least one fd ready to read */ - if (eal_intr_process_interrupts(events, nfds) < 0) - return; - } -} - -/** - * It builds/rebuilds up the epoll file descriptor with all the - * file descriptors being waited on. Then handles the interrupts. - * - * @param arg - * pointer. 
(unused) - * - * @return - * never return; - */ -static __attribute__((noreturn)) void * -eal_intr_thread_main(__rte_unused void *arg) -{ - struct epoll_event ev; - - /* host thread, never break out */ - for (;;) { - /* build up the epoll fd with all descriptors we are to - * wait on then pass it to the handle_interrupts function - */ - static struct epoll_event pipe_event = { - .events = EPOLLIN | EPOLLPRI, - }; - struct rte_intr_source *src; - unsigned numfds = 0; - - /* create epoll fd */ - int pfd = epoll_create(1); - if (pfd < 0) - rte_panic("Cannot create epoll instance\n"); - - pipe_event.data.fd = intr_pipe.readfd; - /** - * add pipe fd into wait list, this pipe is used to - * rebuild the wait list. - */ - if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd, - &pipe_event) < 0) { - rte_panic("Error adding fd to %d epoll_ctl, %s\n", - intr_pipe.readfd, strerror(errno)); - } - numfds++; - - rte_spinlock_lock(&intr_lock); - - TAILQ_FOREACH(src, &intr_sources, next) { - if (src->callbacks.tqh_first == NULL) - continue; /* skip those with no callbacks */ - ev.events = EPOLLIN | EPOLLPRI; - ev.data.fd = src->intr_handle.fd; - - /** - * add all the uio device file descriptor - * into wait list. - */ - if (epoll_ctl(pfd, EPOLL_CTL_ADD, - src->intr_handle.fd, &ev) < 0){ - rte_panic("Error adding fd %d epoll_ctl, %s\n", - src->intr_handle.fd, strerror(errno)); - } - else - numfds++; - } - rte_spinlock_unlock(&intr_lock); - /* serve the interrupt */ - eal_intr_handle_interrupts(pfd, numfds); - - /** - * when we return, we need to rebuild the - * list of fds to monitor. - */ - close(pfd); - } -} - -int -rte_eal_intr_init(void) -{ - int ret = 0, ret_1 = 0; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; - - /* init the global interrupt source head */ - TAILQ_INIT(&intr_sources); - - /** - * create a pipe which will be waited by epoll and notified to - * rebuild the wait list of epoll. - */ - if (pipe(intr_pipe.pipefd) < 0) - return -1; - - /* create the host thread to wait/handle the interrupt */ - ret = pthread_create(&intr_thread, NULL, - eal_intr_thread_main, NULL); - if (ret != 0) { - RTE_LOG(ERR, EAL, - "Failed to create thread for interrupt handling\n"); - } else { - /* Set thread_name for aid in debugging. */ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, - "eal-intr-thread"); - ret_1 = rte_thread_setname(intr_thread, thread_name); - if (ret_1 != 0) - RTE_LOG(ERR, EAL, - "Failed to set thread name for interrupt handling\n"); - } - - return -ret; -} - -static void -eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) -{ - union rte_intr_read_buffer buf; - int bytes_read = 1; - int nbytes; - - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: - bytes_read = sizeof(buf.uio_intr_count); - break; -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: - bytes_read = sizeof(buf.vfio_intr_count); - break; -#endif - default: - bytes_read = 1; - RTE_LOG(INFO, EAL, "unexpected intr type\n"); - break; - } - - /** - * read out to clear the ready-to-be-read flag - * for epoll_wait. 
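/*
 * Minimal standalone sketch (plain POSIX, hypothetical name) of the
 * self-pipe wake-up the interrupt thread above relies on: the pipe's read
 * end lives in the epoll set, so writing a single byte to the write end
 * forces epoll_wait() to return and lets the thread rebuild its fd list.
 */
#include <sys/epoll.h>
#include <unistd.h>

static int
example_add_wakeup_pipe(int epfd, int pipefd[2])
{
        struct epoll_event ev = { .events = EPOLLIN };

        if (pipe(pipefd) < 0)
                return -1;

        ev.data.fd = pipefd[0];
        /* the read end joins the normal interrupt fds in the epoll set */
        if (epoll_ctl(epfd, EPOLL_CTL_ADD, pipefd[0], &ev) < 0)
                return -1;

        /* any thread can now write(pipefd[1], "1", 1) to wake the waiter */
        return 0;
}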
- */ - do { - nbytes = read(fd, &buf, bytes_read); - if (nbytes < 0) { - if (errno == EINTR || errno == EWOULDBLOCK || - errno == EAGAIN) - continue; - RTE_LOG(ERR, EAL, - "Error reading from fd %d: %s\n", - fd, strerror(errno)); - } else if (nbytes == 0) - RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd); - return; - } while (1); -} - -static int -eal_epoll_process_event(struct epoll_event *evs, unsigned int n, - struct rte_epoll_event *events) -{ - unsigned int i, count = 0; - struct rte_epoll_event *rev; - - for (i = 0; i < n; i++) { - rev = evs[i].data.ptr; - if (!rev || !rte_atomic32_cmpset(&rev->status, RTE_EPOLL_VALID, - RTE_EPOLL_EXEC)) - continue; - - events[count].status = RTE_EPOLL_VALID; - events[count].fd = rev->fd; - events[count].epfd = rev->epfd; - events[count].epdata.event = rev->epdata.event; - events[count].epdata.data = rev->epdata.data; - if (rev->epdata.cb_fun) - rev->epdata.cb_fun(rev->fd, - rev->epdata.cb_arg); - - rte_compiler_barrier(); - rev->status = RTE_EPOLL_VALID; - count++; - } - return count; -} - -static inline int -eal_init_tls_epfd(void) -{ - int pfd = epoll_create(255); - - if (pfd < 0) { - RTE_LOG(ERR, EAL, - "Cannot create epoll instance\n"); - return -1; - } - return pfd; -} - -int -rte_intr_tls_epfd(void) -{ - if (RTE_PER_LCORE(_epfd) == -1) - RTE_PER_LCORE(_epfd) = eal_init_tls_epfd(); - - return RTE_PER_LCORE(_epfd); -} - -int -rte_epoll_wait(int epfd, struct rte_epoll_event *events, - int maxevents, int timeout) -{ - struct epoll_event evs[maxevents]; - int rc; - - if (!events) { - RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n"); - return -1; - } - - /* using per thread epoll fd */ - if (epfd == RTE_EPOLL_PER_THREAD) - epfd = rte_intr_tls_epfd(); - - while (1) { - rc = epoll_wait(epfd, evs, maxevents, timeout); - if (likely(rc > 0)) { - /* epoll_wait has at least one fd ready to read */ - rc = eal_epoll_process_event(evs, rc, events); - break; - } else if (rc < 0) { - if (errno == EINTR) - continue; - /* epoll_wait fail */ - RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n", - strerror(errno)); - rc = -1; - break; - } else { - /* rc == 0, epoll_wait timed out */ - break; - } - } - - return rc; -} - -static inline void -eal_epoll_data_safe_free(struct rte_epoll_event *ev) -{ - while (!rte_atomic32_cmpset(&ev->status, RTE_EPOLL_VALID, - RTE_EPOLL_INVALID)) - while (ev->status != RTE_EPOLL_VALID) - rte_pause(); - memset(&ev->epdata, 0, sizeof(ev->epdata)); - ev->fd = -1; - ev->epfd = -1; -} - -int -rte_epoll_ctl(int epfd, int op, int fd, - struct rte_epoll_event *event) -{ - struct epoll_event ev; - - if (!event) { - RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n"); - return -1; - } - - /* using per thread epoll fd */ - if (epfd == RTE_EPOLL_PER_THREAD) - epfd = rte_intr_tls_epfd(); - - if (op == EPOLL_CTL_ADD) { - event->status = RTE_EPOLL_VALID; - event->fd = fd; /* ignore fd in event */ - event->epfd = epfd; - ev.data.ptr = (void *)event; - } - - ev.events = event->epdata.event; - if (epoll_ctl(epfd, op, fd, &ev) < 0) { - RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n", - op, fd, strerror(errno)); - if (op == EPOLL_CTL_ADD) - /* rollback status when CTL_ADD fail */ - event->status = RTE_EPOLL_INVALID; - return -1; - } - - if (op == EPOLL_CTL_DEL && event->status != RTE_EPOLL_INVALID) - eal_epoll_data_safe_free(event); - - return 0; -} - -int -rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd, - int op, unsigned int vec, void *data) -{ - struct rte_epoll_event *rev; - struct rte_epoll_data *epdata; - int epfd_op; 
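/*
 * Hedged usage sketch (EXAMPLE_MAX_EVENTS and the function name are made
 * up): polling the calling thread's private epoll instance through the
 * wrapper implemented above. RTE_EPOLL_PER_THREAD resolves lazily to the fd
 * created by rte_intr_tls_epfd().
 */
#include <rte_interrupts.h>

#define EXAMPLE_MAX_EVENTS 8

static void
example_poll_rx_events(void)
{
        struct rte_epoll_event evs[EXAMPLE_MAX_EVENTS];
        int i, n;

        /* wait up to 10 ms for queue vectors added via rte_intr_rx_ctl() */
        n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, evs, EXAMPLE_MAX_EVENTS, 10);
        for (i = 0; i < n; i++) {
                void *cookie = evs[i].epdata.data; /* driver-supplied data */
                (void)cookie;
        }
}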
- unsigned int efd_idx; - int rc = 0; - - efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ? - (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec; - - if (!intr_handle || intr_handle->nb_efd == 0 || - efd_idx >= intr_handle->nb_efd) { - RTE_LOG(ERR, EAL, "Wrong intr vector number.\n"); - return -EPERM; - } - - switch (op) { - case RTE_INTR_EVENT_ADD: - epfd_op = EPOLL_CTL_ADD; - rev = &intr_handle->elist[efd_idx]; - if (rev->status != RTE_EPOLL_INVALID) { - RTE_LOG(INFO, EAL, "Event already been added.\n"); - return -EEXIST; - } - - /* attach to intr vector fd */ - epdata = &rev->epdata; - epdata->event = EPOLLIN | EPOLLPRI | EPOLLET; - epdata->data = data; - epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr; - epdata->cb_arg = (void *)intr_handle; - rc = rte_epoll_ctl(epfd, epfd_op, - intr_handle->efds[efd_idx], rev); - if (!rc) - RTE_LOG(DEBUG, EAL, - "efd %d associated with vec %d added on epfd %d" - "\n", rev->fd, vec, epfd); - else - rc = -EPERM; - break; - case RTE_INTR_EVENT_DEL: - epfd_op = EPOLL_CTL_DEL; - rev = &intr_handle->elist[efd_idx]; - if (rev->status == RTE_EPOLL_INVALID) { - RTE_LOG(INFO, EAL, "Event does not exist.\n"); - return -EPERM; - } - - rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev); - if (rc) - rc = -EPERM; - break; - default: - RTE_LOG(ERR, EAL, "event op type mismatch\n"); - rc = -EPERM; - } - - return rc; -} - -int -rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) -{ - uint32_t i; - int fd; - uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); - - assert(nb_efd != 0); - - if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) { - for (i = 0; i < n; i++) { - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, - "can't setup eventfd, error %i (%s)\n", - errno, strerror(errno)); - return -1; - } - intr_handle->efds[i] = fd; - } - intr_handle->nb_efd = n; - intr_handle->max_intr = NB_OTHER_INTR + n; - } else { - intr_handle->efds[0] = intr_handle->fd; - intr_handle->nb_efd = RTE_MIN(nb_efd, 1U); - intr_handle->max_intr = NB_OTHER_INTR; - } - - return 0; -} - -void -rte_intr_efd_disable(struct rte_intr_handle *intr_handle) -{ - uint32_t i; - struct rte_epoll_event *rev; - - for (i = 0; i < intr_handle->nb_efd; i++) { - rev = &intr_handle->elist[i]; - if (rev->status == RTE_EPOLL_INVALID) - continue; - if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { - /* force free if the entry valid */ - eal_epoll_data_safe_free(rev); - rev->status = RTE_EPOLL_INVALID; - } - } - - if (intr_handle->max_intr > intr_handle->nb_efd) { - for (i = 0; i < intr_handle->nb_efd; i++) - close(intr_handle->efds[i]); - } - intr_handle->nb_efd = 0; - intr_handle->max_intr = 0; -} - -int -rte_intr_dp_is_en(struct rte_intr_handle *intr_handle) -{ - return !(!intr_handle->nb_efd); -} - -int -rte_intr_allow_others(struct rte_intr_handle *intr_handle) -{ - if (!rte_intr_dp_is_en(intr_handle)) - return 1; - else - return !!(intr_handle->max_intr - intr_handle->nb_efd); -} - -int -rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) -{ - if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) - return 1; - - return 0; -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_ivshmem.c deleted file mode 100644 index 589019b1..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_ivshmem.c +++ /dev/null @@ -1,958 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "eal_internal_cfg.h" -#include "eal_private.h" - -#define PCI_VENDOR_ID_IVSHMEM 0x1Af4 -#define PCI_DEVICE_ID_IVSHMEM 0x1110 - -#define IVSHMEM_MAGIC 0x0BADC0DE - -#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2" -#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config" - -#define PHYS 0x1 -#define VIRT 0x2 -#define IOREMAP 0x4 -#define FULL (PHYS|VIRT|IOREMAP) - -#define METADATA_SIZE_ALIGNED \ - (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz)) - -#define CONTAINS(x,y)\ - (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len)) - -#define DIM(x) (sizeof(x)/sizeof(x[0])) - -struct ivshmem_pci_device { - char path[PATH_MAX]; - phys_addr_t ioremap_addr; -}; - -/* data type to store in config */ -struct ivshmem_segment { - struct rte_ivshmem_metadata_entry entry; - uint64_t align; - char path[PATH_MAX]; -}; -struct ivshmem_shared_config { - struct ivshmem_segment segment[RTE_MAX_MEMSEG]; - uint32_t segment_idx; - struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS]; - uint32_t pci_devs_idx; -}; -static struct ivshmem_shared_config * ivshmem_config; -static int memseg_idx; -static int pagesz; - -/* Tailq heads to add rings to */ -TAILQ_HEAD(rte_ring_list, rte_tailq_entry); - -/* - * Utility functions - */ - -static int -is_ivshmem_device(struct rte_pci_device * dev) -{ - return (dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM - && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM); -} - -static void * -map_metadata(int fd, uint64_t len) -{ - size_t metadata_len = sizeof(struct rte_ivshmem_metadata); - size_t aligned_len = METADATA_SIZE_ALIGNED; - - return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, len - aligned_len); -} - -static void 
-unmap_metadata(void * ptr) -{ - munmap(ptr, sizeof(struct rte_ivshmem_metadata)); -} - -static int -has_ivshmem_metadata(int fd, uint64_t len) -{ - struct rte_ivshmem_metadata metadata; - void * ptr; - - ptr = map_metadata(fd, len); - - if (ptr == MAP_FAILED) - return -1; - - metadata = *(struct rte_ivshmem_metadata*) (ptr); - - unmap_metadata(ptr); - - return metadata.magic_number == IVSHMEM_MAGIC; -} - -static void -remove_segment(struct ivshmem_segment * ms, int len, int idx) -{ - int i; - - for (i = idx; i < len - 1; i++) - memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment)); - memset(&ms[len-1], 0, sizeof(struct ivshmem_segment)); -} - -static int -overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2) -{ - uint64_t start1, end1, start2, end2; - uint64_t p_start1, p_end1, p_start2, p_end2; - uint64_t i_start1, i_end1, i_start2, i_end2; - int result = 0; - - /* gather virtual addresses */ - start1 = mz1->addr_64; - end1 = mz1->addr_64 + mz1->len; - start2 = mz2->addr_64; - end2 = mz2->addr_64 + mz2->len; - - /* gather physical addresses */ - p_start1 = mz1->phys_addr; - p_end1 = mz1->phys_addr + mz1->len; - p_start2 = mz2->phys_addr; - p_end2 = mz2->phys_addr + mz2->len; - - /* gather ioremap addresses */ - i_start1 = mz1->ioremap_addr; - i_end1 = mz1->ioremap_addr + mz1->len; - i_start2 = mz2->ioremap_addr; - i_end2 = mz2->ioremap_addr + mz2->len; - - /* check for overlap in virtual addresses */ - if (start1 >= start2 && start1 < end2) - result |= VIRT; - if (start2 >= start1 && start2 < end1) - result |= VIRT; - - /* check for overlap in physical addresses */ - if (p_start1 >= p_start2 && p_start1 < p_end2) - result |= PHYS; - if (p_start2 >= p_start1 && p_start2 < p_end1) - result |= PHYS; - - /* check for overlap in ioremap addresses */ - if (i_start1 >= i_start2 && i_start1 < i_end2) - result |= IOREMAP; - if (i_start2 >= i_start1 && i_start2 < i_end1) - result |= IOREMAP; - - return result; -} - -static int -adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2) -{ - uint64_t start1, end1, start2, end2; - uint64_t p_start1, p_end1, p_start2, p_end2; - uint64_t i_start1, i_end1, i_start2, i_end2; - int result = 0; - - /* gather virtual addresses */ - start1 = mz1->addr_64; - end1 = mz1->addr_64 + mz1->len; - start2 = mz2->addr_64; - end2 = mz2->addr_64 + mz2->len; - - /* gather physical addresses */ - p_start1 = mz1->phys_addr; - p_end1 = mz1->phys_addr + mz1->len; - p_start2 = mz2->phys_addr; - p_end2 = mz2->phys_addr + mz2->len; - - /* gather ioremap addresses */ - i_start1 = mz1->ioremap_addr; - i_end1 = mz1->ioremap_addr + mz1->len; - i_start2 = mz2->ioremap_addr; - i_end2 = mz2->ioremap_addr + mz2->len; - - /* check if segments are virtually adjacent */ - if (start1 == end2) - result |= VIRT; - if (start2 == end1) - result |= VIRT; - - /* check if segments are physically adjacent */ - if (p_start1 == p_end2) - result |= PHYS; - if (p_start2 == p_end1) - result |= PHYS; - - /* check if segments are ioremap-adjacent */ - if (i_start1 == i_end2) - result |= IOREMAP; - if (i_start2 == i_end1) - result |= IOREMAP; - - return result; -} - -static int -has_adjacent_segments(struct ivshmem_segment * ms, int len) -{ - int i, j, a; - - for (i = 0; i < len; i++) - for (j = i + 1; j < len; j++) { - a = adjacent(&ms[i].entry.mz, &ms[j].entry.mz); - - /* check if segments are adjacent virtually and/or physically but - * not ioremap (since that would indicate that they are from - * different PCI devices and thus don't need to be concatenated. 
- */ - if ((a & (VIRT|PHYS)) > 0 && (a & IOREMAP) == 0) - return 1; - } - return 0; -} - -static int -has_overlapping_segments(struct ivshmem_segment * ms, int len) -{ - int i, j; - - for (i = 0; i < len; i++) - for (j = i + 1; j < len; j++) - if (overlap(&ms[i].entry.mz, &ms[j].entry.mz)) - return 1; - return 0; -} - -static int -seg_compare(const void * a, const void * b) -{ - const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a; - const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b; - - /* move unallocated zones to the end */ - if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL) - return 0; - if (s1->entry.mz.addr == 0) - return 1; - if (s2->entry.mz.addr == 0) - return -1; - - return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr; -} - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG -static void -entry_dump(struct rte_ivshmem_metadata_entry *e) -{ - RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr, - RTE_PTR_ADD(e->mz.addr, e->mz.len)); - RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n", - e->mz.phys_addr, - e->mz.phys_addr + e->mz.len); - RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n", - e->mz.ioremap_addr, - e->mz.ioremap_addr + e->mz.len); - RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len); - RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset); -} -#endif - - - -/* - * Actual useful code - */ - -/* read through metadata mapped from the IVSHMEM device */ -static int -read_metadata(char * path, int path_len, int fd, uint64_t flen) -{ - struct rte_ivshmem_metadata metadata; - struct rte_ivshmem_metadata_entry * entry; - int idx, i; - void * ptr; - - ptr = map_metadata(fd, flen); - - if (ptr == MAP_FAILED) - return -1; - - metadata = *(struct rte_ivshmem_metadata*) (ptr); - - unmap_metadata(ptr); - - RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name); - - idx = ivshmem_config->segment_idx; - - for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES && - idx <= RTE_MAX_MEMSEG; i++) { - - if (idx == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Not enough memory segments!\n"); - return -1; - } - - entry = &metadata.entry[i]; - - /* stop on uninitialized memzone */ - if (entry->mz.len == 0) - break; - - /* copy metadata entry */ - memcpy(&ivshmem_config->segment[idx].entry, entry, - sizeof(struct rte_ivshmem_metadata_entry)); - - /* copy path */ - snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path); - - idx++; - } - ivshmem_config->segment_idx = idx; - - return 0; -} - -/* check through each segment and look for adjacent or overlapping ones. 
*/ -static int -cleanup_segments(struct ivshmem_segment * ms, int tbl_len) -{ - struct ivshmem_segment * s, * tmp; - int i, j, concat, seg_adjacent, seg_overlapping; - uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2; - - qsort(ms, tbl_len, sizeof(struct ivshmem_segment), - seg_compare); - - while (has_overlapping_segments(ms, tbl_len) || - has_adjacent_segments(ms, tbl_len)) { - - for (i = 0; i < tbl_len; i++) { - s = &ms[i]; - - concat = 0; - - for (j = i + 1; j < tbl_len; j++) { - tmp = &ms[j]; - - /* check if this segment is overlapping with existing segment, - * or is adjacent to existing segment */ - seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz); - seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz); - - /* check if segments fully overlap or are fully adjacent */ - if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) { - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Concatenating segments\n"); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); - entry_dump(&s->entry); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); - entry_dump(&tmp->entry); -#endif - - start1 = s->entry.mz.addr_64; - start2 = tmp->entry.mz.addr_64; - p_start1 = s->entry.mz.phys_addr; - p_start2 = tmp->entry.mz.phys_addr; - i_start1 = s->entry.mz.ioremap_addr; - i_start2 = tmp->entry.mz.ioremap_addr; - end1 = s->entry.mz.addr_64 + s->entry.mz.len; - end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len; - - /* settle for minimum start address and maximum length */ - s->entry.mz.addr_64 = RTE_MIN(start1, start2); - s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2); - s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2); - s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset); - s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64; - concat = 1; - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Resulting segment:\n"); - entry_dump(&s->entry); - -#endif - } - /* if segments not fully overlap, we have an error condition. - * adjacent segments can coexist. 
- */ - else if (seg_overlapping > 0) { - RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j); -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); - entry_dump(&s->entry); - RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); - entry_dump(&tmp->entry); -#endif - return -1; - } - if (concat) - break; - } - /* if we concatenated, remove segment at j */ - if (concat) { - remove_segment(ms, tbl_len, j); - tbl_len--; - break; - } - } - } - - return tbl_len; -} - -static int -create_shared_config(void) -{ - char path[PATH_MAX]; - int fd; - - /* build ivshmem config file path */ - snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, - internal_config.hugefile_prefix); - - fd = open(path, O_CREAT | O_RDWR, 0600); - - if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno)); - return -1; - } - - /* try ex-locking first - if the file is locked, we have a problem */ - if (flock(fd, LOCK_EX | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno)); - close(fd); - return -1; - } - - if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) { - RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno)); - return -1; - } - - ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), - PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - - if (ivshmem_config == MAP_FAILED) - return -1; - - memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config)); - - /* change the exclusive lock we got earlier to a shared lock */ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); - return -1; - } - - close(fd); - - return 0; -} - -/* open shared config file and, if present, map the config. - * having no config file is not an error condition, as we later check if - * ivshmem_config is NULL (if it is, that means nothing was mapped). */ -static int -open_shared_config(void) -{ - char path[PATH_MAX]; - int fd; - - /* build ivshmem config file path */ - snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, - internal_config.hugefile_prefix); - - fd = open(path, O_RDONLY); - - /* if the file doesn't exist, just return success */ - if (fd < 0 && errno == ENOENT) - return 0; - /* else we have an error condition */ - else if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s: %s\n", - path, strerror(errno)); - return -1; - } - - /* try ex-locking first - if the lock *does* succeed, this means it's a - * stray config file, so it should be deleted. - */ - if (flock(fd, LOCK_EX | LOCK_NB) != -1) { - - /* if we can't remove the file, something is wrong */ - if (unlink(path) < 0) { - RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path, - strerror(errno)); - return -1; - } - - /* release the lock */ - flock(fd, LOCK_UN); - close(fd); - - /* return success as having a stray config file is equivalent to not - * having config file at all. - */ - return 0; - } - - ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), - PROT_READ, MAP_SHARED, fd, 0); - - if (ivshmem_config == MAP_FAILED) - return -1; - - /* place a shared lock on config file */ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); - return -1; - } - - close(fd); - - return 0; -} - -/* - * This function does the following: - * - * 1) Builds a table of ivshmem_segments with proper offset alignment - * 2) Cleans up that table so that we don't have any overlapping or adjacent - * memory segments - * 3) Creates memsegs from this table and maps them into memory. 
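/*
 * The config-file handling above leans on flock() twice: the creator holds
 * LOCK_EX while it initializes the file and then drops to LOCK_SH, and a
 * stray file is recognized later because LOCK_EX still succeeds on it. A
 * minimal sketch of the downgrade step (hypothetical name); note that
 * flock() on the same fd converts the existing lock rather than stacking a
 * second one.
 */
#include <sys/file.h>

static int
example_init_then_share(int fd)
{
        /* exclusive while the file contents are being written ... */
        if (flock(fd, LOCK_EX | LOCK_NB) < 0)
                return -1;

        /* ... then shared, so other live processes can attach read-only */
        return flock(fd, LOCK_SH | LOCK_NB);
}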
- */ -static inline int -map_all_segments(void) -{ - struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG]; - struct ivshmem_pci_device * pci_dev; - struct rte_mem_config * mcfg; - struct ivshmem_segment * seg; - int fd, fd_zero; - unsigned i, j; - struct rte_memzone mz; - struct rte_memseg ms; - void * base_addr; - uint64_t align, len; - phys_addr_t ioremap_addr; - - ioremap_addr = 0; - - memset(ms_tbl, 0, sizeof(ms_tbl)); - memset(&mz, 0, sizeof(struct rte_memzone)); - memset(&ms, 0, sizeof(struct rte_memseg)); - - /* first, build a table of memsegs to map, to avoid failed mmaps due to - * overlaps - */ - for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) { - if (i == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Too many segments requested!\n"); - return -1; - } - - seg = &ivshmem_config->segment[i]; - - /* copy segment to table */ - memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment)); - - /* find ioremap addr */ - for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) { - pci_dev = &ivshmem_config->pci_devs[j]; - if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) { - ioremap_addr = pci_dev->ioremap_addr; - break; - } - } - if (ioremap_addr == 0) { - RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n"); - return -1; - } - - /* work out alignments */ - align = seg->entry.mz.addr_64 - - RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000); - len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000); - - /* save original alignments */ - ms_tbl[i].align = align; - - /* create a memory zone */ - mz.addr_64 = seg->entry.mz.addr_64 - align; - mz.len = len; - mz.hugepage_sz = seg->entry.mz.hugepage_sz; - mz.phys_addr = seg->entry.mz.phys_addr - align; - - /* find true physical address */ - mz.ioremap_addr = ioremap_addr + seg->entry.offset - align; - - ms_tbl[i].entry.offset = seg->entry.offset - align; - - memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone)); - } - - /* clean up the segments */ - memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx); - - if (memseg_idx < 0) - return -1; - - mcfg = rte_eal_get_configuration()->mem_config; - - fd_zero = open("/dev/zero", O_RDWR); - - if (fd_zero < 0) { - RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno)); - return -1; - } - - /* create memsegs and put them into DPDK memory */ - for (i = 0; i < (unsigned) memseg_idx; i++) { - - seg = &ms_tbl[i]; - - ms.addr_64 = seg->entry.mz.addr_64; - ms.hugepage_sz = seg->entry.mz.hugepage_sz; - ms.len = seg->entry.mz.len; - ms.nchannel = rte_memory_get_nchannel(); - ms.nrank = rte_memory_get_nrank(); - ms.phys_addr = seg->entry.mz.phys_addr; - ms.ioremap_addr = seg->entry.mz.ioremap_addr; - ms.socket_id = seg->entry.mz.socket_id; - - base_addr = mmap(ms.addr, ms.len, - PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0); - - if (base_addr == MAP_FAILED || base_addr != ms.addr) { - RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n"); - return -1; - } - - fd = open(seg->path, O_RDWR); - - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path, - strerror(errno)); - return -1; - } - - munmap(ms.addr, ms.len); - - base_addr = mmap(ms.addr, ms.len, - PROT_READ | PROT_WRITE, MAP_SHARED, fd, - seg->entry.offset); - - - if (base_addr == MAP_FAILED || base_addr != ms.addr) { - RTE_LOG(ERR, EAL, "Cannot map segment into memory: " - "expected %p got %p (%s)\n", ms.addr, base_addr, - strerror(errno)); - return -1; - } - - RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at " - "offset 0x%" PRIx64 "\n", - ms.addr, ms.len, seg->entry.offset); - - /* put the 
pointers back into their real positions using original - * alignment */ - ms.addr_64 += seg->align; - ms.phys_addr += seg->align; - ms.ioremap_addr += seg->align; - ms.len -= seg->align; - - /* at this point, the rest of DPDK memory is not initialized, so we - * expect memsegs to be empty */ - memcpy(&mcfg->memseg[i], &ms, - sizeof(struct rte_memseg)); - - close(fd); - - RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n", - ms.len); - } - - return 0; -} - -/* this happens at a later stage, after general EAL memory initialization */ -int -rte_eal_ivshmem_obj_init(void) -{ - struct rte_ring_list* ring_list = NULL; - struct rte_mem_config * mcfg; - struct ivshmem_segment * seg; - struct rte_memzone * mz; - struct rte_ring * r; - struct rte_tailq_entry *te; - unsigned i, ms, idx; - uint64_t offset; - - /* secondary process would not need any object discovery - it'll all - * already be in shared config */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL) - return 0; - - /* check that we have an initialised ring tail queue */ - ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list); - if (ring_list == NULL) { - RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n"); - return -1; - } - - mcfg = rte_eal_get_configuration()->mem_config; - - /* create memzones */ - for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) { - - seg = &ivshmem_config->segment[i]; - - /* add memzone */ - if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) { - RTE_LOG(ERR, EAL, "No more memory zones available!\n"); - return -1; - } - - idx = mcfg->memzone_cnt; - - RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n", - seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len); - - memcpy(&mcfg->memzone[idx], &seg->entry.mz, - sizeof(struct rte_memzone)); - - /* find ioremap address */ - for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) { - if (ms == RTE_MAX_MEMSEG) { - RTE_LOG(ERR, EAL, "Physical address of segment not found!\n"); - return -1; - } - if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) { - offset = mcfg->memzone[idx].addr_64 - - mcfg->memseg[ms].addr_64; - mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr + - offset; - break; - } - } - - mcfg->memzone_cnt++; - } - - rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); - - /* find rings */ - for (i = 0; i < mcfg->memzone_cnt; i++) { - mz = &mcfg->memzone[i]; - - /* check if memzone has a ring prefix */ - if (strncmp(mz->name, RTE_RING_MZ_PREFIX, - sizeof(RTE_RING_MZ_PREFIX) - 1) != 0) - continue; - - r = (struct rte_ring*) (mz->addr_64); - - te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0); - if (te == NULL) { - RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n"); - return -1; - } - - te->data = (void *) r; - - TAILQ_INSERT_TAIL(ring_list, te, next); - - RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr); - } - rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); - -#ifdef RTE_LIBRTE_IVSHMEM_DEBUG - rte_memzone_dump(stdout); - rte_ring_list_dump(stdout); -#endif - - return 0; -} - -/* initialize ivshmem structures */ -int rte_eal_ivshmem_init(void) -{ - struct rte_pci_device * dev; - struct rte_pci_resource * res; - int fd, ret; - char path[PATH_MAX]; - - /* initialize everything to 0 */ - memset(path, 0, sizeof(path)); - ivshmem_config = NULL; - - pagesz = getpagesize(); - - RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n"); - - if (rte_eal_process_type() == RTE_PROC_SECONDARY) { - - if (open_shared_config() < 0) { - RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n"); 
- return -1; - } - } - else { - - TAILQ_FOREACH(dev, &pci_device_list, next) { - - if (is_ivshmem_device(dev)) { - - /* IVSHMEM memory is always on BAR2 */ - res = &dev->mem_resource[2]; - - /* if we don't have a BAR2 */ - if (res->len == 0) - continue; - - /* construct pci device path */ - snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH, - dev->addr.domain, dev->addr.bus, dev->addr.devid, - dev->addr.function); - - /* try to find memseg */ - fd = open(path, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", path); - return -1; - } - - /* check if it's a DPDK IVSHMEM device */ - ret = has_ivshmem_metadata(fd, res->len); - - /* is DPDK device */ - if (ret == 1) { - - /* config file creation is deferred until the first - * DPDK device is found. then, it has to be created - * only once. */ - if (ivshmem_config == NULL && - create_shared_config() < 0) { - RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n"); - close(fd); - return -1; - } - - if (read_metadata(path, sizeof(path), fd, res->len) < 0) { - RTE_LOG(ERR, EAL, "Could not read metadata from" - " device %02x:%02x.%x!\n", dev->addr.bus, - dev->addr.devid, dev->addr.function); - close(fd); - return -1; - } - - if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) { - RTE_LOG(WARNING, EAL, - "IVSHMEM PCI device limit exceeded. Increase " - "CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in " - "your config file.\n"); - break; - } - - RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n", - dev->addr.bus, dev->addr.devid, dev->addr.function); - - ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr; - snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path, - sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path), - "%s", path); - - ivshmem_config->pci_devs_idx++; - } - /* failed to read */ - else if (ret < 0) { - RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n", - strerror(errno)); - close(fd); - return -1; - } - /* not a DPDK device */ - else - RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n"); - - /* close the BAR fd */ - close(fd); - } - } - } - - /* ivshmem_config is not NULL only if config was created and/or mapped */ - if (ivshmem_config) { - if (map_all_segments() < 0) { - RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n"); - return -1; - } - } - else { - RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! \n"); - } - - return 0; -} - -#endif diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_lcore.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_lcore.c deleted file mode 100644 index de5b4260..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_lcore.c +++ /dev/null @@ -1,110 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "eal_private.h" -#include "eal_filesystem.h" -#include "eal_thread.h" - -#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u" -#define CORE_ID_FILE "topology/core_id" -#define NUMA_NODE_PATH "/sys/devices/system/node" - -/* Check if a cpu is present by the presence of the cpu information for it */ -int -eal_cpu_detected(unsigned lcore_id) -{ - char path[PATH_MAX]; - int len = snprintf(path, sizeof(path), SYS_CPU_DIR - "/"CORE_ID_FILE, lcore_id); - if (len <= 0 || (unsigned)len >= sizeof(path)) - return 0; - if (access(path, F_OK) != 0) - return 0; - - return 1; -} - -/* - * Get CPU socket id (NUMA node) for a logical core. - * - * This searches each nodeX directories in /sys for the symlink for the given - * lcore_id and returns the numa node where the lcore is found. If lcore is not - * found on any numa node, returns zero. - */ -unsigned -eal_cpu_socket_id(unsigned lcore_id) -{ - unsigned socket; - - for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { - char path[PATH_MAX]; - - snprintf(path, sizeof(path), "%s/node%u/cpu%u", NUMA_NODE_PATH, - socket, lcore_id); - if (access(path, F_OK) == 0) - return socket; - } - return 0; -} - -/* Get the cpu core id value from the /sys/.../cpuX core_id value */ -unsigned -eal_cpu_core_id(unsigned lcore_id) -{ - char path[PATH_MAX]; - unsigned long id; - - int len = snprintf(path, sizeof(path), SYS_CPU_DIR "/%s", lcore_id, CORE_ID_FILE); - if (len <= 0 || (unsigned)len >= sizeof(path)) - goto err; - if (eal_parse_sysfs_value(path, &id) != 0) - goto err; - return (unsigned)id; - -err: - RTE_LOG(ERR, EAL, "Error reading core id value from %s " - "for lcore %u - assuming core 0\n", SYS_CPU_DIR, lcore_id); - return 0; -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_log.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_log.c deleted file mode 100644 index 0b133c3e..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_log.c +++ /dev/null @@ -1,146 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
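/*
 * The lcore helpers above reduce to probing fixed sysfs paths. A trimmed
 * down, standalone probe (plain libc, hypothetical name) mirroring the same
 * presence check:
 */
#include <stdio.h>
#include <unistd.h>

/* returns 1 if /sys exposes a core_id for this cpu, i.e. the cpu exists */
static int
example_cpu_present(unsigned int cpu)
{
        char path[256];

        snprintf(path, sizeof(path),
                 "/sys/devices/system/cpu/cpu%u/topology/core_id", cpu);
        return access(path, F_OK) == 0;
}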
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "eal_private.h" - -/* - * default log function, used once mempool (hence log history) is - * available - */ -static ssize_t -console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) -{ - char copybuf[BUFSIZ + 1]; - ssize_t ret; - uint32_t loglevel; - - /* add this log in history */ - rte_log_add_in_history(buf, size); - - /* write on stdout */ - ret = fwrite(buf, 1, size, stdout); - fflush(stdout); - - /* truncate message if too big (should not happen) */ - if (size > BUFSIZ) - size = BUFSIZ; - - /* Syslog error levels are from 0 to 7, so subtract 1 to convert */ - loglevel = rte_log_cur_msg_loglevel() - 1; - memcpy(copybuf, buf, size); - copybuf[size] = '\0'; - - /* write on syslog too */ - syslog(loglevel, "%s", copybuf); - - return ret; -} - -static cookie_io_functions_t console_log_func = { - .write = console_log_write, -}; - -/* - * set the log to default function, called during eal init process, - * once memzones are available. 
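/*
 * The stream plumbing above is glibc's fopencookie(): every stdio write on
 * the returned FILE * is routed to the .write hook. Stripped-down sketch
 * (hypothetical names), assuming glibc is available:
 */
#define _GNU_SOURCE
#include <stdio.h>

static ssize_t
example_cookie_write(void *cookie, const char *buf, size_t size)
{
        (void)cookie;                  /* no per-stream state needed here */
        return fwrite(buf, 1, size, stderr);
}

static FILE *
example_open_log_stream(void)
{
        cookie_io_functions_t funcs = { .write = example_cookie_write };

        /* fprintf()/fputs() on this stream end up in the hook above */
        return fopencookie(NULL, "w+", funcs);
}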
- */ -int -rte_eal_log_init(const char *id, int facility) -{ - FILE *log_stream; - - log_stream = fopencookie(NULL, "w+", console_log_func); - if (log_stream == NULL) - return -1; - - openlog(id, LOG_NDELAY | LOG_PID, facility); - - if (rte_eal_common_log_init(log_stream) < 0) - return -1; - - return 0; -} - -/* early logs */ - -/* - * early log function, used during boot when mempool (hence log - * history) is not available - */ -static ssize_t -early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) -{ - ssize_t ret; - ret = fwrite(buf, size, 1, stdout); - fflush(stdout); - if (ret == 0) - return -1; - return ret; -} - -static cookie_io_functions_t early_log_func = { - .write = early_log_write, -}; -static FILE *early_log_stream; - -/* - * init the log library, called by rte_eal_init() to enable early - * logs - */ -int -rte_eal_log_early_init(void) -{ - early_log_stream = fopencookie(NULL, "w+", early_log_func); - if (early_log_stream == NULL) { - printf("Cannot configure early_log_stream\n"); - return -1; - } - rte_openlog_stream(early_log_stream); - return 0; -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_memory.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_memory.c deleted file mode 100644 index 846fd31f..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_memory.c +++ /dev/null @@ -1,1599 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -/* BSD LICENSE - * - * Copyright(c) 2013 6WIND. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#define _FILE_OFFSET_BITS 64 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "eal_private.h" -#include "eal_internal_cfg.h" -#include "eal_filesystem.h" -#include "eal_hugepages.h" - -#ifdef RTE_LIBRTE_XEN_DOM0 -int rte_xen_dom0_supported(void) -{ - return internal_config.xen_dom0_support; -} -#endif - -/** - * @file - * Huge page mapping under linux - * - * To reserve a big contiguous amount of memory, we use the hugepage - * feature of linux. For that, we need to have hugetlbfs mounted. This - * code will create many files in this directory (one per page) and - * map them in virtual memory. For each page, we will retrieve its - * physical address and remap it in order to have a virtual contiguous - * zone as well as a physical contiguous zone. - */ - -static uint64_t baseaddr_offset; - -static unsigned proc_pagemap_readable; - -#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" - -static void -test_proc_pagemap_readable(void) -{ - int fd = open("/proc/self/pagemap", O_RDONLY); - - if (fd < 0) { - RTE_LOG(ERR, EAL, - "Cannot open /proc/self/pagemap: %s. " - "virt2phys address translation will not work\n", - strerror(errno)); - return; - } - - /* Is readable */ - close(fd); - proc_pagemap_readable = 1; -} - -/* Lock page in physical memory and prevent from swapping. */ -int -rte_mem_lock_page(const void *virt) -{ - unsigned long virtual = (unsigned long)virt; - int page_size = getpagesize(); - unsigned long aligned = (virtual & ~ (page_size - 1)); - return mlock((void*)aligned, page_size); -} - -/* - * Get physical address of any mapped virtual address in the current process. 
- */ -phys_addr_t -rte_mem_virt2phy(const void *virtaddr) -{ - int fd; - uint64_t page, physaddr; - unsigned long virt_pfn; - int page_size; - off_t offset; - - /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ - if (!proc_pagemap_readable) - return RTE_BAD_PHYS_ADDR; - - /* standard page size */ - page_size = getpagesize(); - - fd = open("/proc/self/pagemap", O_RDONLY); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", - __func__, strerror(errno)); - return RTE_BAD_PHYS_ADDR; - } - - virt_pfn = (unsigned long)virtaddr / page_size; - offset = sizeof(uint64_t) * virt_pfn; - if (lseek(fd, offset, SEEK_SET) == (off_t) -1) { - RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", - __func__, strerror(errno)); - close(fd); - return RTE_BAD_PHYS_ADDR; - } - if (read(fd, &page, sizeof(uint64_t)) < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", - __func__, strerror(errno)); - close(fd); - return RTE_BAD_PHYS_ADDR; - } - - /* - * the pfn (page frame number) are bits 0-54 (see - * pagemap.txt in linux Documentation) - */ - physaddr = ((page & 0x7fffffffffffffULL) * page_size) - + ((unsigned long)virtaddr % page_size); - close(fd); - return physaddr; -} - -/* - * For each hugepage in hugepg_tbl, fill the physaddr value. We find - * it by browsing the /proc/self/pagemap special file. - */ -static int -find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) -{ - unsigned i; - phys_addr_t addr; - - for (i = 0; i < hpi->num_pages[0]; i++) { - addr = rte_mem_virt2phy(hugepg_tbl[i].orig_va); - if (addr == RTE_BAD_PHYS_ADDR) - return -1; - hugepg_tbl[i].physaddr = addr; - } - return 0; -} - -/* - * Check whether address-space layout randomization is enabled in - * the kernel. This is important for multi-process as it can prevent - * two processes mapping data to the same virtual address - * Returns: - * 0 - address space randomization disabled - * 1/2 - address space randomization enabled - * negative error code on error - */ -static int -aslr_enabled(void) -{ - char c; - int retval, fd = open(RANDOMIZE_VA_SPACE_FILE, O_RDONLY); - if (fd < 0) - return -errno; - retval = read(fd, &c, 1); - close(fd); - if (retval < 0) - return -errno; - if (retval == 0) - return -EIO; - switch (c) { - case '0' : return 0; - case '1' : return 1; - case '2' : return 2; - default: return -EINVAL; - } -} - -/* - * Try to mmap *size bytes in /dev/zero. If it is successful, return the - * pointer to the mmap'd area and keep *size unmodified. Else, retry - * with a smaller zone: decrease *size by hugepage_sz until it reaches - * 0. In this case, return NULL. Note: this function returns an address - * which is a multiple of hugepage size. 
- */ -static void * -get_virtual_area(size_t *size, size_t hugepage_sz) -{ - void *addr; - int fd; - long aligned_addr; - - if (internal_config.base_virtaddr != 0) { - addr = (void*) (uintptr_t) (internal_config.base_virtaddr + - baseaddr_offset); - } - else addr = NULL; - - RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size); - - fd = open("/dev/zero", O_RDONLY); - if (fd < 0){ - RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n"); - return NULL; - } - do { - addr = mmap(addr, - (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0); - if (addr == MAP_FAILED) - *size -= hugepage_sz; - } while (addr == MAP_FAILED && *size > 0); - - if (addr == MAP_FAILED) { - close(fd); - RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n", - strerror(errno)); - return NULL; - } - - munmap(addr, (*size) + hugepage_sz); - close(fd); - - /* align addr to a huge page size boundary */ - aligned_addr = (long)addr; - aligned_addr += (hugepage_sz - 1); - aligned_addr &= (~(hugepage_sz - 1)); - addr = (void *)(aligned_addr); - - RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", - addr, *size); - - /* increment offset */ - baseaddr_offset += *size; - - return addr; -} - -/* - * Mmap all hugepages of hugepage table: it first open a file in - * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the - * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored - * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to - * map continguous physical blocks in contiguous virtual blocks. - */ -static int -map_all_hugepages(struct hugepage_file *hugepg_tbl, - struct hugepage_info *hpi, int orig) -{ - int fd; - unsigned i; - void *virtaddr; - void *vma_addr = NULL; - size_t vma_len = 0; - -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - RTE_SET_USED(vma_len); -#endif - - for (i = 0; i < hpi->num_pages[0]; i++) { - uint64_t hugepage_sz = hpi->hugepage_sz; - - if (orig) { - hugepg_tbl[i].file_id = i; - hugepg_tbl[i].size = hugepage_sz; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - eal_get_hugefile_temp_path(hugepg_tbl[i].filepath, - sizeof(hugepg_tbl[i].filepath), hpi->hugedir, - hugepg_tbl[i].file_id); -#else - eal_get_hugefile_path(hugepg_tbl[i].filepath, - sizeof(hugepg_tbl[i].filepath), hpi->hugedir, - hugepg_tbl[i].file_id); -#endif - hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0'; - } -#ifndef RTE_ARCH_64 - /* for 32-bit systems, don't remap 1G and 16G pages, just reuse - * original map address as final map address. - */ - else if ((hugepage_sz == RTE_PGSIZE_1G) - || (hugepage_sz == RTE_PGSIZE_16G)) { - hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; - hugepg_tbl[i].orig_va = NULL; - continue; - } -#endif - -#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS - else if (vma_len == 0) { - unsigned j, num_pages; - - /* reserve a virtual area for next contiguous - * physical block: count the number of - * contiguous physical pages. */ - for (j = i+1; j < hpi->num_pages[0] ; j++) { -#ifdef RTE_ARCH_PPC_64 - /* The physical addresses are sorted in - * descending order on PPC64 */ - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr - hugepage_sz) - break; -#else - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr + hugepage_sz) - break; -#endif - } - num_pages = j - i; - vma_len = num_pages * hugepage_sz; - - /* get the biggest virtual memory area up to - * vma_len. If it fails, vma_addr is NULL, so - * let the kernel provide the address. 
*/ - vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz); - if (vma_addr == NULL) - vma_len = hugepage_sz; - } -#endif - - /* try to create hugepage file */ - fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, - strerror(errno)); - return -1; - } - - virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - if (virtaddr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, - strerror(errno)); - close(fd); - return -1; - } - - if (orig) { - hugepg_tbl[i].orig_va = virtaddr; - memset(virtaddr, 0, hugepage_sz); - } - else { - hugepg_tbl[i].final_va = virtaddr; - } - - /* set shared flock on the file. */ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", - __func__, strerror(errno)); - close(fd); - return -1; - } - - close(fd); - - vma_addr = (char *)vma_addr + hugepage_sz; - vma_len -= hugepage_sz; - } - return 0; -} - -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - -/* - * Remaps all hugepages into single file segments - */ -static int -remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) -{ - int fd; - unsigned i = 0, j, num_pages, page_idx = 0; - void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL; - size_t vma_len = 0; - size_t hugepage_sz = hpi->hugepage_sz; - size_t total_size, offset; - char filepath[MAX_HUGEPAGE_PATH]; - phys_addr_t physaddr; - int socket; - - while (i < hpi->num_pages[0]) { - -#ifndef RTE_ARCH_64 - /* for 32-bit systems, don't remap 1G pages and 16G pages, - * just reuse original map address as final map address. - */ - if ((hugepage_sz == RTE_PGSIZE_1G) - || (hugepage_sz == RTE_PGSIZE_16G)) { - hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; - hugepg_tbl[i].orig_va = NULL; - i++; - continue; - } -#endif - - /* reserve a virtual area for next contiguous - * physical block: count the number of - * contiguous physical pages. */ - for (j = i+1; j < hpi->num_pages[0] ; j++) { -#ifdef RTE_ARCH_PPC_64 - /* The physical addresses are sorted in descending - * order on PPC64 */ - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr - hugepage_sz) - break; -#else - if (hugepg_tbl[j].physaddr != - hugepg_tbl[j-1].physaddr + hugepage_sz) - break; -#endif - } - num_pages = j - i; - vma_len = num_pages * hugepage_sz; - - socket = hugepg_tbl[i].socket_id; - - /* get the biggest virtual memory area up to - * vma_len. If it fails, vma_addr is NULL, so - * let the kernel provide the address. 
*/ - vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz); - - /* If we can't find a big enough virtual area, work out how many pages - * we are going to get */ - if (vma_addr == NULL) - j = i + 1; - else if (vma_len != num_pages * hugepage_sz) { - num_pages = vma_len / hugepage_sz; - j = i + num_pages; - - } - - hugepg_tbl[page_idx].file_id = page_idx; - eal_get_hugefile_path(filepath, - sizeof(filepath), - hpi->hugedir, - hugepg_tbl[page_idx].file_id); - - /* try to create hugepage file */ - fd = open(filepath, O_CREAT | O_RDWR, 0755); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); - return -1; - } - - total_size = 0; - for (;i < j; i++) { - - /* unmap current segment */ - if (total_size > 0) - munmap(vma_addr, total_size); - - /* unmap original page */ - munmap(hugepg_tbl[i].orig_va, hugepage_sz); - unlink(hugepg_tbl[i].filepath); - - total_size += hugepage_sz; - - old_addr = vma_addr; - - /* map new, bigger segment */ - vma_addr = mmap(vma_addr, total_size, - PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - - if (vma_addr == MAP_FAILED || vma_addr != old_addr) { - RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); - close(fd); - return -1; - } - - /* touch the page. this is needed because kernel postpones mapping - * creation until the first page fault. with this, we pin down - * the page and it is marked as used and gets into process' pagemap. - */ - for (offset = 0; offset < total_size; offset += hugepage_sz) - *((volatile uint8_t*) RTE_PTR_ADD(vma_addr, offset)); - } - - /* set shared flock on the file. */ - if (flock(fd, LOCK_SH | LOCK_NB) == -1) { - RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", - __func__, strerror(errno)); - close(fd); - return -1; - } - - snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s", - filepath); - - physaddr = rte_mem_virt2phy(vma_addr); - - if (physaddr == RTE_BAD_PHYS_ADDR) - return -1; - - hugepg_tbl[page_idx].final_va = vma_addr; - - hugepg_tbl[page_idx].physaddr = physaddr; - - hugepg_tbl[page_idx].repeated = num_pages; - - hugepg_tbl[page_idx].socket_id = socket; - - close(fd); - - /* verify the memory segment - that is, check that every VA corresponds - * to the physical address we expect to see - */ - for (offset = 0; offset < vma_len; offset += hugepage_sz) { - uint64_t expected_physaddr; - - expected_physaddr = hugepg_tbl[page_idx].physaddr + offset; - page_addr = RTE_PTR_ADD(vma_addr, offset); - physaddr = rte_mem_virt2phy(page_addr); - - if (physaddr != expected_physaddr) { - RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr " - "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64 - " (expected 0x%" PRIx64 ")\n", - page_addr, offset, physaddr, expected_physaddr); - return -1; - } - } - - /* zero out the whole segment */ - memset(hugepg_tbl[page_idx].final_va, 0, total_size); - - page_idx++; - } - - /* zero out the rest */ - memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file)); - return page_idx; -} -#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */ - -/* Unmap all hugepages from original mapping */ -static int -unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) -{ - unsigned i; - for (i = 0; i < hpi->num_pages[0]; i++) { - if (hugepg_tbl[i].orig_va) { - munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz); - hugepg_tbl[i].orig_va = NULL; - } - } - return 0; -} -#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */ - -/* - * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge - * page. 
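(For context, a representative — not verbatim — /proc/self/numa_maps entry for a hugetlbfs-backed mapping looks roughly like the line below; the parser that follows keys on the leading hex address, the " huge " marker or the hugefile prefix, and the N<node>=<count> token to recover the socket ID.)

	7f61c0000000 default file=/mnt/huge/rtemap_0 huge dirty=1 N0=1 kernelpagesize_kB=2048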
- */ -static int -find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) -{ - int socket_id; - char *end, *nodestr; - unsigned i, hp_count = 0; - uint64_t virt_addr; - char buf[BUFSIZ]; - char hugedir_str[PATH_MAX]; - FILE *f; - - f = fopen("/proc/self/numa_maps", "r"); - if (f == NULL) { - RTE_LOG(NOTICE, EAL, "cannot open /proc/self/numa_maps," - " consider that all memory is in socket_id 0\n"); - return 0; - } - - snprintf(hugedir_str, sizeof(hugedir_str), - "%s/%s", hpi->hugedir, internal_config.hugefile_prefix); - - /* parse numa map */ - while (fgets(buf, sizeof(buf), f) != NULL) { - - /* ignore non huge page */ - if (strstr(buf, " huge ") == NULL && - strstr(buf, hugedir_str) == NULL) - continue; - - /* get zone addr */ - virt_addr = strtoull(buf, &end, 16); - if (virt_addr == 0 || end == buf) { - RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); - goto error; - } - - /* get node id (socket id) */ - nodestr = strstr(buf, " N"); - if (nodestr == NULL) { - RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); - goto error; - } - nodestr += 2; - end = strstr(nodestr, "="); - if (end == NULL) { - RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); - goto error; - } - end[0] = '\0'; - end = NULL; - - socket_id = strtoul(nodestr, &end, 0); - if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) { - RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); - goto error; - } - - /* if we find this page in our mappings, set socket_id */ - for (i = 0; i < hpi->num_pages[0]; i++) { - void *va = (void *)(unsigned long)virt_addr; - if (hugepg_tbl[i].orig_va == va) { - hugepg_tbl[i].socket_id = socket_id; - hp_count++; - } - } - } - - if (hp_count < hpi->num_pages[0]) - goto error; - - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} - -/* - * Sort the hugepg_tbl by physical address (lower addresses first on x86, - * higher address first on powerpc). We use a slow algorithm, but we won't - * have millions of pages, and this is only done at init time. 
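(Side note: the same ordering could be written more compactly as a qsort() comparator; the sketch below is illustrative only — cmp_physaddr is not a name from this file — and mirrors the ascending-on-x86 / descending-on-PPC64 convention described above.)

	static int
	cmp_physaddr(const void *a, const void *b)
	{
		const struct hugepage_file *p1 = a;
		const struct hugepage_file *p2 = b;
	#ifdef RTE_ARCH_PPC_64
		/* descending, to match the ordering produced by the code below */
		if (p1->physaddr < p2->physaddr)
			return 1;
		if (p1->physaddr > p2->physaddr)
			return -1;
	#else
		/* ascending */
		if (p1->physaddr < p2->physaddr)
			return -1;
		if (p1->physaddr > p2->physaddr)
			return 1;
	#endif
		return 0;
	}
	/* usage (needs <stdlib.h>):
	 * qsort(hugepg_tbl, hpi->num_pages[0], sizeof(*hugepg_tbl), cmp_physaddr); */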
- */ -static int -sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) -{ - unsigned i, j; - int compare_idx; - uint64_t compare_addr; - struct hugepage_file tmp; - - for (i = 0; i < hpi->num_pages[0]; i++) { - compare_addr = 0; - compare_idx = -1; - - /* - * browse all entries starting at 'i', and find the - * entry with the smallest addr - */ - for (j=i; j< hpi->num_pages[0]; j++) { - - if (compare_addr == 0 || -#ifdef RTE_ARCH_PPC_64 - hugepg_tbl[j].physaddr > compare_addr) { -#else - hugepg_tbl[j].physaddr < compare_addr) { -#endif - compare_addr = hugepg_tbl[j].physaddr; - compare_idx = j; - } - } - - /* should not happen */ - if (compare_idx == -1) { - RTE_LOG(ERR, EAL, "%s(): error in physaddr sorting\n", __func__); - return -1; - } - - /* swap the 2 entries in the table */ - memcpy(&tmp, &hugepg_tbl[compare_idx], - sizeof(struct hugepage_file)); - memcpy(&hugepg_tbl[compare_idx], &hugepg_tbl[i], - sizeof(struct hugepage_file)); - memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage_file)); - } - return 0; -} - -/* - * Uses mmap to create a shared memory area for storage of data - * Used in this file to store the hugepage file map on disk - */ -static void * -create_shared_memory(const char *filename, const size_t mem_size) -{ - void *retval; - int fd = open(filename, O_CREAT | O_RDWR, 0666); - if (fd < 0) - return NULL; - if (ftruncate(fd, mem_size) < 0) { - close(fd); - return NULL; - } - retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - close(fd); - return retval; -} - -/* - * this copies *active* hugepages from one hugepage table to another. - * destination is typically the shared memory. - */ -static int -copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size, - const struct hugepage_file * src, int src_size) -{ - int src_pos, dst_pos = 0; - - for (src_pos = 0; src_pos < src_size; src_pos++) { - if (src[src_pos].final_va != NULL) { - /* error on overflow attempt */ - if (dst_pos == dest_size) - return -1; - memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file)); - dst_pos++; - } - } - return 0; -} - -static int -unlink_hugepage_files(struct hugepage_file *hugepg_tbl, - unsigned num_hp_info) -{ - unsigned socket, size; - int page, nrpages = 0; - - /* get total number of hugepages */ - for (size = 0; size < num_hp_info; size++) - for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) - nrpages += - internal_config.hugepage_info[size].num_pages[socket]; - - for (page = 0; page < nrpages; page++) { - struct hugepage_file *hp = &hugepg_tbl[page]; - - if (hp->final_va != NULL && unlink(hp->filepath)) { - RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n", - __func__, hp->filepath, strerror(errno)); - } - } - return 0; -} - -/* - * unmaps hugepages that are not going to be used. since we originally allocate - * ALL hugepages (not just those we need), additional unmapping needs to be done. 
- */ -static int -unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, - struct hugepage_info *hpi, - unsigned num_hp_info) -{ - unsigned socket, size; - int page, nrpages = 0; - - /* get total number of hugepages */ - for (size = 0; size < num_hp_info; size++) - for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) - nrpages += internal_config.hugepage_info[size].num_pages[socket]; - - for (size = 0; size < num_hp_info; size++) { - for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { - unsigned pages_found = 0; - - /* traverse until we have unmapped all the unused pages */ - for (page = 0; page < nrpages; page++) { - struct hugepage_file *hp = &hugepg_tbl[page]; - -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* if this page was already cleared */ - if (hp->final_va == NULL) - continue; -#endif - - /* find a page that matches the criteria */ - if ((hp->size == hpi[size].hugepage_sz) && - (hp->socket_id == (int) socket)) { - - /* if we skipped enough pages, unmap the rest */ - if (pages_found == hpi[size].num_pages[socket]) { - uint64_t unmap_len; - -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - unmap_len = hp->size * hp->repeated; -#else - unmap_len = hp->size; -#endif - - /* get start addr and len of the remaining segment */ - munmap(hp->final_va, (size_t) unmap_len); - - hp->final_va = NULL; - if (unlink(hp->filepath) == -1) { - RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n", - __func__, hp->filepath, strerror(errno)); - return -1; - } - } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* else, check how much do we need to map */ - else { - int nr_pg_left = - hpi[size].num_pages[socket] - pages_found; - - /* if we need enough memory to fit into the segment */ - if (hp->repeated <= nr_pg_left) { - pages_found += hp->repeated; - } - /* truncate the segment */ - else { - uint64_t final_size = nr_pg_left * hp->size; - uint64_t seg_size = hp->repeated * hp->size; - - void * unmap_va = RTE_PTR_ADD(hp->final_va, - final_size); - int fd; - - munmap(unmap_va, seg_size - final_size); - - fd = open(hp->filepath, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - hp->filepath, strerror(errno)); - return -1; - } - if (ftruncate(fd, final_size) < 0) { - RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n", - hp->filepath, strerror(errno)); - return -1; - } - close(fd); - - pages_found += nr_pg_left; - hp->repeated = nr_pg_left; - } - } -#else - /* else, lock the page and skip */ - else - pages_found++; -#endif - - } /* match page */ - } /* foreach page */ - } /* foreach socket */ - } /* foreach pagesize */ - - return 0; -} - -static inline uint64_t -get_socket_mem_size(int socket) -{ - uint64_t size = 0; - unsigned i; - - for (i = 0; i < internal_config.num_hugepage_sizes; i++){ - struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) - size += hpi->hugepage_sz * hpi->num_pages[socket]; - } - - return size; -} - -/* - * This function is a NUMA-aware equivalent of calc_num_pages. 
- * It takes in the list of hugepage sizes and the - * number of pages thereof, and calculates the best number of - * pages of each size to fulfill the request for ram - */ -static int -calc_num_pages_per_socket(uint64_t * memory, - struct hugepage_info *hp_info, - struct hugepage_info *hp_used, - unsigned num_hp_info) -{ - unsigned socket, j, i = 0; - unsigned requested, available; - int total_num_pages = 0; - uint64_t remaining_mem, cur_mem; - uint64_t total_mem = internal_config.memory; - - if (num_hp_info == 0) - return -1; - - /* if specific memory amounts per socket weren't requested */ - if (internal_config.force_sockets == 0) { - int cpu_per_socket[RTE_MAX_NUMA_NODES]; - size_t default_size, total_size; - unsigned lcore_id; - - /* Compute number of cores per socket */ - memset(cpu_per_socket, 0, sizeof(cpu_per_socket)); - RTE_LCORE_FOREACH(lcore_id) { - cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++; - } - - /* - * Automatically spread requested memory amongst detected sockets according - * to number of cores from cpu mask present on each socket - */ - total_size = internal_config.memory; - for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { - - /* Set memory amount per socket */ - default_size = (internal_config.memory * cpu_per_socket[socket]) - / rte_lcore_count(); - - /* Limit to maximum available memory on socket */ - default_size = RTE_MIN(default_size, get_socket_mem_size(socket)); - - /* Update sizes */ - memory[socket] = default_size; - total_size -= default_size; - } - - /* - * If some memory is remaining, try to allocate it by getting all - * available memory from sockets, one after the other - */ - for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { - /* take whatever is available */ - default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket], - total_size); - - /* Update sizes */ - memory[socket] += default_size; - total_size -= default_size; - } - } - - for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) { - /* skips if the memory on specific socket wasn't requested */ - for (i = 0; i < num_hp_info && memory[socket] != 0; i++){ - hp_used[i].hugedir = hp_info[i].hugedir; - hp_used[i].num_pages[socket] = RTE_MIN( - memory[socket] / hp_info[i].hugepage_sz, - hp_info[i].num_pages[socket]); - - cur_mem = hp_used[i].num_pages[socket] * - hp_used[i].hugepage_sz; - - memory[socket] -= cur_mem; - total_mem -= cur_mem; - - total_num_pages += hp_used[i].num_pages[socket]; - - /* check if we have met all memory requests */ - if (memory[socket] == 0) - break; - - /* check if we have any more pages left at this size, if so - * move on to next size */ - if (hp_used[i].num_pages[socket] == hp_info[i].num_pages[socket]) - continue; - /* At this point we know that there are more pages available that are - * bigger than the memory we want, so lets see if we can get enough - * from other page sizes. 
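(A concrete, hypothetical illustration of the spreading above: with -m 2048 and six lcores of which four sit on socket 0 and two on socket 1, socket 0 is first assigned 2048*4/6 = 1365 MB and socket 1 gets 2048*2/6 = 682 MB, each capped by get_socket_mem_size(); the 1 MB lost to integer division is then taken from whichever socket still has spare hugepage memory by the second loop.)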
- */ - remaining_mem = 0; - for (j = i+1; j < num_hp_info; j++) - remaining_mem += hp_info[j].hugepage_sz * - hp_info[j].num_pages[socket]; - - /* is there enough other memory, if not allocate another page and quit */ - if (remaining_mem < memory[socket]){ - cur_mem = RTE_MIN(memory[socket], - hp_info[i].hugepage_sz); - memory[socket] -= cur_mem; - total_mem -= cur_mem; - hp_used[i].num_pages[socket]++; - total_num_pages++; - break; /* we are done with this socket*/ - } - } - /* if we didn't satisfy all memory requirements per socket */ - if (memory[socket] > 0) { - /* to prevent icc errors */ - requested = (unsigned) (internal_config.socket_mem[socket] / - 0x100000); - available = requested - - ((unsigned) (memory[socket] / 0x100000)); - RTE_LOG(ERR, EAL, "Not enough memory available on socket %u! " - "Requested: %uMB, available: %uMB\n", socket, - requested, available); - return -1; - } - } - - /* if we didn't satisfy total memory requirements */ - if (total_mem > 0) { - requested = (unsigned) (internal_config.memory / 0x100000); - available = requested - (unsigned) (total_mem / 0x100000); - RTE_LOG(ERR, EAL, "Not enough memory available! Requested: %uMB," - " available: %uMB\n", requested, available); - return -1; - } - return total_num_pages; -} - -/* - * Prepare physical memory mapping: fill configuration structure with - * these infos, return 0 on success. - * 1. map N huge pages in separate files in hugetlbfs - * 2. find associated physical addr - * 3. find associated NUMA socket ID - * 4. sort all huge pages by physical address - * 5. remap these N huge pages in the correct order - * 6. unmap the first mapping - * 7. fill memsegs in configuration with contiguous zones - */ -int -rte_eal_hugepage_init(void) -{ - struct rte_mem_config *mcfg; - struct hugepage_file *hugepage, *tmp_hp = NULL; - struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES]; - - uint64_t memory[RTE_MAX_NUMA_NODES]; - - unsigned hp_offset; - int i, j, new_memseg; - int nr_hugefiles, nr_hugepages = 0; - void *addr; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - int new_pages_count[MAX_HUGEPAGE_SIZES]; -#endif - - test_proc_pagemap_readable(); - - memset(used_hp, 0, sizeof(used_hp)); - - /* get pointer to global configuration */ - mcfg = rte_eal_get_configuration()->mem_config; - - /* hugetlbfs can be disabled */ - if (internal_config.no_hugetlbfs) { - addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); - if (addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, - strerror(errno)); - return -1; - } - mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; - mcfg->memseg[0].addr = addr; - mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; - mcfg->memseg[0].len = internal_config.memory; - mcfg->memseg[0].socket_id = 0; - return 0; - } - -/* check if app runs on Xen Dom0 */ - if (internal_config.xen_dom0_support) { -#ifdef RTE_LIBRTE_XEN_DOM0 - /* use dom0_mm kernel driver to init memory */ - if (rte_xen_dom0_memory_init() < 0) - return -1; - else - return 0; -#endif - } - - /* calculate total number of hugepages available. at this point we haven't - * yet started sorting them so they all are on socket 0 */ - for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { - /* meanwhile, also initialize used_hp hugepage sizes in used_hp */ - used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz; - - nr_hugepages += internal_config.hugepage_info[i].num_pages[0]; - } - - /* - * allocate a memory area for hugepage table. 
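(Usage note: the branches above are selected by the standard EAL memory options — for instance --socket-mem=1024,1024 --huge-dir=/mnt/huge --file-prefix=pg1 for a hugetlbfs-backed run with explicit per-socket amounts, --no-huge -m 512 to take the anonymous-mmap path, --huge-unlink to remove the backing files after mapping, and --xen-dom0 for the Xen branch; the values shown are illustrative.)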
- * this isn't shared memory yet. due to the fact that we need some - * processing done on these pages, shared memory will be created - * at a later stage. - */ - tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file)); - if (tmp_hp == NULL) - goto fail; - - memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file)); - - hp_offset = 0; /* where we start the current page size entries */ - - /* map all hugepages and sort them */ - for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){ - struct hugepage_info *hpi; - - /* - * we don't yet mark hugepages as used at this stage, so - * we just map all hugepages available to the system - * all hugepages are still located on socket 0 - */ - hpi = &internal_config.hugepage_info[i]; - - if (hpi->num_pages[0] == 0) - continue; - - /* map all hugepages available */ - if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; - } - - /* find physical addresses and sockets for each hugepage */ - if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; - } - - if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; - } - - if (sort_by_physaddr(&tmp_hp[hp_offset], hpi) < 0) - goto fail; - -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - /* remap all hugepages into single file segments */ - new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi); - if (new_pages_count[i] < 0){ - RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; - } - - /* we have processed a num of hugepages of this size, so inc offset */ - hp_offset += new_pages_count[i]; -#else - /* remap all hugepages */ - if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; - } - - /* unmap original mappings */ - if (unmap_all_hugepages_orig(&tmp_hp[hp_offset], hpi) < 0) - goto fail; - - /* we have processed a num of hugepages of this size, so inc offset */ - hp_offset += hpi->num_pages[0]; -#endif - } - -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - nr_hugefiles = 0; - for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { - nr_hugefiles += new_pages_count[i]; - } -#else - nr_hugefiles = nr_hugepages; -#endif - - - /* clean out the numbers of pages */ - for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) - for (j = 0; j < RTE_MAX_NUMA_NODES; j++) - internal_config.hugepage_info[i].num_pages[j] = 0; - - /* get hugepages for each socket */ - for (i = 0; i < nr_hugefiles; i++) { - int socket = tmp_hp[i].socket_id; - - /* find a hugepage info with right size and increment num_pages */ - const int nb_hpsizes = RTE_MIN(MAX_HUGEPAGE_SIZES, - (int)internal_config.num_hugepage_sizes); - for (j = 0; j < nb_hpsizes; j++) { - if (tmp_hp[i].size == - internal_config.hugepage_info[j].hugepage_sz) { -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - internal_config.hugepage_info[j].num_pages[socket] += - tmp_hp[i].repeated; -#else - internal_config.hugepage_info[j].num_pages[socket]++; -#endif - } - } - } - - /* make a copy of socket_mem, needed for number of pages calculation */ - for (i = 0; i < RTE_MAX_NUMA_NODES; i++) - memory[i] = internal_config.socket_mem[i]; - - /* 
calculate final number of pages */ - nr_hugepages = calc_num_pages_per_socket(memory, - internal_config.hugepage_info, used_hp, - internal_config.num_hugepage_sizes); - - /* error if not enough memory available */ - if (nr_hugepages < 0) - goto fail; - - /* reporting in! */ - for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { - for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { - if (used_hp[i].num_pages[j] > 0) { - RTE_LOG(DEBUG, EAL, - "Requesting %u pages of size %uMB" - " from socket %i\n", - used_hp[i].num_pages[j], - (unsigned) - (used_hp[i].hugepage_sz / 0x100000), - j); - } - } - } - - /* create shared memory */ - hugepage = create_shared_memory(eal_hugepage_info_path(), - nr_hugefiles * sizeof(struct hugepage_file)); - - if (hugepage == NULL) { - RTE_LOG(ERR, EAL, "Failed to create shared memory!\n"); - goto fail; - } - memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file)); - - /* - * unmap pages that we won't need (looks at used_hp). - * also, sets final_va to NULL on pages that were unmapped. - */ - if (unmap_unneeded_hugepages(tmp_hp, used_hp, - internal_config.num_hugepage_sizes) < 0) { - RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n"); - goto fail; - } - - /* - * copy stuff from malloc'd hugepage* to the actual shared memory. - * this procedure only copies those hugepages that have final_va - * not NULL. has overflow protection. - */ - if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles, - tmp_hp, nr_hugefiles) < 0) { - RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n"); - goto fail; - } - - /* free the hugepage backing files */ - if (internal_config.hugepage_unlink && - unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) { - RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n"); - goto fail; - } - - /* free the temporary hugepage table */ - free(tmp_hp); - tmp_hp = NULL; - - /* find earliest free memseg - this is needed because in case of IVSHMEM, - * segments might have already been initialized */ - for (j = 0; j < RTE_MAX_MEMSEG; j++) - if (mcfg->memseg[j].addr == NULL) { - /* move to previous segment and exit loop */ - j--; - break; - } - - for (i = 0; i < nr_hugefiles; i++) { - new_memseg = 0; - - /* if this is a new section, create a new memseg */ - if (i == 0) - new_memseg = 1; - else if (hugepage[i].socket_id != hugepage[i-1].socket_id) - new_memseg = 1; - else if (hugepage[i].size != hugepage[i-1].size) - new_memseg = 1; - -#ifdef RTE_ARCH_PPC_64 - /* On PPC64 architecture, the mmap always start from higher - * virtual address to lower address. 
Here, both the physical - * address and virtual address are in descending order */ - else if ((hugepage[i-1].physaddr - hugepage[i].physaddr) != - hugepage[i].size) - new_memseg = 1; - else if (((unsigned long)hugepage[i-1].final_va - - (unsigned long)hugepage[i].final_va) != hugepage[i].size) - new_memseg = 1; -#else - else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) != - hugepage[i].size) - new_memseg = 1; - else if (((unsigned long)hugepage[i].final_va - - (unsigned long)hugepage[i-1].final_va) != hugepage[i].size) - new_memseg = 1; -#endif - - if (new_memseg) { - j += 1; - if (j == RTE_MAX_MEMSEG) - break; - - mcfg->memseg[j].phys_addr = hugepage[i].physaddr; - mcfg->memseg[j].addr = hugepage[i].final_va; -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated; -#else - mcfg->memseg[j].len = hugepage[i].size; -#endif - mcfg->memseg[j].socket_id = hugepage[i].socket_id; - mcfg->memseg[j].hugepage_sz = hugepage[i].size; - } - /* continuation of previous memseg */ - else { -#ifdef RTE_ARCH_PPC_64 - /* Use the phy and virt address of the last page as segment - * address for IBM Power architecture */ - mcfg->memseg[j].phys_addr = hugepage[i].physaddr; - mcfg->memseg[j].addr = hugepage[i].final_va; -#endif - mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz; - } - hugepage[i].memseg_id = j; - } - - if (i < nr_hugefiles) { - RTE_LOG(ERR, EAL, "Can only reserve %d pages " - "from %d requested\n" - "Current %s=%d is not enough\n" - "Please either increase it or request less amount " - "of memory.\n", - i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG), - RTE_MAX_MEMSEG); - return -ENOMEM; - } - - return 0; - -fail: - if (tmp_hp) - free(tmp_hp); - return -1; -} - -/* - * uses fstat to report the size of a file on disk - */ -static off_t -getFileSize(int fd) -{ - struct stat st; - if (fstat(fd, &st) < 0) - return 0; - return st.st_size; -} - -/* - * This creates the memory mappings in the secondary process to match that of - * the server process. 
It goes through each memory segment in the DPDK runtime - * configuration and finds the hugepages which form that segment, mapping them - * in order to form a contiguous block in the virtual memory space - */ -int -rte_eal_hugepage_attach(void) -{ - const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - const struct hugepage_file *hp = NULL; - unsigned num_hp = 0; - unsigned i, s = 0; /* s used to track the segment number */ - off_t size; - int fd, fd_zero = -1, fd_hugepage = -1; - - if (aslr_enabled() > 0) { - RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization " - "(ASLR) is enabled in the kernel.\n"); - RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory " - "into secondary processes\n"); - } - - test_proc_pagemap_readable(); - - if (internal_config.xen_dom0_support) { -#ifdef RTE_LIBRTE_XEN_DOM0 - if (rte_xen_dom0_memory_attach() < 0) { - RTE_LOG(ERR, EAL,"Failed to attach memory setments of primay " - "process\n"); - return -1; - } - return 0; -#endif - } - - fd_zero = open("/dev/zero", O_RDONLY); - if (fd_zero < 0) { - RTE_LOG(ERR, EAL, "Could not open /dev/zero\n"); - goto error; - } - fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY); - if (fd_hugepage < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path()); - goto error; - } - - /* map all segments into memory to make sure we get the addrs */ - for (s = 0; s < RTE_MAX_MEMSEG; ++s) { - void *base_addr; - - /* - * the first memory segment with len==0 is the one that - * follows the last valid segment. - */ - if (mcfg->memseg[s].len == 0) - break; - -#ifdef RTE_LIBRTE_IVSHMEM - /* - * if segment has ioremap address set, it's an IVSHMEM segment and - * doesn't need mapping as it was already mapped earlier - */ - if (mcfg->memseg[s].ioremap_addr != 0) - continue; -#endif - - /* - * fdzero is mmapped to get a contiguous block of virtual - * addresses of the appropriate memseg size. - * use mmap to get identical addresses as the primary process. 
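(Usage note: a process reaches this attach path by initializing the EAL with --proc-type=secondary and the same --file-prefix as the primary; a minimal, illustrative launcher is sketched below — the prefix value "pg1" is a placeholder — and, per the warning above, kernel.randomize_va_space is typically set to 0 for multi-process setups.)

	#include <rte_eal.h>

	int
	main(void)
	{
		/* must match the primary's --file-prefix so the shared
		 * hugepage map file is found */
		char *argv[] = {
			"secondary_app",
			"--proc-type=secondary",
			"--file-prefix=pg1",
		};
		int argc = sizeof(argv) / sizeof(argv[0]);

		/* ends up calling rte_eal_hugepage_attach() internally */
		if (rte_eal_init(argc, argv) < 0)
			return -1;
		return 0;
	}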
- */ - base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, - PROT_READ, MAP_PRIVATE, fd_zero, 0); - if (base_addr == MAP_FAILED || - base_addr != mcfg->memseg[s].addr) { - RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " - "in /dev/zero to requested address [%p]: '%s'\n", - (unsigned long long)mcfg->memseg[s].len, - mcfg->memseg[s].addr, strerror(errno)); - if (aslr_enabled() > 0) { - RTE_LOG(ERR, EAL, "It is recommended to " - "disable ASLR in the kernel " - "and retry running both primary " - "and secondary processes\n"); - } - goto error; - } - } - - size = getFileSize(fd_hugepage); - hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0); - if (hp == NULL) { - RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); - goto error; - } - - num_hp = size / sizeof(struct hugepage_file); - RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp); - - s = 0; - while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){ - void *addr, *base_addr; - uintptr_t offset = 0; - size_t mapping_size; -#ifdef RTE_LIBRTE_IVSHMEM - /* - * if segment has ioremap address set, it's an IVSHMEM segment and - * doesn't need mapping as it was already mapped earlier - */ - if (mcfg->memseg[s].ioremap_addr != 0) { - s++; - continue; - } -#endif - /* - * free previously mapped memory so we can map the - * hugepages into the space - */ - base_addr = mcfg->memseg[s].addr; - munmap(base_addr, mcfg->memseg[s].len); - - /* find the hugepages for this segment and map them - * we don't need to worry about order, as the server sorted the - * entries before it did the second mmap of them */ - for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++){ - if (hp[i].memseg_id == (int)s){ - fd = open(hp[i].filepath, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", - hp[i].filepath); - goto error; - } -#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS - mapping_size = hp[i].size * hp[i].repeated; -#else - mapping_size = hp[i].size; -#endif - addr = mmap(RTE_PTR_ADD(base_addr, offset), - mapping_size, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - close(fd); /* close file both on success and on failure */ - if (addr == MAP_FAILED || - addr != RTE_PTR_ADD(base_addr, offset)) { - RTE_LOG(ERR, EAL, "Could not mmap %s\n", - hp[i].filepath); - goto error; - } - offset+=mapping_size; - } - } - RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s, - (unsigned long long)mcfg->memseg[s].len); - s++; - } - /* unmap the hugepage config file, since we are done using it */ - munmap((void *)(uintptr_t)hp, size); - close(fd_zero); - close(fd_hugepage); - return 0; - -error: - if (fd_zero >= 0) - close(fd_zero); - if (fd_hugepage >= 0) - close(fd_hugepage); - return -1; -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci.c deleted file mode 100644 index bc5b5bee..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci.c +++ /dev/null @@ -1,656 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "eal_filesystem.h" -#include "eal_private.h" -#include "eal_pci_init.h" - -/** - * @file - * PCI probing under linux - * - * This code is used to simulate a PCI probe by parsing information in sysfs. - * When a registered device matches a driver, it is then initialized with - * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). - */ - -/* unbind kernel driver for this device */ -int -pci_unbind_kernel_driver(struct rte_pci_device *dev) -{ - int n; - FILE *f; - char filename[PATH_MAX]; - char buf[BUFSIZ]; - struct rte_pci_addr *loc = &dev->addr; - - /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ - snprintf(filename, sizeof(filename), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind", - loc->domain, loc->bus, loc->devid, loc->function); - - f = fopen(filename, "w"); - if (f == NULL) /* device was not bound */ - return 0; - - n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n", - loc->domain, loc->bus, loc->devid, loc->function); - if ((n < 0) || (n >= (int)sizeof(buf))) { - RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__); - goto error; - } - if (fwrite(buf, n, 1, f) == 0) { - RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__, - filename); - goto error; - } - - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} - -static int -pci_get_kernel_driver_by_path(const char *filename, char *dri_name) -{ - int count; - char path[PATH_MAX]; - char *name; - - if (!filename || !dri_name) - return -1; - - count = readlink(filename, path, PATH_MAX); - if (count >= PATH_MAX) - return -1; - - /* For device does not have a driver */ - if (count < 0) - return 1; - - path[count] = '\0'; - - name = strrchr(path, '/'); - if (name) { - strncpy(dri_name, name + 1, strlen(name + 1) + 1); - return 0; - } - - return -1; -} - -/* Map pci device */ -int -pci_map_device(struct rte_pci_device *dev) -{ - int ret = -1; - - /* try mapping the NIC resources using VFIO if it exists */ - switch (dev->kdrv) { - case RTE_KDRV_VFIO: -#ifdef VFIO_PRESENT - if (pci_vfio_is_enabled()) - ret = pci_vfio_map_resource(dev); -#endif - break; - case RTE_KDRV_IGB_UIO: - case RTE_KDRV_UIO_GENERIC: - /* map resources for devices that use uio */ - ret = pci_uio_map_resource(dev); - break; - 
default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - ret = 1; - break; - } - - return ret; -} - -/* Unmap pci device */ -void -pci_unmap_device(struct rte_pci_device *dev) -{ - /* try unmapping the NIC resources using VFIO if it exists */ - switch (dev->kdrv) { - case RTE_KDRV_VFIO: - RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n"); - break; - case RTE_KDRV_IGB_UIO: - case RTE_KDRV_UIO_GENERIC: - /* unmap resources for devices that use uio */ - pci_uio_unmap_resource(dev); - break; - default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - break; - } -} - -void * -pci_find_max_end_va(void) -{ - const struct rte_memseg *seg = rte_eal_get_physmem_layout(); - const struct rte_memseg *last = seg; - unsigned i = 0; - - for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { - if (seg->addr == NULL) - break; - - if (seg->addr > last->addr) - last = seg; - - } - return RTE_PTR_ADD(last->addr, last->len); -} - -/* parse the "resource" sysfs file */ -static int -pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) -{ - FILE *f; - char buf[BUFSIZ]; - union pci_resource_info { - struct { - char *phys_addr; - char *end_addr; - char *flags; - }; - char *ptrs[PCI_RESOURCE_FMT_NVAL]; - } res_info; - int i; - uint64_t phys_addr, end_addr, flags; - - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n"); - return -1; - } - - for (i = 0; imem_resource[i].phys_addr = phys_addr; - dev->mem_resource[i].len = end_addr - phys_addr + 1; - /* not mapped for now */ - dev->mem_resource[i].addr = NULL; - } - } - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} - -/* Scan one pci sysfs entry, and fill the devices list from it. */ -static int -pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, - uint8_t devid, uint8_t function) -{ - char filename[PATH_MAX]; - unsigned long tmp; - struct rte_pci_device *dev; - char driver[PATH_MAX]; - int ret; - - dev = malloc(sizeof(*dev)); - if (dev == NULL) - return -1; - - memset(dev, 0, sizeof(*dev)); - dev->addr.domain = domain; - dev->addr.bus = bus; - dev->addr.devid = devid; - dev->addr.function = function; - - /* get vendor id */ - snprintf(filename, sizeof(filename), "%s/vendor", dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.vendor_id = (uint16_t)tmp; - - /* get device id */ - snprintf(filename, sizeof(filename), "%s/device", dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.device_id = (uint16_t)tmp; - - /* get subsystem_vendor id */ - snprintf(filename, sizeof(filename), "%s/subsystem_vendor", - dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.subsystem_vendor_id = (uint16_t)tmp; - - /* get subsystem_device id */ - snprintf(filename, sizeof(filename), "%s/subsystem_device", - dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.subsystem_device_id = (uint16_t)tmp; - - /* get max_vfs */ - dev->max_vfs = 0; - snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); - if (!access(filename, F_OK) && - eal_parse_sysfs_value(filename, &tmp) == 0) - dev->max_vfs = (uint16_t)tmp; - else { - /* for non igb_uio driver, need kernel version >= 3.8 */ - snprintf(filename, sizeof(filename), - "%s/sriov_numvfs", dirname); - if (!access(filename, F_OK) && - eal_parse_sysfs_value(filename, &tmp) == 0) - dev->max_vfs = 
(uint16_t)tmp; - } - - /* get numa node */ - snprintf(filename, sizeof(filename), "%s/numa_node", - dirname); - if (access(filename, R_OK) != 0) { - /* if no NUMA support, set default to 0 */ - dev->numa_node = 0; - } else { - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->numa_node = tmp; - } - - /* parse resources */ - snprintf(filename, sizeof(filename), "%s/resource", dirname); - if (pci_parse_sysfs_resource(filename, dev) < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__); - free(dev); - return -1; - } - - /* parse driver */ - snprintf(filename, sizeof(filename), "%s/driver", dirname); - ret = pci_get_kernel_driver_by_path(filename, driver); - if (ret < 0) { - RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); - free(dev); - return -1; - } - - if (!ret) { - if (!strcmp(driver, "vfio-pci")) - dev->kdrv = RTE_KDRV_VFIO; - else if (!strcmp(driver, "igb_uio")) - dev->kdrv = RTE_KDRV_IGB_UIO; - else if (!strcmp(driver, "uio_pci_generic")) - dev->kdrv = RTE_KDRV_UIO_GENERIC; - else - dev->kdrv = RTE_KDRV_UNKNOWN; - } else - dev->kdrv = RTE_KDRV_UNKNOWN; - - /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&pci_device_list)) { - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); - } else { - struct rte_pci_device *dev2; - int ret; - - TAILQ_FOREACH(dev2, &pci_device_list, next) { - ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); - if (ret > 0) - continue; - - if (ret < 0) { - TAILQ_INSERT_BEFORE(dev2, dev, next); - } else { /* already registered */ - dev2->kdrv = dev->kdrv; - dev2->max_vfs = dev->max_vfs; - memmove(dev2->mem_resource, dev->mem_resource, - sizeof(dev->mem_resource)); - free(dev); - } - return 0; - } - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); - } - - return 0; -} - -/* - * split up a pci address into its constituent parts. - */ -static int -parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, - uint8_t *bus, uint8_t *devid, uint8_t *function) -{ - /* first split on ':' */ - union splitaddr { - struct { - char *domain; - char *bus; - char *devid; - char *function; - }; - char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */ - } splitaddr; - - char *buf_copy = strndup(buf, bufsize); - if (buf_copy == NULL) - return -1; - - if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':') - != PCI_FMT_NVAL - 1) - goto error; - /* final split is on '.' 
between devid and function */ - splitaddr.function = strchr(splitaddr.devid,'.'); - if (splitaddr.function == NULL) - goto error; - *splitaddr.function++ = '\0'; - - /* now convert to int values */ - errno = 0; - *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); - *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); - *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); - *function = (uint8_t)strtoul(splitaddr.function, NULL, 10); - if (errno != 0) - goto error; - - free(buf_copy); /* free the copy made with strdup */ - return 0; -error: - free(buf_copy); - return -1; -} - -/* - * Scan the content of the PCI bus, and the devices in the devices - * list - */ -int -rte_eal_pci_scan(void) -{ - struct dirent *e; - DIR *dir; - char dirname[PATH_MAX]; - uint16_t domain; - uint8_t bus, devid, function; - - dir = opendir(SYSFS_PCI_DEVICES); - if (dir == NULL) { - RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", - __func__, strerror(errno)); - return -1; - } - - while ((e = readdir(dir)) != NULL) { - if (e->d_name[0] == '.') - continue; - - if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain, - &bus, &devid, &function) != 0) - continue; - - snprintf(dirname, sizeof(dirname), "%s/%s", SYSFS_PCI_DEVICES, - e->d_name); - if (pci_scan_one(dirname, domain, bus, devid, function) < 0) - goto error; - } - closedir(dir); - return 0; - -error: - closedir(dir); - return -1; -} - -#ifdef RTE_PCI_CONFIG -static int -pci_config_extended_tag(struct rte_pci_device *dev) -{ - struct rte_pci_addr *loc = &dev->addr; - char filename[PATH_MAX]; - char buf[BUFSIZ]; - FILE *f; - - /* not configured, let it as is */ - if (strncmp(RTE_PCI_EXTENDED_TAG, "on", 2) != 0 && - strncmp(RTE_PCI_EXTENDED_TAG, "off", 3) != 0) - return 0; - - snprintf(filename, sizeof(filename), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/" "extended_tag", - loc->domain, loc->bus, loc->devid, loc->function); - f = fopen(filename, "rw+"); - if (!f) - return -1; - - fgets(buf, sizeof(buf), f); - if (strncmp(RTE_PCI_EXTENDED_TAG, "on", 2) == 0) { - /* enable Extended Tag*/ - if (strncmp(buf, "on", 2) != 0) { - fseek(f, 0, SEEK_SET); - fputs("on", f); - } - } else { - /* disable Extended Tag */ - if (strncmp(buf, "off", 3) != 0) { - fseek(f, 0, SEEK_SET); - fputs("off", f); - } - } - fclose(f); - - return 0; -} - -static int -pci_config_max_read_request_size(struct rte_pci_device *dev) -{ - struct rte_pci_addr *loc = &dev->addr; - char filename[PATH_MAX]; - char buf[BUFSIZ], param[BUFSIZ]; - FILE *f; - /* size can be 128, 256, 512, 1024, 2048, 4096 */ - uint32_t max_size = RTE_PCI_MAX_READ_REQUEST_SIZE; - - /* not configured, let it as is */ - if (!max_size) - return 0; - - snprintf(filename, sizeof(filename), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/" "max_read_request_size", - loc->domain, loc->bus, loc->devid, loc->function); - f = fopen(filename, "rw+"); - if (!f) - return -1; - - fgets(buf, sizeof(buf), f); - snprintf(param, sizeof(param), "%d", max_size); - - /* check if the size to be set is the same as current */ - if (strcmp(buf, param) == 0) { - fclose(f); - return 0; - } - fseek(f, 0, SEEK_SET); - fputs(param, f); - fclose(f); - - return 0; -} - -void -pci_config_space_set(struct rte_pci_device *dev) -{ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return; - - /* configure extended tag */ - pci_config_extended_tag(dev); - - /* configure max read request size */ - pci_config_max_read_request_size(dev); -} -#endif - -/* Read PCI config space. 
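(Usage note: a driver or application can read a device's config space through this wrapper without caring whether the device is bound to UIO or VFIO; for example, fetching the 16-bit vendor ID at offset 0 — the variable dev below stands for any probed struct rte_pci_device and the fragment assumes <rte_pci.h> and <rte_log.h> are included.)

	uint16_t vendor_id;

	/* offset 0x0 of PCI config space holds the vendor ID */
	if (rte_eal_pci_read_config(dev, &vendor_id, sizeof(vendor_id), 0) < 0)
		RTE_LOG(ERR, USER1, "config space read failed\n");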
*/ -int rte_eal_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &device->intr_handle; - - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: - return pci_uio_read_config(intr_handle, buf, len, offset); - -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: - return pci_vfio_read_config(intr_handle, buf, len, offset); -#endif - default: - RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); - return -1; - } -} - -/* Write PCI config space. */ -int rte_eal_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &device->intr_handle; - - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: - return pci_uio_write_config(intr_handle, buf, len, offset); - -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: - return pci_vfio_write_config(intr_handle, buf, len, offset); -#endif - default: - RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); - return -1; - } -} - -/* Init the PCI EAL subsystem */ -int -rte_eal_pci_init(void) -{ - TAILQ_INIT(&pci_driver_list); - TAILQ_INIT(&pci_device_list); - - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - if (rte_eal_pci_scan() < 0) { - RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); - return -1; - } -#ifdef VFIO_PRESENT - pci_vfio_enable(); - - if (pci_vfio_is_enabled()) { - - /* if we are primary process, create a thread to communicate with - * secondary processes. the thread will use a socket to wait for - * requests from secondary process to send open file descriptors, - * because VFIO does not allow multiple open descriptors on a group or - * VFIO container. - */ - if (internal_config.process_type == RTE_PROC_PRIMARY && - pci_vfio_mp_sync_setup() < 0) - return -1; - } -#endif - return 0; -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_init.h deleted file mode 100644 index a17c7083..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_init.h +++ /dev/null @@ -1,111 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef EAL_PCI_INIT_H_ -#define EAL_PCI_INIT_H_ - -#include "eal_vfio.h" - -/* - * Helper function to map PCI resources right after hugepages in virtual memory - */ -extern void *pci_map_addr; -void *pci_find_max_end_va(void); - -int pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res); -void pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res); -int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx); - -int pci_uio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs); -int pci_uio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs); - -#ifdef VFIO_PRESENT - -#define VFIO_MAX_GROUPS 64 - -int pci_vfio_enable(void); -int pci_vfio_is_enabled(void); -int pci_vfio_mp_sync_setup(void); - -/* access config space */ -int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs); -int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs); - -/* map VFIO resource prototype */ -int pci_vfio_map_resource(struct rte_pci_device *dev); -int pci_vfio_get_group_fd(int iommu_group_fd); -int pci_vfio_get_container_fd(void); - -/* - * Function prototypes for VFIO multiprocess sync functions - */ -int vfio_mp_sync_send_request(int socket, int req); -int vfio_mp_sync_receive_request(int socket); -int vfio_mp_sync_send_fd(int socket, int fd); -int vfio_mp_sync_receive_fd(int socket); -int vfio_mp_sync_connect_to_primary(void); - -/* socket comm protocol definitions */ -#define SOCKET_REQ_CONTAINER 0x100 -#define SOCKET_REQ_GROUP 0x200 -#define SOCKET_OK 0x0 -#define SOCKET_NO_FD 0x1 -#define SOCKET_ERR 0xFF - -/* - * we don't need to store device fd's anywhere since they can be obtained from - * the group fd via an ioctl() call. - */ -struct vfio_group { - int group_no; - int fd; -}; - -struct vfio_config { - int vfio_enabled; - int vfio_container_fd; - int vfio_container_has_dma; - int vfio_group_idx; - struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; -}; - -#endif - -#endif /* EAL_PCI_INIT_H_ */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_uio.c deleted file mode 100644 index ac50e13f..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ /dev/null @@ -1,365 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "eal_filesystem.h" -#include "eal_pci_init.h" - -void *pci_map_addr = NULL; - -#define OFF_MAX ((uint64_t)(off_t)-1) - -int -pci_uio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offset) -{ - return pread(intr_handle->uio_cfg_fd, buf, len, offset); -} - -int -pci_uio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offset) -{ - return pwrite(intr_handle->uio_cfg_fd, buf, len, offset); -} - -static int -pci_uio_set_bus_master(int dev_fd) -{ - uint16_t reg; - int ret; - - ret = pread(dev_fd, ®, sizeof(reg), PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, - "Cannot read command from PCI config space!\n"); - return -1; - } - - /* return if bus mastering is already on */ - if (reg & PCI_COMMAND_MASTER) - return 0; - - reg |= PCI_COMMAND_MASTER; - - ret = pwrite(dev_fd, ®, sizeof(reg), PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, - "Cannot write command to PCI config space!\n"); - return -1; - } - - return 0; -} - -static int -pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) -{ - FILE *f; - char filename[PATH_MAX]; - int ret; - unsigned major, minor; - dev_t dev; - - /* get the name of the sysfs file that contains the major and minor - * of the uio device and read its content */ - snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path); - - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n", - __func__); - return -1; - } - - ret = fscanf(f, "%u:%u", &major, &minor); - if (ret != 2) { - RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n", - __func__); - fclose(f); - return -1; - } - fclose(f); - - /* create the char device "mknod /dev/uioX c major minor" */ - snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); - dev = makedev(major, minor); - ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); - if (f == NULL) { - RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", - __func__, strerror(errno)); - return -1; - } - - return ret; -} - -/* - * Return the uioX char device used for a pci device. 
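For concreteness, the bus-mastering helper above relies on two standard PCI constants (from <linux/pci_regs.h>, not specific to this file): PCI_COMMAND is the 16-bit command register at config offset 0x04, and PCI_COMMAND_MASTER is bit 2 of that register. Spelled out with the raw values, the read-modify-write is just:

    #include <stdint.h>
    #include <unistd.h>

    /* sketch: cfg_fd is any fd that exposes raw config space (e.g. the uio config fd) */
    static int set_bus_master(int cfg_fd)
    {
        uint16_t cmd;

        if (pread(cfg_fd, &cmd, sizeof(cmd), 0x04) != sizeof(cmd))   /* PCI_COMMAND */
            return -1;
        if (cmd & 0x0004)                       /* PCI_COMMAND_MASTER already set */
            return 0;
        cmd |= 0x0004;
        return pwrite(cfg_fd, &cmd, sizeof(cmd), 0x04) == sizeof(cmd) ? 0 : -1;
    }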
On success, return - * the UIO number and fill dstbuf string with the path of the device in - * sysfs. On error, return a negative value. In this case dstbuf is - * invalid. - */ -static int -pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, - unsigned int buflen) -{ - struct rte_pci_addr *loc = &dev->addr; - unsigned int uio_num; - struct dirent *e; - DIR *dir; - char dirname[PATH_MAX]; - - /* depending on kernel version, uio can be located in uio/uioX - * or uio:uioX */ - - snprintf(dirname, sizeof(dirname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio", - loc->domain, loc->bus, loc->devid, loc->function); - - dir = opendir(dirname); - if (dir == NULL) { - /* retry with the parent directory */ - snprintf(dirname, sizeof(dirname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT, - loc->domain, loc->bus, loc->devid, loc->function); - dir = opendir(dirname); - - if (dir == NULL) { - RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname); - return -1; - } - } - - /* take the first file starting with "uio" */ - while ((e = readdir(dir)) != NULL) { - /* format could be uio%d ...*/ - int shortprefix_len = sizeof("uio") - 1; - /* ... or uio:uio%d */ - int longprefix_len = sizeof("uio:uio") - 1; - char *endptr; - - if (strncmp(e->d_name, "uio", 3) != 0) - continue; - - /* first try uio%d */ - errno = 0; - uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10); - if (errno == 0 && endptr != (e->d_name + shortprefix_len)) { - snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); - break; - } - - /* then try uio:uio%d */ - errno = 0; - uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10); - if (errno == 0 && endptr != (e->d_name + longprefix_len)) { - snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num); - break; - } - } - closedir(dir); - - /* No uio resource found */ - if (e == NULL) - return -1; - - /* create uio device if we've been asked to */ - if (internal_config.create_uio_dev && - pci_mknod_uio_dev(dstbuf, uio_num) < 0) - RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num); - - return uio_num; -} - -void -pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res) -{ - rte_free(uio_res); - - if (dev->intr_handle.uio_cfg_fd >= 0) { - close(dev->intr_handle.uio_cfg_fd); - dev->intr_handle.uio_cfg_fd = -1; - } - if (dev->intr_handle.fd) { - close(dev->intr_handle.fd); - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - } -} - -int -pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res) -{ - char dirname[PATH_MAX]; - char cfgname[PATH_MAX]; - char devname[PATH_MAX]; /* contains the /dev/uioX */ - int uio_num; - struct rte_pci_addr *loc; - - loc = &dev->addr; - - /* find uio resource */ - uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname)); - if (uio_num < 0) { - RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " - "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); - return 1; - } - snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); - - /* save fd if in primary process */ - dev->intr_handle.fd = open(devname, O_RDWR); - if (dev->intr_handle.fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - - snprintf(cfgname, sizeof(cfgname), - "/sys/class/uio/uio%u/device/config", uio_num); - dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR); - if (dev->intr_handle.uio_cfg_fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - cfgname, strerror(errno)); - goto error; - } - - if (dev->kdrv == 
RTE_KDRV_IGB_UIO) - dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - else { - dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; - - /* set bus master that is not done by uio_pci_generic */ - if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) { - RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); - goto error; - } - } - - /* allocate the mapping details for secondary processes*/ - *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); - if (*uio_res == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot store uio mmap details\n", __func__); - goto error; - } - - snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); - memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); - - return 0; - -error: - pci_uio_free_resource(dev, *uio_res); - return -1; -} - -int -pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx) -{ - int fd; - char devname[PATH_MAX]; /* contains the /dev/uioX */ - void *mapaddr; - struct rte_pci_addr *loc; - struct pci_map *maps; - - loc = &dev->addr; - maps = uio_res->maps; - - /* update devname for mmap */ - snprintf(devname, sizeof(devname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d", - loc->domain, loc->bus, loc->devid, - loc->function, res_idx); - - /* allocate memory to keep path */ - maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); - if (maps[map_idx].path == NULL) { - RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", - strerror(errno)); - return -1; - } - - /* - * open resource file, to mmap it - */ - fd = open(devname, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - - /* try mapping somewhere close to the end of hugepages */ - if (pci_map_addr == NULL) - pci_map_addr = pci_find_max_end_va(); - - mapaddr = pci_map_resource(pci_map_addr, fd, 0, - (size_t)dev->mem_resource[res_idx].len, 0); - close(fd); - if (mapaddr == MAP_FAILED) - goto error; - - pci_map_addr = RTE_PTR_ADD(mapaddr, - (size_t)dev->mem_resource[res_idx].len); - - maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; - maps[map_idx].size = dev->mem_resource[res_idx].len; - maps[map_idx].addr = mapaddr; - maps[map_idx].offset = 0; - strcpy(maps[map_idx].path, devname); - dev->mem_resource[res_idx].addr = mapaddr; - - return 0; - -error: - rte_free(maps[map_idx].path); - return -1; -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c deleted file mode 100644 index 74f91bad..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ /dev/null @@ -1,928 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
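pci_uio_map_resource_by_index() further up maps a BAR by opening the matching sysfs resourceN file and handing it to pci_map_resource(). Stripped of the EAL bookkeeping, the underlying pattern is plain open() plus mmap(); a standalone sketch with a hypothetical device address (pci_map_resource() itself is an EAL helper whose body is not part of this patch):

    #include <fcntl.h>
    #include <stddef.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* sketch: map BAR0 of a hypothetical device 0000:03:00.0 */
    static void *map_bar0(size_t len)
    {
        void *va;
        int fd = open("/sys/bus/pci/devices/0000:03:00.0/resource0", O_RDWR);

        if (fd < 0)
            return NULL;
        va = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        close(fd);                      /* the mapping remains valid after close() */
        return va == MAP_FAILED ? NULL : va;
    }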
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "eal_filesystem.h" -#include "eal_pci_init.h" -#include "eal_vfio.h" - -/** - * @file - * PCI probing under linux (VFIO version) - * - * This code tries to determine if the PCI device is bound to VFIO driver, - * and initialize it (map BARs, set up interrupts) if that's the case. - * - * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". - */ - -#ifdef VFIO_PRESENT - -#define PAGE_SIZE (sysconf(_SC_PAGESIZE)) -#define PAGE_MASK (~(PAGE_SIZE - 1)) - -static struct rte_tailq_elem rte_vfio_tailq = { - .name = "VFIO_RESOURCE_LIST", -}; -EAL_REGISTER_TAILQ(rte_vfio_tailq) - -#define VFIO_DIR "/dev/vfio" -#define VFIO_CONTAINER_PATH "/dev/vfio/vfio" -#define VFIO_GROUP_FMT "/dev/vfio/%u" -#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) - -/* per-process VFIO config */ -static struct vfio_config vfio_cfg; - -int -pci_vfio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs) -{ - return pread64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); -} - -int -pci_vfio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs) -{ - return pwrite64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); -} - -/* get PCI BAR number where MSI-X interrupts are */ -static int -pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, - uint32_t *msix_table_size) -{ - int ret; - uint32_t reg; - uint16_t flags; - uint8_t cap_id, cap_offset; - - /* read PCI capability pointer from config space */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_CAPABILITY_LIST); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " - "config space!\n"); - return -1; - } - - /* we need first byte */ - cap_offset = reg & 0xFF; - - while (cap_offset) { - - /* read PCI capability ID */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " - "config space!\n"); - return -1; - } - - /* we need first byte */ - cap_id = reg & 0xFF; - - /* if we haven't reached MSI-X, check next capability */ - if (cap_id != PCI_CAP_ID_MSIX) { - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " - "config space!\n"); - return -1; - } - - /* we need second byte */ - 
cap_offset = (reg & 0xFF00) >> 8; - - continue; - } - /* else, read table offset */ - else { - /* table offset resides in the next 4 bytes */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 4); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " - "space!\n"); - return -1; - } - - ret = pread64(fd, &flags, sizeof(flags), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 2); - if (ret != sizeof(flags)) { - RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " - "space!\n"); - return -1; - } - - *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR; - *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; - *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); - - return 0; - } - } - return 0; -} - -/* set PCI bus mastering */ -static int -pci_vfio_set_bus_master(int dev_fd) -{ - uint16_t reg; - int ret; - - ret = pread64(dev_fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); - return -1; - } - - /* set the master bit */ - reg |= PCI_COMMAND_MASTER; - - ret = pwrite64(dev_fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); - - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); - return -1; - } - - return 0; -} - -/* set up DMA mappings */ -static int -pci_vfio_setup_dma_maps(int vfio_container_fd) -{ - const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - int i, ret; - - ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU, - VFIO_TYPE1_IOMMU); - if (ret) { - RTE_LOG(ERR, EAL, " cannot set IOMMU type, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } - - /* map all DPDK segments for DMA. 
use 1:1 PA to IOVA mapping */ - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - struct vfio_iommu_type1_dma_map dma_map; - - if (ms[i].addr == NULL) - break; - - memset(&dma_map, 0, sizeof(dma_map)); - dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); - dma_map.vaddr = ms[i].addr_64; - dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; - dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; - - ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); - - if (ret) { - RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } - } - - return 0; -} - -/* set up interrupt support (but not enable interrupts) */ -static int -pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) -{ - int i, ret, intr_idx; - - /* default to invalid index */ - intr_idx = VFIO_PCI_NUM_IRQS; - - /* get interrupt type from internal config (MSI-X by default, can be - * overriden from the command line - */ - switch (internal_config.vfio_intr_mode) { - case RTE_INTR_MODE_MSIX: - intr_idx = VFIO_PCI_MSIX_IRQ_INDEX; - break; - case RTE_INTR_MODE_MSI: - intr_idx = VFIO_PCI_MSI_IRQ_INDEX; - break; - case RTE_INTR_MODE_LEGACY: - intr_idx = VFIO_PCI_INTX_IRQ_INDEX; - break; - /* don't do anything if we want to automatically determine interrupt type */ - case RTE_INTR_MODE_NONE: - break; - default: - RTE_LOG(ERR, EAL, " unknown default interrupt type!\n"); - return -1; - } - - /* start from MSI-X interrupt type */ - for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { - struct vfio_irq_info irq = { .argsz = sizeof(irq) }; - int fd = -1; - - /* skip interrupt modes we don't want */ - if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE && - i != intr_idx) - continue; - - irq.index = i; - - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); - if (ret < 0) { - RTE_LOG(ERR, EAL, " cannot get IRQ info, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } - - /* if this vector cannot be used with eventfd, fail if we explicitly - * specified interrupt type, otherwise continue */ - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { - if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) { - RTE_LOG(ERR, EAL, - " interrupt vector does not support eventfd!\n"); - return -1; - } else - continue; - } - - /* set up an eventfd for interrupts */ - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, " cannot set up eventfd, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } - - dev->intr_handle.fd = fd; - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - - switch (i) { - case VFIO_PCI_MSIX_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; - break; - case VFIO_PCI_MSI_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; - break; - case VFIO_PCI_INTX_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_LEGACY; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; - break; - default: - RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); - return -1; - } - - return 0; - } - - /* if we're here, we haven't found a suitable interrupt vector */ - return -1; -} - -/* open container fd or get an existing one */ -int -pci_vfio_get_container_fd(void) -{ - int ret, vfio_container_fd; - - /* if we're in a primary process, try to open the container */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR); - if 
(vfio_container_fd < 0) { - RTE_LOG(ERR, EAL, " cannot open VFIO container, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } - - /* check VFIO API version */ - ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION); - if (ret != VFIO_API_VERSION) { - if (ret < 0) - RTE_LOG(ERR, EAL, " could not get VFIO API version, " - "error %i (%s)\n", errno, strerror(errno)); - else - RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n"); - close(vfio_container_fd); - return -1; - } - - /* check if we support IOMMU type 1 */ - ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU); - if (ret != 1) { - if (ret < 0) - RTE_LOG(ERR, EAL, " could not get IOMMU type, " - "error %i (%s)\n", errno, - strerror(errno)); - else - RTE_LOG(ERR, EAL, " unsupported IOMMU type " - "detected in VFIO\n"); - close(vfio_container_fd); - return -1; - } - - return vfio_container_fd; - } else { - /* - * if we're in a secondary process, request container fd from the - * primary process via our socket - */ - int socket_fd; - - socket_fd = vfio_mp_sync_connect_to_primary(); - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) { - RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); - return -1; - } - vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd); - if (vfio_container_fd < 0) { - RTE_LOG(ERR, EAL, " cannot get container fd!\n"); - close(socket_fd); - return -1; - } - close(socket_fd); - return vfio_container_fd; - } - - return -1; -} - -/* open group fd or get an existing one */ -int -pci_vfio_get_group_fd(int iommu_group_no) -{ - int i; - int vfio_group_fd; - char filename[PATH_MAX]; - - /* check if we already have the group descriptor open */ - for (i = 0; i < vfio_cfg.vfio_group_idx; i++) - if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no) - return vfio_cfg.vfio_groups[i].fd; - - /* if primary, try to open the group */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - snprintf(filename, sizeof(filename), - VFIO_GROUP_FMT, iommu_group_no); - vfio_group_fd = open(filename, O_RDWR); - if (vfio_group_fd < 0) { - /* if file not found, it's not an error */ - if (errno != ENOENT) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, - strerror(errno)); - return -1; - } - return 0; - } - - /* if the fd is valid, create a new group for it */ - if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) { - RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); - return -1; - } - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; - return vfio_group_fd; - } - /* if we're in a secondary process, request group fd from the primary - * process via our socket - */ - else { - int socket_fd, ret; - - socket_fd = vfio_mp_sync_connect_to_primary(); - - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) { - RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) { - RTE_LOG(ERR, EAL, " cannot send group number!\n"); - close(socket_fd); - return -1; - } - ret = vfio_mp_sync_receive_request(socket_fd); - switch (ret) { - case SOCKET_NO_FD: - close(socket_fd); - return 0; - case SOCKET_OK: - vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); - /* if we got the 
fd, return it */ - if (vfio_group_fd > 0) { - close(socket_fd); - return vfio_group_fd; - } - /* fall-through on error */ - default: - RTE_LOG(ERR, EAL, " cannot get container fd!\n"); - close(socket_fd); - return -1; - } - } - return -1; -} - -/* parse IOMMU group number for a PCI device - * returns 1 on success, -1 for errors, 0 for non-existent group - */ -static int -pci_vfio_get_group_no(const char *pci_addr, int *iommu_group_no) -{ - char linkname[PATH_MAX]; - char filename[PATH_MAX]; - char *tok[16], *group_tok, *end; - int ret; - - memset(linkname, 0, sizeof(linkname)); - memset(filename, 0, sizeof(filename)); - - /* try to find out IOMMU group for this device */ - snprintf(linkname, sizeof(linkname), - SYSFS_PCI_DEVICES "/%s/iommu_group", pci_addr); - - ret = readlink(linkname, filename, sizeof(filename)); - - /* if the link doesn't exist, no VFIO for us */ - if (ret < 0) - return 0; - - ret = rte_strsplit(filename, sizeof(filename), - tok, RTE_DIM(tok), '/'); - - if (ret <= 0) { - RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", pci_addr); - return -1; - } - - /* IOMMU group is always the last token */ - errno = 0; - group_tok = tok[ret - 1]; - end = group_tok; - *iommu_group_no = strtol(group_tok, &end, 10); - if ((end != group_tok && *end != '\0') || errno != 0) { - RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", pci_addr); - return -1; - } - - return 1; -} - -static void -clear_current_group(void) -{ - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1; -} - - -/* - * map the PCI resources of a PCI device in virtual memory (VFIO version). - * primary and secondary processes follow almost exactly the same path - */ -int -pci_vfio_map_resource(struct rte_pci_device *dev) -{ - struct vfio_group_status group_status = { - .argsz = sizeof(group_status) - }; - struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; - int vfio_group_fd, vfio_dev_fd; - int iommu_group_no; - char pci_addr[PATH_MAX] = {0}; - struct rte_pci_addr *loc = &dev->addr; - int i, ret, msix_bar; - struct mapped_pci_resource *vfio_res = NULL; - struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); - - struct pci_map *maps; - uint32_t msix_table_offset = 0; - uint32_t msix_table_size = 0; - - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - - /* store PCI address string */ - snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, - loc->domain, loc->bus, loc->devid, loc->function); - - /* get group number */ - ret = pci_vfio_get_group_no(pci_addr, &iommu_group_no); - if (ret == 0) { - RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", - pci_addr); - return 1; - } - - /* if negative, something failed */ - if (ret < 0) - return -1; - - /* get the actual group fd */ - vfio_group_fd = pci_vfio_get_group_fd(iommu_group_no); - if (vfio_group_fd < 0) - return -1; - - /* store group fd */ - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; - - /* if group_fd == 0, that means the device isn't managed by VFIO */ - if (vfio_group_fd == 0) { - RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", - pci_addr); - /* we store 0 as group fd to distinguish between existing but - * unbound VFIO groups, and groups that don't exist at all. 
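pci_vfio_get_group_no() above resolves the group by following the device's iommu_group symlink in sysfs and taking the last path component. A standalone sketch of the same idea (hypothetical path handling; note that readlink() does not NUL-terminate):

    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    /* sketch: return the IOMMU group number of a device, or -1 if it has none */
    static int iommu_group_of(const char *pci_addr)   /* e.g. "0000:03:00.0" */
    {
        char link[PATH_MAX], target[PATH_MAX];
        ssize_t n;
        char *base;

        snprintf(link, sizeof(link), "/sys/bus/pci/devices/%s/iommu_group", pci_addr);
        n = readlink(link, target, sizeof(target) - 1);
        if (n < 0)
            return -1;                          /* no symlink: no IOMMU group, no VFIO */
        target[n] = '\0';
        base = strrchr(target, '/');            /* group number is the last component */
        return (int)strtol(base ? base + 1 : target, NULL, 10);
    }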
- */ - vfio_cfg.vfio_group_idx++; - return 1; - } - - /* - * at this point, we know at least one port on this device is bound to VFIO, - * so we can proceed to try and set this particular port up - */ - - /* check if the group is viable */ - ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status); - if (ret) { - RTE_LOG(ERR, EAL, " %s cannot get group status, " - "error %i (%s)\n", pci_addr, errno, strerror(errno)); - close(vfio_group_fd); - clear_current_group(); - return -1; - } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { - RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", pci_addr); - close(vfio_group_fd); - clear_current_group(); - return -1; - } - - /* - * at this point, we know that this group is viable (meaning, all devices - * are either bound to VFIO or not bound to anything) - */ - - /* check if group does not have a container yet */ - if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) { - - /* add group to a container */ - ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER, - &vfio_cfg.vfio_container_fd); - if (ret) { - RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, " - "error %i (%s)\n", pci_addr, errno, strerror(errno)); - close(vfio_group_fd); - clear_current_group(); - return -1; - } - /* - * at this point we know that this group has been successfully - * initialized, so we increment vfio_group_idx to indicate that we can - * add new groups. - */ - vfio_cfg.vfio_group_idx++; - } - - /* - * set up DMA mappings for container - * - * needs to be done only once, only when at least one group is assigned to - * a container and only in primary process - */ - if (internal_config.process_type == RTE_PROC_PRIMARY && - vfio_cfg.vfio_container_has_dma == 0) { - ret = pci_vfio_setup_dma_maps(vfio_cfg.vfio_container_fd); - if (ret) { - RTE_LOG(ERR, EAL, " %s DMA remapping failed, " - "error %i (%s)\n", pci_addr, errno, strerror(errno)); - return -1; - } - vfio_cfg.vfio_container_has_dma = 1; - } - - /* get a file descriptor for the device */ - vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, pci_addr); - if (vfio_dev_fd < 0) { - /* if we cannot get a device fd, this simply means that this - * particular port is not bound to VFIO - */ - RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", - pci_addr); - return 1; - } - - /* test and setup the device */ - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_INFO, &device_info); - if (ret) { - RTE_LOG(ERR, EAL, " %s cannot get device info, " - "error %i (%s)\n", pci_addr, errno, strerror(errno)); - close(vfio_dev_fd); - return -1; - } - - /* get MSI-X BAR, if any (we have to know where it is because we can't - * easily mmap it when using VFIO) */ - msix_bar = -1; - ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar, - &msix_table_offset, &msix_table_size); - if (ret < 0) { - RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); - close(vfio_dev_fd); - return -1; - } - - /* if we're in a primary process, allocate vfio_res and get region info */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); - if (vfio_res == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot store uio mmap details\n", __func__); - close(vfio_dev_fd); - return -1; - } - memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr)); - - /* get number of registers (up to BAR5) */ - vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions, - VFIO_PCI_BAR5_REGION_INDEX + 1); - } else { - /* if we're in a secondary process, just find 
our tailq entry */ - TAILQ_FOREACH(vfio_res, vfio_res_list, next) { - if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) - continue; - break; - } - /* if we haven't found our tailq entry, something's wrong */ - if (vfio_res == NULL) { - RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", - pci_addr); - close(vfio_dev_fd); - return -1; - } - } - - /* map BARs */ - maps = vfio_res->maps; - - for (i = 0; i < (int) vfio_res->nb_maps; i++) { - struct vfio_region_info reg = { .argsz = sizeof(reg) }; - void *bar_addr; - struct memreg { - unsigned long offset, size; - } memreg[2] = {}; - - reg.index = i; - - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®); - - if (ret) { - RTE_LOG(ERR, EAL, " %s cannot get device region info " - "error %i (%s)\n", pci_addr, errno, strerror(errno)); - close(vfio_dev_fd); - if (internal_config.process_type == RTE_PROC_PRIMARY) - rte_free(vfio_res); - return -1; - } - - /* skip non-mmapable BARs */ - if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) - continue; - - if (i == msix_bar) { - /* - * VFIO will not let us map the MSI-X table, - * but we can map around it. - */ - uint32_t table_start = msix_table_offset; - uint32_t table_end = table_start + msix_table_size; - table_end = (table_end + ~PAGE_MASK) & PAGE_MASK; - table_start &= PAGE_MASK; - - if (table_start == 0 && table_end >= reg.size) { - /* Cannot map this BAR */ - RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i); - continue; - } else { - memreg[0].offset = reg.offset; - memreg[0].size = table_start; - memreg[1].offset = table_end; - memreg[1].size = reg.size - table_end; - - RTE_LOG(DEBUG, EAL, - "Trying to map BAR %d that contains the MSI-X " - "table. Trying offsets: " - "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i, - memreg[0].offset, memreg[0].size, - memreg[1].offset, memreg[1].size); - } - } else { - memreg[0].offset = reg.offset; - memreg[0].size = reg.size; - } - - /* try to figure out an address */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - /* try mapping somewhere close to the end of hugepages */ - if (pci_map_addr == NULL) - pci_map_addr = pci_find_max_end_va(); - - bar_addr = pci_map_addr; - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); - } else { - bar_addr = maps[i].addr; - } - - /* reserve the address using an inaccessible mapping */ - bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE | - MAP_ANONYMOUS, -1, 0); - if (bar_addr != MAP_FAILED) { - void *map_addr = NULL; - if (memreg[0].size) { - /* actual map of first part */ - map_addr = pci_map_resource(bar_addr, vfio_dev_fd, - memreg[0].offset, - memreg[0].size, - MAP_FIXED); - } - - /* if there's a second part, try to map it */ - if (map_addr != MAP_FAILED - && memreg[1].offset && memreg[1].size) { - void *second_addr = RTE_PTR_ADD(bar_addr, memreg[1].offset); - map_addr = pci_map_resource(second_addr, - vfio_dev_fd, memreg[1].offset, - memreg[1].size, - MAP_FIXED); - } - - if (map_addr == MAP_FAILED || !map_addr) { - munmap(bar_addr, reg.size); - bar_addr = MAP_FAILED; - } - } - - if (bar_addr == MAP_FAILED || - (internal_config.process_type == RTE_PROC_SECONDARY && - bar_addr != maps[i].addr)) { - RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, - strerror(errno)); - close(vfio_dev_fd); - if (internal_config.process_type == RTE_PROC_PRIMARY) - rte_free(vfio_res); - return -1; - } - - maps[i].addr = bar_addr; - maps[i].offset = reg.offset; - maps[i].size = reg.size; - maps[i].path = NULL; /* vfio doesn't have per-resource paths */ - dev->mem_resource[i].addr = 
bar_addr; - } - - /* if secondary process, do not set up interrupts */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) { - RTE_LOG(ERR, EAL, " %s error setting up interrupts!\n", pci_addr); - close(vfio_dev_fd); - rte_free(vfio_res); - return -1; - } - - /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd)) { - RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr); - close(vfio_dev_fd); - rte_free(vfio_res); - return -1; - } - - /* Reset the device */ - ioctl(vfio_dev_fd, VFIO_DEVICE_RESET); - } - - if (internal_config.process_type == RTE_PROC_PRIMARY) - TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); - - return 0; -} - -int -pci_vfio_enable(void) -{ - /* initialize group list */ - int i; - int module_vfio_type1; - - for (i = 0; i < VFIO_MAX_GROUPS; i++) { - vfio_cfg.vfio_groups[i].fd = -1; - vfio_cfg.vfio_groups[i].group_no = -1; - } - - module_vfio_type1 = rte_eal_check_module("vfio_iommu_type1"); - - /* return error directly */ - if (module_vfio_type1 == -1) { - RTE_LOG(INFO, EAL, "Could not get loaded module details!\n"); - return -1; - } - - /* return 0 if VFIO modules not loaded */ - if (module_vfio_type1 == 0) { - RTE_LOG(INFO, EAL, "VFIO modules not all loaded, " - "skip VFIO support...\n"); - return 0; - } - - vfio_cfg.vfio_container_fd = pci_vfio_get_container_fd(); - - /* check if we have VFIO driver enabled */ - if (vfio_cfg.vfio_container_fd != -1) - vfio_cfg.vfio_enabled = 1; - else - RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n"); - - return 0; -} - -int -pci_vfio_is_enabled(void) -{ - return vfio_cfg.vfio_enabled; -} -#endif diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c deleted file mode 100644 index d9188fde..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c +++ /dev/null @@ -1,405 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
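Because VFIO refuses to mmap the page(s) that hold the MSI-X table, the BAR-mapping loop above splits an affected BAR into two windows around a page-aligned hole. A worked example with assumed numbers (4 KiB pages, a 64 KiB BAR, a 16-entry MSI-X table at offset 0x2000, reg.offset taken as 0 for simplicity):

    /* illustration only: every number here is an assumption */
    uint32_t page        = 0x1000;                  /* 4 KiB pages            */
    uint32_t bar_size    = 0x10000;                 /* 64 KiB BAR             */
    uint32_t table_start = 0x2000;                  /* MSI-X table offset     */
    uint32_t table_end   = 0x2000 + 16 * 16;        /* 16 vectors, 16 B each  */

    table_end    = (table_end + page - 1) & ~(page - 1);   /* round up   -> 0x3000 */
    table_start &= ~(page - 1);                            /* round down -> 0x2000 */

    /* memreg[0]: offset 0x0000, size 0x2000 (everything before the table)    */
    /* memreg[1]: offset 0x3000, size bar_size - 0x3000 = 0xd000 (after hole) */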
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include - -/* sys/un.h with __USE_MISC uses strlen, which is unsafe */ -#ifdef __USE_MISC -#define REMOVED_USE_MISC -#undef __USE_MISC -#endif -#include -/* make sure we redefine __USE_MISC only if it was previously undefined */ -#ifdef REMOVED_USE_MISC -#define __USE_MISC -#undef REMOVED_USE_MISC -#endif - -#include -#include -#include -#include - -#include "eal_filesystem.h" -#include "eal_pci_init.h" -#include "eal_thread.h" - -/** - * @file - * VFIO socket for communication between primary and secondary processes. - * - * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". - */ - -#ifdef VFIO_PRESENT - -#define SOCKET_PATH_FMT "%s/.%s_mp_socket" -#define CMSGLEN (CMSG_LEN(sizeof(int))) -#define FD_TO_CMSGHDR(fd, chdr) \ - do {\ - (chdr).cmsg_len = CMSGLEN;\ - (chdr).cmsg_level = SOL_SOCKET;\ - (chdr).cmsg_type = SCM_RIGHTS;\ - memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\ - } while (0) -#define CMSGHDR_TO_FD(chdr, fd) \ - memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd)) - -static pthread_t socket_thread; -static int mp_socket_fd; - - -/* get socket path (/var/run if root, $HOME otherwise) */ -static void -get_socket_path(char *buffer, int bufsz) -{ - const char *dir = "/var/run"; - const char *home_dir = getenv("HOME"); - - if (getuid() != 0 && home_dir != NULL) - dir = home_dir; - - /* use current prefix as file path */ - snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir, - internal_config.hugefile_prefix); -} - - - -/* - * data flow for socket comm protocol: - * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP - * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number - * 2. server receives message - * 2a. in case of invalid group, SOCKET_ERR is sent back to client - * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client - * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd - * - * in case of any error, socket is closed. 
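The group or container fd itself travels as SCM_RIGHTS ancillary data, which is what the FD_TO_CMSGHDR/CMSGHDR_TO_FD macros above pack and unpack through the glibc-internal __cmsg_data field. A portable sketch of the sending side using the standard CMSG_* macros instead:

    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    /* sketch: send an int payload (e.g. SOCKET_OK) plus an fd as ancillary data */
    static int send_with_fd(int sock, int payload, int fd)
    {
        struct iovec iov = { .iov_base = &payload, .iov_len = sizeof(payload) };
        union { struct cmsghdr hdr; char buf[CMSG_SPACE(sizeof(int))]; } u;
        struct msghdr msg = { 0 };
        struct cmsghdr *c;

        memset(&u, 0, sizeof(u));
        msg.msg_iov        = &iov;
        msg.msg_iovlen     = 1;
        msg.msg_control    = u.buf;
        msg.msg_controllen = sizeof(u.buf);

        c = CMSG_FIRSTHDR(&msg);
        c->cmsg_level = SOL_SOCKET;
        c->cmsg_type  = SCM_RIGHTS;
        c->cmsg_len   = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(c), &fd, sizeof(int));

        return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
    }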
- */ - -/* send a request, return -1 on error */ -int -vfio_mp_sync_send_request(int socket, int req) -{ - struct msghdr hdr; - struct iovec iov; - int buf; - int ret; - - memset(&hdr, 0, sizeof(hdr)); - - buf = req; - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - - ret = sendmsg(socket, &hdr, 0); - if (ret < 0) - return -1; - return 0; -} - -/* receive a request and return it */ -int -vfio_mp_sync_receive_request(int socket) -{ - int buf; - struct msghdr hdr; - struct iovec iov; - int ret, req; - - memset(&hdr, 0, sizeof(hdr)); - - buf = SOCKET_ERR; - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - - ret = recvmsg(socket, &hdr, 0); - if (ret < 0) - return -1; - - req = buf; - - return req; -} - -/* send OK in message, fd in control message */ -int -vfio_mp_sync_send_fd(int socket, int fd) -{ - int buf; - struct msghdr hdr; - struct cmsghdr *chdr; - char chdr_buf[CMSGLEN]; - struct iovec iov; - int ret; - - chdr = (struct cmsghdr *) chdr_buf; - memset(chdr, 0, sizeof(chdr_buf)); - memset(&hdr, 0, sizeof(hdr)); - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - hdr.msg_control = chdr; - hdr.msg_controllen = CMSGLEN; - - buf = SOCKET_OK; - FD_TO_CMSGHDR(fd, *chdr); - - ret = sendmsg(socket, &hdr, 0); - if (ret < 0) - return -1; - return 0; -} - -/* receive OK in message, fd in control message */ -int -vfio_mp_sync_receive_fd(int socket) -{ - int buf; - struct msghdr hdr; - struct cmsghdr *chdr; - char chdr_buf[CMSGLEN]; - struct iovec iov; - int ret, req, fd; - - buf = SOCKET_ERR; - - chdr = (struct cmsghdr *) chdr_buf; - memset(chdr, 0, sizeof(chdr_buf)); - memset(&hdr, 0, sizeof(hdr)); - - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - iov.iov_base = (char *) &buf; - iov.iov_len = sizeof(buf); - hdr.msg_control = chdr; - hdr.msg_controllen = CMSGLEN; - - ret = recvmsg(socket, &hdr, 0); - if (ret < 0) - return -1; - - req = buf; - - if (req != SOCKET_OK) - return -1; - - CMSGHDR_TO_FD(*chdr, fd); - - return fd; -} - -/* connect socket_fd in secondary process to the primary process's socket */ -int -vfio_mp_sync_connect_to_primary(void) -{ - struct sockaddr_un addr; - socklen_t sockaddr_len; - int socket_fd; - - /* set up a socket */ - socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, "Failed to create socket!\n"); - return -1; - } - - get_socket_path(addr.sun_path, sizeof(addr.sun_path)); - addr.sun_family = AF_UNIX; - - sockaddr_len = sizeof(struct sockaddr_un); - - if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0) - return socket_fd; - - /* if connect failed */ - close(socket_fd); - return -1; -} - - - -/* - * socket listening thread for primary process - */ -static __attribute__((noreturn)) void * -pci_vfio_mp_sync_thread(void __rte_unused * arg) -{ - int ret, fd, vfio_group_no; - - /* wait for requests on the socket */ - for (;;) { - int conn_sock; - struct sockaddr_un addr; - socklen_t sockaddr_len = sizeof(addr); - - /* this is a blocking call */ - conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr, - &sockaddr_len); - - /* just restart on error */ - if (conn_sock == -1) - continue; - - /* set socket to linger after close */ - struct linger l; - l.l_onoff = 1; - l.l_linger = 60; - setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)); - - ret = vfio_mp_sync_receive_request(conn_sock); - - switch (ret) { - case SOCKET_REQ_CONTAINER: - fd = 
pci_vfio_get_container_fd(); - if (fd < 0) - vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); - else - vfio_mp_sync_send_fd(conn_sock, fd); - break; - case SOCKET_REQ_GROUP: - /* wait for group number */ - vfio_group_no = vfio_mp_sync_receive_request(conn_sock); - if (vfio_group_no < 0) { - close(conn_sock); - continue; - } - - fd = pci_vfio_get_group_fd(vfio_group_no); - - if (fd < 0) - vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); - /* if VFIO group exists but isn't bound to VFIO driver */ - else if (fd == 0) - vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); - /* if group exists and is bound to VFIO driver */ - else { - vfio_mp_sync_send_request(conn_sock, SOCKET_OK); - vfio_mp_sync_send_fd(conn_sock, fd); - } - break; - default: - vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); - break; - } - close(conn_sock); - } -} - -static int -vfio_mp_sync_socket_setup(void) -{ - int ret, socket_fd; - struct sockaddr_un addr; - socklen_t sockaddr_len; - - /* set up a socket */ - socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, "Failed to create socket!\n"); - return -1; - } - - get_socket_path(addr.sun_path, sizeof(addr.sun_path)); - addr.sun_family = AF_UNIX; - - sockaddr_len = sizeof(struct sockaddr_un); - - unlink(addr.sun_path); - - ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len); - if (ret) { - RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno)); - close(socket_fd); - return -1; - } - - ret = listen(socket_fd, 50); - if (ret) { - RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno)); - close(socket_fd); - return -1; - } - - /* save the socket in local configuration */ - mp_socket_fd = socket_fd; - - return 0; -} - -/* - * set up a local socket and tell it to listen for incoming connections - */ -int -pci_vfio_mp_sync_setup(void) -{ - int ret; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; - - if (vfio_mp_sync_socket_setup() < 0) { - RTE_LOG(ERR, EAL, "Failed to set up local socket!\n"); - return -1; - } - - ret = pthread_create(&socket_thread, NULL, - pci_vfio_mp_sync_thread, NULL); - if (ret) { - RTE_LOG(ERR, EAL, - "Failed to create thread for communication with secondary processes!\n"); - close(mp_socket_fd); - return -1; - } - - /* Set thread_name for aid in debugging. */ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pci-vfio-sync"); - ret = rte_thread_setname(socket_thread, thread_name); - if (ret) - RTE_LOG(ERR, EAL, - "Failed to set thread name for secondary processes!\n"); - - return 0; -} - -#endif diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_thread.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_thread.c deleted file mode 100644 index 18bd8e04..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_thread.c +++ /dev/null @@ -1,199 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. 
- * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "eal_private.h" -#include "eal_thread.h" - -RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY; -RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY; -RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset); - -/* - * Send a message to a slave lcore identified by slave_id to call a - * function f with argument arg. Once the execution is done, the - * remote lcore switch in FINISHED state. - */ -int -rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id) -{ - int n; - char c = 0; - int m2s = lcore_config[slave_id].pipe_master2slave[1]; - int s2m = lcore_config[slave_id].pipe_slave2master[0]; - - if (lcore_config[slave_id].state != WAIT) - return -EBUSY; - - lcore_config[slave_id].f = f; - lcore_config[slave_id].arg = arg; - - /* send message */ - n = 0; - while (n == 0 || (n < 0 && errno == EINTR)) - n = write(m2s, &c, 1); - if (n < 0) - rte_panic("cannot write on configuration pipe\n"); - - /* wait ack */ - do { - n = read(s2m, &c, 1); - } while (n < 0 && errno == EINTR); - - if (n <= 0) - rte_panic("cannot read on configuration pipe\n"); - - return 0; -} - -/* set affinity for current EAL thread */ -static int -eal_thread_set_affinity(void) -{ - unsigned lcore_id = rte_lcore_id(); - - /* acquire system unique id */ - rte_gettid(); - - /* update EAL thread core affinity */ - return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset); -} - -void eal_thread_init_master(unsigned lcore_id) -{ - /* set the lcore ID in per-lcore memory area */ - RTE_PER_LCORE(_lcore_id) = lcore_id; - - /* set CPU affinity */ - if (eal_thread_set_affinity() < 0) - rte_panic("cannot set affinity\n"); -} - -/* main loop of threads */ -__attribute__((noreturn)) void * -eal_thread_loop(__attribute__((unused)) void *arg) -{ - char c; - int n, ret; - unsigned lcore_id; - pthread_t thread_id; - int m2s, s2m; - char cpuset[RTE_CPU_AFFINITY_STR_LEN]; - - thread_id = pthread_self(); - - /* retrieve our lcore_id from the configuration structure */ - RTE_LCORE_FOREACH_SLAVE(lcore_id) { - if (thread_id == lcore_config[lcore_id].thread_id) - break; - } - if (lcore_id == RTE_MAX_LCORE) - rte_panic("cannot retrieve lcore id\n"); - - m2s = lcore_config[lcore_id].pipe_master2slave[0]; - s2m = lcore_config[lcore_id].pipe_slave2master[1]; - - /* set the lcore ID in per-lcore memory area */ - 
RTE_PER_LCORE(_lcore_id) = lcore_id; - - /* set CPU affinity */ - if (eal_thread_set_affinity() < 0) - rte_panic("cannot set affinity\n"); - - ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); - - RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n", - lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "..."); - - /* read on our pipe to get commands */ - while (1) { - void *fct_arg; - - /* wait command */ - do { - n = read(m2s, &c, 1); - } while (n < 0 && errno == EINTR); - - if (n <= 0) - rte_panic("cannot read on configuration pipe\n"); - - lcore_config[lcore_id].state = RUNNING; - - /* send ack */ - n = 0; - while (n == 0 || (n < 0 && errno == EINTR)) - n = write(s2m, &c, 1); - if (n < 0) - rte_panic("cannot write on configuration pipe\n"); - - if (lcore_config[lcore_id].f == NULL) - rte_panic("NULL function pointer\n"); - - /* call the function and store the return value */ - fct_arg = lcore_config[lcore_id].arg; - ret = lcore_config[lcore_id].f(fct_arg); - lcore_config[lcore_id].ret = ret; - rte_wmb(); - lcore_config[lcore_id].state = FINISHED; - } - - /* never reached */ - /* pthread_exit(NULL); */ - /* return NULL; */ -} - -/* require calling thread tid by gettid() */ -int rte_sys_gettid(void) -{ - return (int)syscall(SYS_gettid); -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_timer.c b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_timer.c deleted file mode 100644 index 9ceff330..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_timer.c +++ /dev/null @@ -1,304 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * Copyright(c) 2012-2013 6WIND S.A. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
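Taken together, rte_eal_remote_launch() and eal_thread_loop() implement a small command/ack protocol over per-lcore pipes: the master writes one byte, the slave acks it, runs f(arg), stores the return value and moves to FINISHED. From application code the usual pattern looks like the sketch below (rte_eal_mp_wait_lcore() is the standard EAL call for collecting the slaves afterwards; it is not part of this patch):

    #include <stdio.h>
    #include <rte_launch.h>
    #include <rte_lcore.h>

    static int worker(void *arg)
    {
        printf("hello from lcore %u (arg=%p)\n", rte_lcore_id(), arg);
        return 0;
    }

    /* somewhere on the master lcore, after rte_eal_init() has succeeded */
    unsigned lcore_id;
    RTE_LCORE_FOREACH_SLAVE(lcore_id)
        rte_eal_remote_launch(worker, NULL, lcore_id);  /* returns -EBUSY unless the lcore is in WAIT */
    rte_eal_mp_wait_lcore();                            /* blocks until every slave is FINISHED again */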
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "eal_private.h" -#include "eal_internal_cfg.h" - -enum timer_source eal_timer_source = EAL_TIMER_HPET; - -#ifdef RTE_LIBEAL_USE_HPET - -#define DEV_HPET "/dev/hpet" - -/* Maximum number of counters. */ -#define HPET_TIMER_NUM 3 - -/* General capabilities register */ -#define CLK_PERIOD_SHIFT 32 /* Clock period shift. */ -#define CLK_PERIOD_MASK 0xffffffff00000000ULL /* Clock period mask. */ - -/** - * HPET timer registers. From the Intel IA-PC HPET (High Precision Event - * Timers) Specification. - */ -struct eal_hpet_regs { - /* Memory-mapped, software visible registers */ - uint64_t capabilities; /**< RO General Capabilities Register. */ - uint64_t reserved0; /**< Reserved for future use. */ - uint64_t config; /**< RW General Configuration Register. */ - uint64_t reserved1; /**< Reserved for future use. */ - uint64_t isr; /**< RW Clear General Interrupt Status. */ - uint64_t reserved2[25]; /**< Reserved for future use. */ - union { - uint64_t counter; /**< RW Main Counter Value Register. */ - struct { - uint32_t counter_l; /**< RW Main Counter Low. */ - uint32_t counter_h; /**< RW Main Counter High. */ - }; - }; - uint64_t reserved3; /**< Reserved for future use. */ - struct { - uint64_t config; /**< RW Timer Config and Capability Reg. */ - uint64_t comp; /**< RW Timer Comparator Value Register. */ - uint64_t fsb; /**< RW FSB Interrupt Route Register. */ - uint64_t reserved4; /**< Reserved for future use. */ - } timers[HPET_TIMER_NUM]; /**< Set of HPET timers. */ -}; - -/* Mmap'd hpet registers */ -static volatile struct eal_hpet_regs *eal_hpet = NULL; - -/* Period at which the HPET counter increments in - * femtoseconds (10^-15 seconds). */ -static uint32_t eal_hpet_resolution_fs = 0; - -/* Frequency of the HPET counter in Hz */ -static uint64_t eal_hpet_resolution_hz = 0; - -/* Incremented 4 times during one 32bits hpet full count */ -static uint32_t eal_hpet_msb; - -static pthread_t msb_inc_thread_id; - -/* - * This function runs on a specific thread to update a global variable - * containing used to process MSB of the HPET (unfortunatelly, we need - * this because hpet is 32 bits by default under linux). - */ -static void -hpet_msb_inc(__attribute__((unused)) void *arg) -{ - uint32_t t; - - while (1) { - t = (eal_hpet->counter_l >> 30); - if (t != (eal_hpet_msb & 3)) - eal_hpet_msb ++; - sleep(10); - } -} - -uint64_t -rte_get_hpet_hz(void) -{ - if(internal_config.no_hpet) - rte_panic("Error, HPET called, but no HPET present\n"); - - return eal_hpet_resolution_hz; -} - -uint64_t -rte_get_hpet_cycles(void) -{ - uint32_t t, msb; - uint64_t ret; - - if(internal_config.no_hpet) - rte_panic("Error, HPET called, but no HPET present\n"); - - t = eal_hpet->counter_l; - msb = eal_hpet_msb; - ret = (msb + 2 - (t >> 30)) / 4; - ret <<= 32; - ret += t; - return ret; -} - -#endif - -#ifdef RTE_LIBEAL_USE_HPET -/* - * Open and mmap /dev/hpet (high precision event timer) that will - * provide our time reference. 
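The general-capabilities register encodes the counter period in femtoseconds, which is why the frequency is later derived as 10^15 divided by that period. A worked example with an assumed, but common, period value:

    /* 10^15 femtoseconds per second divided by the period gives the counter frequency */
    uint64_t period_fs = 69841279ULL;    /* assumed value, typical of the 14.318 MHz HPET clock */
    uint64_t hz = (1000ULL * 1000ULL * 1000ULL * 1000ULL * 1000ULL) / period_fs;
    /* hz == 14318179, i.e. roughly 14.318 MHz */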
- */ -int -rte_eal_hpet_init(int make_default) -{ - int fd, ret; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; - - if (internal_config.no_hpet) { - RTE_LOG(NOTICE, EAL, "HPET is disabled\n"); - return -1; - } - - fd = open(DEV_HPET, O_RDONLY); - if (fd < 0) { - RTE_LOG(ERR, EAL, "ERROR: Cannot open "DEV_HPET": %s!\n", - strerror(errno)); - internal_config.no_hpet = 1; - return -1; - } - eal_hpet = mmap(NULL, 1024, PROT_READ, MAP_SHARED, fd, 0); - if (eal_hpet == MAP_FAILED) { - RTE_LOG(ERR, EAL, "ERROR: Cannot mmap "DEV_HPET"!\n" - "Please enable CONFIG_HPET_MMAP in your kernel configuration " - "to allow HPET support.\n" - "To run without using HPET, set CONFIG_RTE_LIBEAL_USE_HPET=n " - "in your build configuration or use '--no-hpet' EAL flag.\n"); - close(fd); - internal_config.no_hpet = 1; - return -1; - } - close(fd); - - eal_hpet_resolution_fs = (uint32_t)((eal_hpet->capabilities & - CLK_PERIOD_MASK) >> - CLK_PERIOD_SHIFT); - - eal_hpet_resolution_hz = (1000ULL*1000ULL*1000ULL*1000ULL*1000ULL) / - (uint64_t)eal_hpet_resolution_fs; - - RTE_LOG(INFO, EAL, "HPET frequency is ~%"PRIu64" kHz\n", - eal_hpet_resolution_hz/1000); - - eal_hpet_msb = (eal_hpet->counter_l >> 30); - - /* create a thread that will increment a global variable for - * msb (hpet is 32 bits by default under linux) */ - ret = pthread_create(&msb_inc_thread_id, NULL, - (void *(*)(void *))hpet_msb_inc, NULL); - if (ret != 0) { - RTE_LOG(ERR, EAL, "ERROR: Cannot create HPET timer thread!\n"); - internal_config.no_hpet = 1; - return -1; - } - - /* - * Set thread_name for aid in debugging. - */ - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "hpet-msb-inc"); - ret = rte_thread_setname(msb_inc_thread_id, thread_name); - if (ret != 0) - RTE_LOG(ERR, EAL, - "ERROR: Cannot set HPET timer thread name!\n"); - - if (make_default) - eal_timer_source = EAL_TIMER_HPET; - return 0; -} -#endif - -static void -check_tsc_flags(void) -{ - char line[512]; - FILE *stream; - - stream = fopen("/proc/cpuinfo", "r"); - if (!stream) { - RTE_LOG(WARNING, EAL, "WARNING: Unable to open /proc/cpuinfo\n"); - return; - } - - while (fgets(line, sizeof line, stream)) { - char *constant_tsc; - char *nonstop_tsc; - - if (strncmp(line, "flags", 5) != 0) - continue; - - constant_tsc = strstr(line, "constant_tsc"); - nonstop_tsc = strstr(line, "nonstop_tsc"); - if (!constant_tsc || !nonstop_tsc) - RTE_LOG(WARNING, EAL, - "WARNING: cpu flags " - "constant_tsc=%s " - "nonstop_tsc=%s " - "-> using unreliable clock cycles !\n", - constant_tsc ? "yes":"no", - nonstop_tsc ? 
"yes":"no"); - break; - } - - fclose(stream); -} - -uint64_t -get_tsc_freq(void) -{ -#ifdef CLOCK_MONOTONIC_RAW -#define NS_PER_SEC 1E9 - - struct timespec sleeptime = {.tv_nsec = 5E8 }; /* 1/2 second */ - - struct timespec t_start, t_end; - uint64_t tsc_hz; - - if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) { - uint64_t ns, end, start = rte_rdtsc(); - nanosleep(&sleeptime,NULL); - clock_gettime(CLOCK_MONOTONIC_RAW, &t_end); - end = rte_rdtsc(); - ns = ((t_end.tv_sec - t_start.tv_sec) * NS_PER_SEC); - ns += (t_end.tv_nsec - t_start.tv_nsec); - - double secs = (double)ns/NS_PER_SEC; - tsc_hz = (uint64_t)((end - start)/secs); - return tsc_hz; - } -#endif - return 0; -} - -int -rte_eal_timer_init(void) -{ - - eal_timer_source = EAL_TIMER_TSC; - - set_tsc_freq(); - check_tsc_flags(); - return 0; -} diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_vfio.h b/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_vfio.h deleted file mode 100644 index 72ec3f62..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ /dev/null @@ -1,59 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef EAL_VFIO_H_ -#define EAL_VFIO_H_ - -/* - * determine if VFIO is present on the system - */ -#ifdef RTE_EAL_VFIO -#include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) -#include - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) -#define RTE_PCI_MSIX_TABLE_BIR 0x7 -#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 -#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff -#else -#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR -#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET -#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE -#endif - -#define VFIO_PRESENT -#endif /* kernel version */ -#endif /* RTE_EAL_VFIO */ - -#endif /* EAL_VFIO_H_ */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h deleted file mode 100644 index d9707780..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h +++ /dev/null @@ -1,108 +0,0 @@ -/*- - * This file is provided under a dual BSD/LGPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GNU LESSER GENERAL PUBLIC LICENSE - * - * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * Contact Information: - * Intel Corporation - * - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef _RTE_DOM0_COMMON_H_ -#define _RTE_DOM0_COMMON_H_ - -#ifdef __KERNEL__ -#include -#endif - -#define DOM0_NAME_MAX 256 -#define DOM0_MM_DEV "/dev/dom0_mm" - -#define DOM0_CONTIG_NUM_ORDER 9 /**< order of 2M */ -#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */ -#define DOM0_MEMBLOCK_SIZE 0x200000 /**< size of memory block(2M). */ -#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ -#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ - -#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) -#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) -#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) -#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) - -/** - * A structure used to store memory information. - */ -struct memory_info { - char name[DOM0_NAME_MAX]; - uint64_t size; -}; - -/** - * A structure used to store memory segment information. - */ -struct memseg_info { - uint32_t idx; - uint64_t pfn; - uint64_t size; - uint64_t mfn[DOM0_NUM_MEMBLOCK]; -}; - -/** - * A structure used to store memory block information. - */ -struct memblock_info { - uint8_t exchange_flag; - uint8_t used; - uint64_t vir_addr; - uint64_t pfn; - uint64_t mfn; -}; -#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h deleted file mode 100644 index 3dacbff8..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ /dev/null @@ -1,228 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_INTERRUPTS_H_ -#error "don't include this file directly, please include generic " -#endif - -#ifndef _RTE_LINUXAPP_INTERRUPTS_H_ -#define _RTE_LINUXAPP_INTERRUPTS_H_ - -#define RTE_MAX_RXTX_INTR_VEC_ID 32 -#define RTE_INTR_VEC_ZERO_OFFSET 0 -#define RTE_INTR_VEC_RXTX_OFFSET 1 - -enum rte_intr_handle_type { - RTE_INTR_HANDLE_UNKNOWN = 0, - RTE_INTR_HANDLE_UIO, /**< uio device handle */ - RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */ - RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ - RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */ - RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */ - RTE_INTR_HANDLE_ALARM, /**< alarm handle */ - RTE_INTR_HANDLE_EXT, /**< external handler */ - RTE_INTR_HANDLE_MAX -}; - -#define RTE_INTR_EVENT_ADD 1UL -#define RTE_INTR_EVENT_DEL 2UL - -typedef void (*rte_intr_event_cb_t)(int fd, void *arg); - -struct rte_epoll_data { - uint32_t event; /**< event type */ - void *data; /**< User data */ - rte_intr_event_cb_t cb_fun; /**< IN: callback fun */ - void *cb_arg; /**< IN: callback arg */ -}; - -enum { - RTE_EPOLL_INVALID = 0, - RTE_EPOLL_VALID, - RTE_EPOLL_EXEC, -}; - -/** interrupt epoll event obj, taken by epoll_event.ptr */ -struct rte_epoll_event { - volatile uint32_t status; /**< OUT: event status */ - int fd; /**< OUT: event fd */ - int epfd; /**< OUT: epoll instance the ev associated with */ - struct rte_epoll_data epdata; -}; - -/** Handle for interrupts. */ -struct rte_intr_handle { - union { - int vfio_dev_fd; /**< VFIO device file descriptor */ - int uio_cfg_fd; /**< UIO config file descriptor - for uio_pci_generic */ - }; - int fd; /**< interrupt event file descriptor */ - enum rte_intr_handle_type type; /**< handle type */ - uint32_t max_intr; /**< max interrupt requested */ - uint32_t nb_efd; /**< number of available efd(event fd) */ - int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */ - struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID]; - /**< intr vector epoll event */ - int *intr_vec; /**< intr vector number array */ -}; - -#define RTE_EPOLL_PER_THREAD -1 /**< to hint using per thread epfd */ - -/** - * It waits for events on the epoll instance. - * - * @param epfd - * Epoll instance fd on which the caller wait for events. - * @param events - * Memory area contains the events that will be available for the caller. - * @param maxevents - * Up to maxevents are returned, must greater than zero. - * @param timeout - * Specifying a timeout of -1 causes a block indefinitely. - * Specifying a timeout equal to zero cause to return immediately. - * @return - * - On success, returns the number of available event. - * - On failure, a negative value. - */ -int -rte_epoll_wait(int epfd, struct rte_epoll_event *events, - int maxevents, int timeout); - -/** - * It performs control operations on epoll instance referred by the epfd. - * It requests that the operation op be performed for the target fd. 
- * - * @param epfd - * Epoll instance fd on which the caller perform control operations. - * @param op - * The operation be performed for the target fd. - * @param fd - * The target fd on which the control ops perform. - * @param event - * Describes the object linked to the fd. - * Note: The caller must take care the object deletion after CTL_DEL. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int -rte_epoll_ctl(int epfd, int op, int fd, - struct rte_epoll_event *event); - -/** - * The function returns the per thread epoll instance. - * - * @return - * epfd the epoll instance referred to. - */ -int -rte_intr_tls_epfd(void); - -/** - * @param intr_handle - * Pointer to the interrupt handle. - * @param epfd - * Epoll instance fd which the intr vector associated to. - * @param op - * The operation be performed for the vector. - * Operation type of {ADD, DEL}. - * @param vec - * RX intr vector number added to the epoll instance wait list. - * @param data - * User raw data. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int -rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, - int epfd, int op, unsigned int vec, void *data); - -/** - * It enables the packet I/O interrupt event if it's necessary. - * It creates event fd for each interrupt vector when MSIX is used, - * otherwise it multiplexes a single event fd. - * - * @param intr_handle - * Pointer to the interrupt handle. - * @param nb_efd - * Number of interrupt vector trying to enable. - * The value 0 is not allowed. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int -rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd); - -/** - * It disables the packet I/O interrupt event. - * It deletes registered eventfds and closes the open fds. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -void -rte_intr_efd_disable(struct rte_intr_handle *intr_handle); - -/** - * The packet I/O interrupt on datapath is enabled or not. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -int -rte_intr_dp_is_en(struct rte_intr_handle *intr_handle); - -/** - * The interrupt handle instance allows other causes or not. - * Other causes stand for any none packet I/O interrupts. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -int -rte_intr_allow_others(struct rte_intr_handle *intr_handle); - -/** - * The multiple interrupt vector capability of interrupt handle instance. - * It returns zero if no multiple interrupt vector support. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -int -rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); - -#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h deleted file mode 100644 index bd1cc094..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h +++ /dev/null @@ -1,174 +0,0 @@ -/*- - * This file is provided under a dual BSD/LGPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GNU LESSER GENERAL PUBLIC LICENSE - * - * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * Contact Information: - * Intel Corporation - * - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef _RTE_KNI_COMMON_H_ -#define _RTE_KNI_COMMON_H_ - -#ifdef __KERNEL__ -#include -#endif - -/** - * KNI name is part of memzone name. - */ -#define RTE_KNI_NAMESIZE 32 - -#ifndef RTE_CACHE_LINE_SIZE -#define RTE_CACHE_LINE_SIZE 64 /**< Cache line size. */ -#endif - -/* - * Request id. - */ -enum rte_kni_req_id { - RTE_KNI_REQ_UNKNOWN = 0, - RTE_KNI_REQ_CHANGE_MTU, - RTE_KNI_REQ_CFG_NETWORK_IF, - RTE_KNI_REQ_MAX, -}; - -/* - * Structure for KNI request. - */ -struct rte_kni_request { - uint32_t req_id; /**< Request id */ - union { - uint32_t new_mtu; /**< New MTU */ - uint8_t if_up; /**< 1: interface up, 0: interface down */ - }; - int32_t result; /**< Result for processing request */ -} __attribute__((__packed__)); - -/* - * Fifo struct mapped in a shared memory. It describes a circular buffer FIFO - * Write and read should wrap around. Fifo is empty when write == read - * Writing should never overwrite the read position - */ -struct rte_kni_fifo { - volatile unsigned write; /**< Next position to be written*/ - volatile unsigned read; /**< Next position to be read */ - unsigned len; /**< Circular buffer length */ - unsigned elem_size; /**< Pointer size - for 32/64 bit OS */ - void * volatile buffer[0]; /**< The buffer contains mbuf pointers */ -}; - -/* - * The kernel image of the rte_mbuf struct, with only the relevant fields. 
- * Padding is necessary to assure the offsets of these fields - */ -struct rte_kni_mbuf { - void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); - char pad0[10]; - uint16_t data_off; /**< Start address of data in segment buffer. */ - char pad1[4]; - uint64_t ol_flags; /**< Offload features. */ - char pad2[4]; - uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */ - uint16_t data_len; /**< Amount of data in segment buffer. */ - - /* fields on second cache line */ - char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); - void *pool; - void *next; -}; - -/* - * Struct used to create a KNI device. Passed to the kernel in IOCTL call - */ - -struct rte_kni_device_info { - char name[RTE_KNI_NAMESIZE]; /**< Network device name for KNI */ - - phys_addr_t tx_phys; - phys_addr_t rx_phys; - phys_addr_t alloc_phys; - phys_addr_t free_phys; - - /* Used by Ethtool */ - phys_addr_t req_phys; - phys_addr_t resp_phys; - phys_addr_t sync_phys; - void * sync_va; - - /* mbuf mempool */ - void * mbuf_va; - phys_addr_t mbuf_phys; - - /* PCI info */ - uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */ - uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ - uint8_t bus; /**< Device bus */ - uint8_t devid; /**< Device ID */ - uint8_t function; /**< Device function. */ - - uint16_t group_id; /**< Group ID */ - uint32_t core_id; /**< core ID to bind for kernel thread */ - - uint8_t force_bind : 1; /**< Flag for kernel thread binding */ - - /* mbuf size */ - unsigned mbuf_size; -}; - -#define KNI_DEVICE "kni" - -#define RTE_KNI_IOCTL_TEST _IOWR(0, 1, int) -#define RTE_KNI_IOCTL_CREATE _IOWR(0, 2, struct rte_kni_device_info) -#define RTE_KNI_IOCTL_RELEASE _IOWR(0, 3, struct rte_kni_device_info) - -#endif /* _RTE_KNI_COMMON_H_ */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/igb_uio/compat.h b/src/dpdk22/lib/librte_eal/linuxapp/igb_uio/compat.h deleted file mode 100644 index c1d45a66..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/igb_uio/compat.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Minimal wrappers to allow compiling igb_uio on older kernels. - */ - -#ifndef RHEL_RELEASE_VERSION -#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) -#define pci_cfg_access_lock pci_block_user_cfg_access -#define pci_cfg_access_unlock pci_unblock_user_cfg_access -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) -#define HAVE_PTE_MASK_PAGE_IOMAP -#endif - -#ifndef PCI_MSIX_ENTRY_SIZE -#define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR 0 -#define PCI_MSIX_ENTRY_UPPER_ADDR 4 -#define PCI_MSIX_ENTRY_DATA 8 -#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \ - (!(defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9))) - -static int pci_num_vf(struct pci_dev *dev) -{ - struct iov { - int pos; - int nres; - u32 cap; - u16 ctrl; - u16 total; - u16 initial; - u16 nr_virtfn; - } *iov = (struct iov *)dev->sriov; - - if (!dev->is_physfn) - return 0; - - return iov->nr_virtfn; -} - -#endif /* < 2.6.34 */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ - (!(defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) - -#define kstrtoul strict_strtoul - -#endif /* < 2.6.39 */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) && \ - (!(defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 3))) - -/* Check if INTX works to control irq's. 
- * Set's INTX_DISABLE flag and reads it back - */ -static bool pci_intx_mask_supported(struct pci_dev *pdev) -{ - bool mask_supported = false; - uint16_t orig, new; - - pci_block_user_cfg_access(pdev); - pci_read_config_word(pdev, PCI_COMMAND, &orig); - pci_write_config_word(pdev, PCI_COMMAND, - orig ^ PCI_COMMAND_INTX_DISABLE); - pci_read_config_word(pdev, PCI_COMMAND, &new); - - if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) { - dev_err(&pdev->dev, "Command register changed from " - "0x%x to 0x%x: driver or hardware bug?\n", orig, new); - } else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) { - mask_supported = true; - pci_write_config_word(pdev, PCI_COMMAND, orig); - } - pci_unblock_user_cfg_access(pdev); - - return mask_supported; -} - -static bool pci_check_and_mask_intx(struct pci_dev *pdev) -{ - bool pending; - uint32_t status; - - pci_block_user_cfg_access(pdev); - pci_read_config_dword(pdev, PCI_COMMAND, &status); - - /* interrupt is not ours, goes to out */ - pending = (((status >> 16) & PCI_STATUS_INTERRUPT) != 0); - if (pending) { - uint16_t old, new; - - old = status; - if (status != 0) - new = old & (~PCI_COMMAND_INTX_DISABLE); - else - new = old | PCI_COMMAND_INTX_DISABLE; - - if (old != new) - pci_write_config_word(pdev, PCI_COMMAND, new); - } - pci_unblock_user_cfg_access(pdev); - - return pending; -} - -#endif /* < 3.3.0 */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/compat.h b/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/compat.h deleted file mode 100644 index e6eb97f2..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/compat.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Minimal wrappers to allow compiling xen_dom0 on older kernels. - */ - -#ifndef RHEL_RELEASE_VERSION -#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ - (!(defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) - -#define kstrtoul strict_strtoul - -#endif /* < 2.6.39 */ diff --git a/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h b/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h deleted file mode 100644 index 9d5ffb22..00000000 --- a/src/dpdk22/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h +++ /dev/null @@ -1,107 +0,0 @@ -/*- - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ -#ifndef _DOM0_MM_DEV_H_ -#define _DOM0_MM_DEV_H_ - -#include -#include -#include -#include -#include - -#define NUM_MEM_CTX 256 /**< Maximum number of memory context*/ -#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum times of allowing exchange fail .*/ -#define MAX_MEMBLOCK_SIZE (2 * DOM0_MEMBLOCK_SIZE) -#define MAX_NUM_ORDER (DOM0_CONTIG_NUM_ORDER + 1) -#define SIZE_PER_BLOCK 2 /**< Size of memory block (2MB).*/ - -/** - * A structure describing the private information for a dom0 device. - */ -struct dom0_mm_dev { - struct miscdevice miscdev; - uint8_t fail_times; - uint32_t used_memsize; - uint32_t num_mem_ctx; - uint32_t config_memsize; - uint32_t num_bigblock; - struct dom0_mm_data *mm_data[NUM_MEM_CTX]; - struct mutex data_lock; -}; - -struct dom0_mm_data{ - uint32_t refcnt; - uint32_t num_memseg; /**< Number of memory segment. */ - uint32_t mem_size; /**< Size of requesting memory. */ - - char name[DOM0_NAME_MAX]; - - /** Store global memory block IDs used by an instance */ - uint32_t block_num[DOM0_NUM_MEMBLOCK]; - - /** Store memory block information.*/ - struct memblock_info block_info[DOM0_NUM_MEMBLOCK]; - - /** Store memory segment information.*/ - struct memseg_info seg_info[DOM0_NUM_MEMSEG]; -}; - -#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args) -#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args) -#endif -- cgit 1.2.3-korg
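Illustration (editor's sketch, not part of the deleted sources): the eal_timer.c removed above widens Linux's 32-bit view of the HPET main counter in software. A helper thread samples the counter's top two bits (its "quadrant") every 10 seconds and bumps a global MSB count, and the read path recombines the two with (msb + 2 - (t >> 30)) / 4, which recovers the number of full 32-bit wraps while tolerating the poller lagging or leading by one quadrant. The standalone C sketch below shows only that arithmetic; hpet_read_low32() and the other names are hypothetical stand-ins for the mmap'd counter_l read, not DPDK code.

/* Sketch of the software MSB-extension used by the removed eal_timer.c. */
#include <stdint.h>
#include <stdio.h>

static uint32_t hpet_msb;   /* bumped by a poller when the top-2-bit quadrant moves */

static uint32_t hpet_read_low32(void)
{
    /* Stub: a real reader returns the mmap'd HPET main counter low word. */
    static uint32_t fake;
    return fake += 0x40000000u;     /* advance one quadrant per call */
}

/* Called periodically; must run more often than once per quarter wrap. */
static void hpet_msb_poll(void)
{
    uint32_t quadrant = hpet_read_low32() >> 30;
    if (quadrant != (hpet_msb & 3))
        hpet_msb++;
}

/* Combine the sampled MSB count with a fresh low-word read into 64 bits. */
static uint64_t hpet_read64(void)
{
    uint32_t t = hpet_read_low32();
    uint32_t msb = hpet_msb;
    /* (msb + 2 - quadrant) / 4 yields the full-wrap count, tolerating the
     * poller being one quadrant behind or ahead of the hardware. */
    uint64_t wraps = (msb + 2 - (t >> 30)) / 4;
    return (wraps << 32) + t;
}

int main(void)
{
    hpet_msb_poll();
    printf("64-bit HPET value: %llu\n", (unsigned long long)hpet_read64());
    return 0;
}

The scheme only stays correct if the poller samples faster than one quarter of the counter's rollover period, which is why the removed code's 10-second sleep is comfortably short for typical HPET rates.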
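Illustration (editor's sketch, not part of the deleted sources): the removed get_tsc_freq() estimates the TSC frequency, when CLOCK_MONOTONIC_RAW is available, by bracketing a half-second nanosleep with TSC reads and dividing the cycle delta by the elapsed wall-clock time. The sketch below uses the same technique but assumes an x86 toolchain exposing __rdtsc() via x86intrin.h instead of DPDK's rte_rdtsc(); estimate_tsc_hz() is a hypothetical name.

/* Sketch of the nanosleep-bracketed TSC frequency estimate. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <x86intrin.h>

static uint64_t estimate_tsc_hz(void)
{
    struct timespec sleeptime = { .tv_sec = 0, .tv_nsec = 500000000L }; /* 1/2 s */
    struct timespec t_start, t_end;

    if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) != 0)
        return 0;
    uint64_t start = __rdtsc();
    nanosleep(&sleeptime, NULL);
    clock_gettime(CLOCK_MONOTONIC_RAW, &t_end);
    uint64_t end = __rdtsc();

    /* Elapsed wall-clock time in nanoseconds. */
    int64_t ns = (int64_t)(t_end.tv_sec - t_start.tv_sec) * 1000000000LL
               + (t_end.tv_nsec - t_start.tv_nsec);

    /* Cycles elapsed divided by seconds elapsed gives Hz. */
    return (uint64_t)((double)(end - start) / ((double)ns / 1e9));
}

int main(void)
{
    printf("estimated TSC frequency: ~%llu Hz\n",
           (unsigned long long)estimate_tsc_hz());
    return 0;
}

On Linux with gcc or clang this should build with no special flags; the half-second window gives only an estimate, which is all the removed code needed for cycle-to-time conversion.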
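Illustration (editor's sketch, not part of the deleted sources): the removed rte_kni_common.h describes rte_kni_fifo as a circular buffer that is empty when write == read and whose writer never overwrites the read position. Those two invariants imply a ring that keeps one slot unused and is full when (write + 1) % len == read. The sketch below is a generic single-producer/single-consumer pointer ring built on the same invariants; the names, the fixed RING_LEN, and the absence of kernel/user shared-memory plumbing are all simplifications, not the DPDK structure itself.

/* Sketch of a pointer FIFO with the invariants documented for rte_kni_fifo. */
#include <stddef.h>
#include <stdio.h>

#define RING_LEN 8                      /* usable capacity is RING_LEN - 1 */

struct ptr_fifo {
    volatile unsigned write;            /* next slot to be written */
    volatile unsigned read;             /* next slot to be read    */
    void *buffer[RING_LEN];
};

/* Returns 1 on success, 0 if full; one slot stays free so the writer can
 * never catch up to and overwrite the read position. */
static int fifo_put(struct ptr_fifo *f, void *p)
{
    unsigned w = f->write;
    unsigned next = (w + 1) % RING_LEN;
    if (next == f->read)
        return 0;                       /* full */
    f->buffer[w] = p;
    f->write = next;
    return 1;
}

/* Returns 1 and stores an element in *p on success, 0 if empty. */
static int fifo_get(struct ptr_fifo *f, void **p)
{
    unsigned r = f->read;
    if (r == f->write)
        return 0;                       /* empty: write == read */
    *p = f->buffer[r];
    f->read = (r + 1) % RING_LEN;
    return 1;
}

int main(void)
{
    struct ptr_fifo f = { 0, 0, { NULL } };
    int x = 42;
    void *out = NULL;

    if (fifo_put(&f, &x) && fifo_get(&f, &out))
        printf("dequeued %d\n", *(int *)out);
    return 0;
}

A real shared-memory implementation also needs memory barriers between storing the element and publishing the updated index; they are omitted here for brevity.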