diff options
author | Luca Boccassi <luca.boccassi@gmail.com> | 2017-11-08 14:15:11 +0000 |
---|---|---|
committer | Luca Boccassi <luca.boccassi@gmail.com> | 2017-11-08 14:45:54 +0000 |
commit | 055c52583a2794da8ba1e85a48cce3832372b12f (patch) | |
tree | 8ceb1cb78fbb46a0f341f8ee24feb3c6b5540013 /lib/librte_eal | |
parent | f239aed5e674965691846e8ce3f187dd47523689 (diff) |
New upstream version 17.11-rc3
Change-Id: I6a5baa40612fe0c20f30b5fa773a6cbbac63a685
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib/librte_eal')
89 files changed, 2353 insertions, 7634 deletions
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index 005019ed..afa117de 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -46,16 +46,15 @@ LDLIBS += -lexecinfo LDLIBS += -lpthread LDLIBS += -lgcc_s -EXPORT_MAP := rte_eal_version.map +EXPORT_MAP := ../../rte_eal_version.map -LIBABIVER := 5 +LIBABIVER := 6 # specific to bsdapp exec-env SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c -SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c @@ -68,9 +67,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c -SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_vdev.c -SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci.c -SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci_uio.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c @@ -92,6 +88,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c SRCS-$(CONFIG_RTE_ARCH_X86) += rte_spinlock.c +SRCS-y += rte_cycles.c CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST) @@ -107,7 +104,7 @@ CFLAGS_eal_thread.o += -Wno-return-type CFLAGS_eal_hpet.o += -Wno-return-type endif -INC := rte_interrupts.h +INC := # no bsdapp specific headers SYMLINK-$(CONFIG_RTE_EXEC_ENV_BSDAPP)-include/exec-env := \ $(addprefix include/exec-env/,$(INC)) diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 5fa59884..369a682a 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -51,7 +51,6 @@ #include <rte_common.h> #include <rte_debug.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> @@ -66,7 +65,6 @@ #include <rte_cpuflags.h> #include <rte_interrupts.h> #include <rte_bus.h> -#include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> #include <rte_version.h> @@ -112,6 +110,13 @@ struct internal_config internal_config; /* used by rte_rdtsc() */ int rte_cycles_vmware_tsc_map; +/* Return mbuf pool ops name */ +const char * +rte_eal_mbuf_default_mempool_ops(void) +{ + return internal_config.mbuf_pool_ops_name; +} + /* Return a pointer to the configuration structure */ struct rte_config * rte_eal_get_configuration(void) @@ -119,6 +124,12 @@ rte_eal_get_configuration(void) return &rte_config; } +enum rte_iova_mode +rte_eal_iova_mode(void) +{ + return rte_eal_get_configuration()->iova_mode; +} + /* parse a sysfs (or other) file containing one integer value */ int eal_parse_sysfs_value(const char *filename, unsigned long *val) @@ -385,6 +396,9 @@ eal_parse_args(int argc, char **argv) continue; switch (opt) { + case OPT_MBUF_POOL_OPS_NAME_NUM: + internal_config.mbuf_pool_ops_name = optarg; + break; case 'h': eal_usage(prgname); exit(EXIT_SUCCESS); @@ -535,6 +549,29 @@ rte_eal_init(int argc, char **argv) return -1; } + if (eal_plugins_init() < 0) { + rte_eal_init_alert("Cannot init plugins\n"); + rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } + + if (eal_option_device_parse()) { + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } + + if (rte_bus_scan()) { + rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } + + /* autodetect the iova mapping mode (default is iova_pa) */ + rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class(); + if (internal_config.no_hugetlbfs == 0 && internal_config.process_type != RTE_PROC_SECONDARY && eal_hugepage_info_init() < 0) { @@ -603,9 +640,6 @@ rte_eal_init(int argc, char **argv) eal_check_mem_on_local_socket(); - if (eal_plugins_init() < 0) - rte_eal_init_alert("Cannot init plugins\n"); - eal_thread_init_master(rte_config.master_lcore); ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); @@ -614,17 +648,6 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, thread_id, cpuset, ret == 0 ? "" : "..."); - if (eal_option_device_parse()) { - rte_errno = ENODEV; - return -1; - } - - if (rte_bus_scan()) { - rte_eal_init_alert("Cannot scan the buses for devices\n"); - rte_errno = ENODEV; - return -1; - } - RTE_LCORE_FOREACH_SLAVE(i) { /* @@ -698,3 +721,60 @@ rte_eal_process_type(void) { return rte_config.process_type; } + +int rte_eal_has_pci(void) +{ + return !internal_config.no_pci; +} + +int rte_eal_create_uio_dev(void) +{ + return internal_config.create_uio_dev; +} + +enum rte_intr_mode +rte_eal_vfio_intr_mode(void) +{ + return RTE_INTR_MODE_NONE; +} + +/* dummy forward declaration. */ +struct vfio_device_info; + +/* dummy prototypes. */ +int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, + int *vfio_dev_fd, struct vfio_device_info *device_info); +int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); +int rte_vfio_enable(const char *modname); +int rte_vfio_is_enabled(const char *modname); +int rte_vfio_noiommu_is_enabled(void); + +int rte_vfio_setup_device(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int *vfio_dev_fd, + __rte_unused struct vfio_device_info *device_info) +{ + return -1; +} + +int rte_vfio_release_device(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int fd) +{ + return -1; +} + +int rte_vfio_enable(__rte_unused const char *modname) +{ + return -1; +} + +int rte_vfio_is_enabled(__rte_unused const char *modname) +{ + return 0; +} + +int rte_vfio_noiommu_is_enabled(void) +{ + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c index ea2afff4..deba8770 100644 --- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c +++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c @@ -125,3 +125,38 @@ rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) RTE_SET_USED(intr_handle); return 0; } + +int +rte_epoll_wait(int epfd, struct rte_epoll_event *events, + int maxevents, int timeout) +{ + RTE_SET_USED(epfd); + RTE_SET_USED(events); + RTE_SET_USED(maxevents); + RTE_SET_USED(timeout); + + return -ENOTSUP; +} + +int +rte_epoll_ctl(int epfd, int op, int fd, struct rte_epoll_event *event) +{ + RTE_SET_USED(epfd); + RTE_SET_USED(op); + RTE_SET_USED(fd); + RTE_SET_USED(event); + + return -ENOTSUP; +} + +int +rte_intr_tls_epfd(void) +{ + return -ENOTSUP; +} + +void +rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle) +{ + RTE_SET_USED(intr_handle); +} diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c index 3614da8d..6ba05857 100644 --- a/lib/librte_eal/bsdapp/eal/eal_memory.c +++ b/lib/librte_eal/bsdapp/eal/eal_memory.c @@ -54,9 +54,14 @@ phys_addr_t rte_mem_virt2phy(const void *virtaddr) { /* XXX not implemented. This function is only used by - * rte_mempool_virt2phy() when hugepages are disabled. */ + * rte_mempool_virt2iova() when hugepages are disabled. */ (void)virtaddr; - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; +} +rte_iova_t +rte_mem_virt2iova(const void *virtaddr) +{ + return rte_mem_virt2phy(virtaddr); } int @@ -73,7 +78,7 @@ rte_eal_hugepage_init(void) /* for debug purposes, hugetlbfs can be disabled */ if (internal_config.no_hugetlbfs) { addr = malloc(internal_config.memory); - mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; + mcfg->memseg[0].iova = (rte_iova_t)(uintptr_t)addr; mcfg->memseg[0].addr = addr; mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; mcfg->memseg[0].len = internal_config.memory; @@ -88,7 +93,7 @@ rte_eal_hugepage_init(void) hpi = &internal_config.hugepage_info[i]; for (j = 0; j < hpi->num_pages[0]; j++) { struct rte_memseg *seg; - uint64_t physaddr; + rte_iova_t physaddr; int error; size_t sysctl_size = sizeof(physaddr); char physaddr_str[64]; @@ -114,7 +119,7 @@ rte_eal_hugepage_init(void) seg = &mcfg->memseg[seg_idx++]; seg->addr = addr; - seg->phys_addr = physaddr; + seg->iova = physaddr; seg->hugepage_sz = hpi->hugepage_sz; seg->len = hpi->hugepage_sz; seg->nchannel = mcfg->nchannel; @@ -192,3 +197,9 @@ error: close(fd_hugepage); return -1; } + +int +rte_eal_using_phys_addrs(void) +{ + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c deleted file mode 100644 index 04eacdcc..00000000 --- a/lib/librte_eal/bsdapp/eal/eal_pci.c +++ /dev/null @@ -1,670 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <ctype.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> -#include <unistd.h> -#include <inttypes.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <dirent.h> -#include <limits.h> -#include <sys/queue.h> -#include <sys/mman.h> -#include <sys/ioctl.h> -#include <sys/pciio.h> -#include <dev/pci/pcireg.h> - -#if defined(RTE_ARCH_X86) -#include <machine/cpufunc.h> -#endif - -#include <rte_interrupts.h> -#include <rte_log.h> -#include <rte_pci.h> -#include <rte_common.h> -#include <rte_launch.h> -#include <rte_memory.h> -#include <rte_memzone.h> -#include <rte_eal.h> -#include <rte_eal_memconfig.h> -#include <rte_per_lcore.h> -#include <rte_lcore.h> -#include <rte_malloc.h> -#include <rte_string_fns.h> -#include <rte_debug.h> -#include <rte_devargs.h> - -#include "eal_filesystem.h" -#include "eal_private.h" - -/** - * @file - * PCI probing under linux - * - * This code is used to simulate a PCI probe by parsing information in - * sysfs. Moreover, when a registered driver matches a device, the - * kernel driver currently using it is unloaded and replaced by - * igb_uio module, which is a very minimal userland driver for Intel - * network card, only providing access to PCI BAR to applications, and - * enabling bus master. - */ - -extern struct rte_pci_bus rte_pci_bus; - -/* Map pci device */ -int -rte_pci_map_device(struct rte_pci_device *dev) -{ - int ret = -1; - - /* try mapping the NIC resources */ - switch (dev->kdrv) { - case RTE_KDRV_NIC_UIO: - /* map resources for devices that use uio */ - ret = pci_uio_map_resource(dev); - break; - default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - ret = 1; - break; - } - - return ret; -} - -/* Unmap pci device */ -void -rte_pci_unmap_device(struct rte_pci_device *dev) -{ - /* try unmapping the NIC resources */ - switch (dev->kdrv) { - case RTE_KDRV_NIC_UIO: - /* unmap resources for devices that use uio */ - pci_uio_unmap_resource(dev); - break; - default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - break; - } -} - -void -pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res) -{ - rte_free(uio_res); - - if (dev->intr_handle.fd) { - close(dev->intr_handle.fd); - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - } -} - -int -pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res) -{ - char devname[PATH_MAX]; /* contains the /dev/uioX */ - struct rte_pci_addr *loc; - - loc = &dev->addr; - - snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u", - dev->addr.bus, dev->addr.devid, dev->addr.function); - - if (access(devname, O_RDWR) < 0) { - RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " - "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); - return 1; - } - - /* save fd if in primary process */ - dev->intr_handle.fd = open(devname, O_RDWR); - if (dev->intr_handle.fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - - /* allocate the mapping details for secondary processes*/ - *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); - if (*uio_res == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot store uio mmap details\n", __func__); - goto error; - } - - snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); - memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); - - return 0; - -error: - pci_uio_free_resource(dev, *uio_res); - return -1; -} - -int -pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx) -{ - int fd; - char *devname; - void *mapaddr; - uint64_t offset; - uint64_t pagesz; - struct pci_map *maps; - - maps = uio_res->maps; - devname = uio_res->path; - pagesz = sysconf(_SC_PAGESIZE); - - /* allocate memory to keep path */ - maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); - if (maps[map_idx].path == NULL) { - RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", - strerror(errno)); - return -1; - } - - /* - * open resource file, to mmap it - */ - fd = open(devname, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - - /* if matching map is found, then use it */ - offset = res_idx * pagesz; - mapaddr = pci_map_resource(NULL, fd, (off_t)offset, - (size_t)dev->mem_resource[res_idx].len, 0); - close(fd); - if (mapaddr == MAP_FAILED) - goto error; - - maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; - maps[map_idx].size = dev->mem_resource[res_idx].len; - maps[map_idx].addr = mapaddr; - maps[map_idx].offset = offset; - strcpy(maps[map_idx].path, devname); - dev->mem_resource[res_idx].addr = mapaddr; - - return 0; - -error: - rte_free(maps[map_idx].path); - return -1; -} - -static int -pci_scan_one(int dev_pci_fd, struct pci_conf *conf) -{ - struct rte_pci_device *dev; - struct pci_bar_io bar; - unsigned i, max; - - dev = malloc(sizeof(*dev)); - if (dev == NULL) { - return -1; - } - - memset(dev, 0, sizeof(*dev)); - dev->addr.domain = conf->pc_sel.pc_domain; - dev->addr.bus = conf->pc_sel.pc_bus; - dev->addr.devid = conf->pc_sel.pc_dev; - dev->addr.function = conf->pc_sel.pc_func; - - /* get vendor id */ - dev->id.vendor_id = conf->pc_vendor; - - /* get device id */ - dev->id.device_id = conf->pc_device; - - /* get subsystem_vendor id */ - dev->id.subsystem_vendor_id = conf->pc_subvendor; - - /* get subsystem_device id */ - dev->id.subsystem_device_id = conf->pc_subdevice; - - /* get class id */ - dev->id.class_id = (conf->pc_class << 16) | - (conf->pc_subclass << 8) | - (conf->pc_progif); - - /* TODO: get max_vfs */ - dev->max_vfs = 0; - - /* FreeBSD has no NUMA support (yet) */ - dev->device.numa_node = 0; - - pci_name_set(dev); - - /* FreeBSD has only one pass through driver */ - dev->kdrv = RTE_KDRV_NIC_UIO; - - /* parse resources */ - switch (conf->pc_hdr & PCIM_HDRTYPE) { - case PCIM_HDRTYPE_NORMAL: - max = PCIR_MAX_BAR_0; - break; - case PCIM_HDRTYPE_BRIDGE: - max = PCIR_MAX_BAR_1; - break; - case PCIM_HDRTYPE_CARDBUS: - max = PCIR_MAX_BAR_2; - break; - default: - goto skipdev; - } - - for (i = 0; i <= max; i++) { - bar.pbi_sel = conf->pc_sel; - bar.pbi_reg = PCIR_BAR(i); - if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0) - continue; - - dev->mem_resource[i].len = bar.pbi_length; - if (PCI_BAR_IO(bar.pbi_base)) { - dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf)); - continue; - } - dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf); - } - - /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { - rte_pci_add_device(dev); - } - else { - struct rte_pci_device *dev2 = NULL; - int ret; - - TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { - ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); - if (ret > 0) - continue; - else if (ret < 0) { - rte_pci_insert_device(dev2, dev); - } else { /* already registered */ - dev2->kdrv = dev->kdrv; - dev2->max_vfs = dev->max_vfs; - pci_name_set(dev2); - memmove(dev2->mem_resource, - dev->mem_resource, - sizeof(dev->mem_resource)); - free(dev); - } - return 0; - } - rte_pci_add_device(dev); - } - - return 0; - -skipdev: - free(dev); - return 0; -} - -/* - * Scan the content of the PCI bus, and add the devices in the devices - * list. Call pci_scan_one() for each pci entry found. - */ -int -rte_pci_scan(void) -{ - int fd; - unsigned dev_count = 0; - struct pci_conf matches[16]; - struct pci_conf_io conf_io = { - .pat_buf_len = 0, - .num_patterns = 0, - .patterns = NULL, - .match_buf_len = sizeof(matches), - .matches = &matches[0], - }; - - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - fd = open("/dev/pci", O_RDONLY); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); - goto error; - } - - do { - unsigned i; - if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { - RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", - __func__, strerror(errno)); - goto error; - } - - for (i = 0; i < conf_io.num_matches; i++) - if (pci_scan_one(fd, &matches[i]) < 0) - goto error; - - dev_count += conf_io.num_matches; - } while(conf_io.status == PCI_GETCONF_MORE_DEVS); - - close(fd); - - RTE_LOG(DEBUG, EAL, "PCI scan found %u devices\n", dev_count); - return 0; - -error: - if (fd >= 0) - close(fd); - return -1; -} - -int -pci_update_device(const struct rte_pci_addr *addr) -{ - int fd; - struct pci_conf matches[2]; - struct pci_match_conf match = { - .pc_sel = { - .pc_domain = addr->domain, - .pc_bus = addr->bus, - .pc_dev = addr->devid, - .pc_func = addr->function, - }, - }; - struct pci_conf_io conf_io = { - .pat_buf_len = 0, - .num_patterns = 1, - .patterns = &match, - .match_buf_len = sizeof(matches), - .matches = &matches[0], - }; - - fd = open("/dev/pci", O_RDONLY); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); - goto error; - } - - if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { - RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", - __func__, strerror(errno)); - goto error; - } - - if (conf_io.num_matches != 1) - goto error; - - if (pci_scan_one(fd, &matches[0]) < 0) - goto error; - - close(fd); - - return 0; - -error: - if (fd >= 0) - close(fd); - return -1; -} - -/* Read PCI config space. */ -int rte_pci_read_config(const struct rte_pci_device *dev, - void *buf, size_t len, off_t offset) -{ - int fd = -1; - int size; - struct pci_io pi = { - .pi_sel = { - .pc_domain = dev->addr.domain, - .pc_bus = dev->addr.bus, - .pc_dev = dev->addr.devid, - .pc_func = dev->addr.function, - }, - .pi_reg = offset, - }; - - fd = open("/dev/pci", O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); - goto error; - } - - while (len > 0) { - size = (len >= 4) ? 4 : ((len >= 2) ? 2 : 1); - pi.pi_width = size; - - if (ioctl(fd, PCIOCREAD, &pi) < 0) - goto error; - memcpy(buf, &pi.pi_data, size); - - buf = (char *)buf + size; - pi.pi_reg += size; - len -= size; - } - close(fd); - - return 0; - - error: - if (fd >= 0) - close(fd); - return -1; -} - -/* Write PCI config space. */ -int rte_pci_write_config(const struct rte_pci_device *dev, - const void *buf, size_t len, off_t offset) -{ - int fd = -1; - - struct pci_io pi = { - .pi_sel = { - .pc_domain = dev->addr.domain, - .pc_bus = dev->addr.bus, - .pc_dev = dev->addr.devid, - .pc_func = dev->addr.function, - }, - .pi_reg = offset, - .pi_data = *(const uint32_t *)buf, - .pi_width = len, - }; - - if (len == 3 || len > sizeof(pi.pi_data)) { - RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__); - goto error; - } - - memcpy(&pi.pi_data, buf, len); - - fd = open("/dev/pci", O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); - goto error; - } - - if (ioctl(fd, PCIOCWRITE, &pi) < 0) - goto error; - - close(fd); - return 0; - - error: - if (fd >= 0) - close(fd); - return -1; -} - -int -rte_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - int ret; - - switch (dev->kdrv) { -#if defined(RTE_ARCH_X86) - case RTE_KDRV_NIC_UIO: - if ((uintptr_t) dev->mem_resource[bar].addr <= UINT16_MAX) { - p->base = (uintptr_t)dev->mem_resource[bar].addr; - ret = 0; - } else - ret = -1; - break; -#endif - default: - ret = -1; - break; - } - - if (!ret) - p->dev = dev; - - return ret; -} - -static void -pci_uio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ -#if defined(RTE_ARCH_X86) - uint8_t *d; - int size; - unsigned short reg = p->base + offset; - - for (d = data; len > 0; d += size, reg += size, len -= size) { - if (len >= 4) { - size = 4; - *(uint32_t *)d = inl(reg); - } else if (len >= 2) { - size = 2; - *(uint16_t *)d = inw(reg); - } else { - size = 1; - *d = inb(reg); - } - } -#else - RTE_SET_USED(p); - RTE_SET_USED(data); - RTE_SET_USED(len); - RTE_SET_USED(offset); -#endif -} - -void -rte_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ - switch (p->dev->kdrv) { - case RTE_KDRV_NIC_UIO: - pci_uio_ioport_read(p, data, len, offset); - break; - default: - break; - } -} - -static void -pci_uio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ -#if defined(RTE_ARCH_X86) - const uint8_t *s; - int size; - unsigned short reg = p->base + offset; - - for (s = data; len > 0; s += size, reg += size, len -= size) { - if (len >= 4) { - size = 4; - outl(reg, *(const uint32_t *)s); - } else if (len >= 2) { - size = 2; - outw(reg, *(const uint16_t *)s); - } else { - size = 1; - outb(reg, *s); - } - } -#else - RTE_SET_USED(p); - RTE_SET_USED(data); - RTE_SET_USED(len); - RTE_SET_USED(offset); -#endif -} - -void -rte_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ - switch (p->dev->kdrv) { - case RTE_KDRV_NIC_UIO: - pci_uio_ioport_write(p, data, len, offset); - break; - default: - break; - } -} - -int -rte_pci_ioport_unmap(struct rte_pci_ioport *p) -{ - int ret; - - switch (p->dev->kdrv) { -#if defined(RTE_ARCH_X86) - case RTE_KDRV_NIC_UIO: - ret = 0; - break; -#endif - default: - ret = -1; - break; - } - - return ret; -} diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c index 783d68c5..2a2136a2 100644 --- a/lib/librte_eal/bsdapp/eal/eal_thread.c +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c @@ -46,7 +46,6 @@ #include <rte_launch.h> #include <rte_log.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_per_lcore.h> #include <rte_eal.h> #include <rte_lcore.h> diff --git a/lib/librte_eal/bsdapp/eal/eal_timer.c b/lib/librte_eal/bsdapp/eal/eal_timer.c index f12d9bd2..14421943 100644 --- a/lib/librte_eal/bsdapp/eal/eal_timer.c +++ b/lib/librte_eal/bsdapp/eal/eal_timer.c @@ -42,7 +42,6 @@ #include <rte_log.h> #include <rte_cycles.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_debug.h> diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h deleted file mode 100644 index 99a33432..00000000 --- a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h +++ /dev/null @@ -1,107 +0,0 @@ -/*- - * This file is provided under a dual BSD/LGPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GNU LESSER GENERAL PUBLIC LICENSE - * - * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * Contact Information: - * Intel Corporation - * - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef _RTE_DOM0_COMMON_H_ -#define _RTE_DOM0_COMMON_H_ - -#ifdef __KERNEL__ -#include <linux/if.h> -#endif - -#define DOM0_NAME_MAX 256 -#define DOM0_MM_DEV "/dev/dom0_mm" - -#define DOM0_CONTIG_NUM_ORDER 9 /**< 2M order */ -#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */ -#define DOM0_MEMBLOCK_SIZE 0x200000 /**< Maximum nb. of memory block(2M). */ -#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ -#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ - -#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) -#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) -#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) -#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) - -/** - * A structure used to store memory information. - */ -struct memory_info { - char name[DOM0_NAME_MAX]; - uint64_t size; -}; - -/** - * A structure used to store memory segment information. - */ -struct memseg_info { - uint32_t idx; - uint64_t pfn; - uint64_t size; - uint64_t mfn[DOM0_NUM_MEMBLOCK]; -}; - -/** - * A structure used to store memory block information. - */ -struct memblock_info { - uint8_t exchange_flag; - uint64_t vir_addr; - uint64_t pfn; - uint64_t mfn; -}; -#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h deleted file mode 100644 index c1995ee1..00000000 --- a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h +++ /dev/null @@ -1,137 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_INTERRUPTS_H_ -#error "don't include this file directly, please include generic <rte_interrupts.h>" -#endif - -#ifndef _RTE_BSDAPP_INTERRUPTS_H_ -#define _RTE_BSDAPP_INTERRUPTS_H_ - -#define RTE_INTR_VEC_ZERO_OFFSET 0 -#define RTE_INTR_VEC_RXTX_OFFSET 1 - -#define RTE_MAX_RXTX_INTR_VEC_ID 32 - -enum rte_intr_handle_type { - RTE_INTR_HANDLE_UNKNOWN = 0, - RTE_INTR_HANDLE_UIO, /**< uio device handle */ - RTE_INTR_HANDLE_ALARM, /**< alarm handle */ - RTE_INTR_HANDLE_MAX -}; - -/** Handle for interrupts. */ -struct rte_intr_handle { - int fd; /**< file descriptor */ - int uio_cfg_fd; /**< UIO config file descriptor */ - enum rte_intr_handle_type type; /**< handle type */ - int max_intr; /**< max interrupt requested */ - uint32_t nb_efd; /**< number of available efds */ - int *intr_vec; /**< intr vector number array */ -}; - -/** - * @param intr_handle - * Pointer to the interrupt handle. - * @param epfd - * Epoll instance fd which the intr vector associated to. - * @param op - * The operation be performed for the vector. - * Operation type of {ADD, DEL}. - * @param vec - * RX intr vector number added to the epoll instance wait list. - * @param data - * User raw data. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int -rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, - int epfd, int op, unsigned int vec, void *data); - -/** - * It enables the fastpath event fds if it's necessary. - * It creates event fds when multi-vectors allowed, - * otherwise it multiplexes the single event fds. - * - * @param intr_handle - * Pointer to the interrupt handle. - * @param nb_efd - * Number of interrupt vector trying to enable. - * The value 0 is not allowed. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int -rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd); - -/** - * It disable the fastpath event fds. - * It deletes registered eventfds and closes the open fds. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -void -rte_intr_efd_disable(struct rte_intr_handle *intr_handle); - -/** - * The fastpath interrupt is enabled or not. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -int rte_intr_dp_is_en(struct rte_intr_handle *intr_handle); - -/** - * The interrupt handle instance allows other cause or not. - * Other cause stands for none fastpath interrupt. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -int rte_intr_allow_others(struct rte_intr_handle *intr_handle); - -/** - * The multiple interrupt vector capability of interrupt handle instance. - * It returns zero if no multiple interrupt vector support. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -int -rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); - -#endif /* _RTE_BSDAPP_INTERRUPTS_H_ */ diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map deleted file mode 100644 index aac6fd77..00000000 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ /dev/null @@ -1,239 +0,0 @@ -DPDK_2.0 { - global: - - __rte_panic; - devargs_list; - eal_parse_sysfs_value; - eal_timer_source; - lcore_config; - per_lcore__lcore_id; - per_lcore__rte_errno; - rte_calloc; - rte_calloc_socket; - rte_cpu_check_supported; - rte_cpu_get_flag_enabled; - rte_cycles_vmware_tsc_map; - rte_delay_us; - rte_dump_physmem_layout; - rte_dump_registers; - rte_dump_stack; - rte_dump_tailq; - rte_eal_alarm_cancel; - rte_eal_alarm_set; - rte_eal_devargs_add; - rte_eal_devargs_dump; - rte_eal_devargs_type_count; - rte_eal_get_configuration; - rte_eal_get_lcore_state; - rte_eal_get_physmem_layout; - rte_eal_get_physmem_size; - rte_eal_has_hugepages; - rte_eal_hpet_init; - rte_eal_init; - rte_eal_iopl_init; - rte_eal_lcore_role; - rte_eal_mp_remote_launch; - rte_eal_mp_wait_lcore; - rte_eal_parse_devargs_str; - rte_eal_process_type; - rte_eal_remote_launch; - rte_eal_tailq_lookup; - rte_eal_tailq_register; - rte_eal_wait_lcore; - rte_exit; - rte_free; - rte_get_hpet_cycles; - rte_get_hpet_hz; - rte_get_log_level; - rte_get_log_type; - rte_get_tsc_hz; - rte_hexdump; - rte_intr_callback_register; - rte_intr_callback_unregister; - rte_intr_disable; - rte_intr_enable; - rte_log; - rte_log_cur_msg_loglevel; - rte_log_cur_msg_logtype; - rte_logs; - rte_malloc; - rte_malloc_dump_stats; - rte_malloc_get_socket_stats; - rte_malloc_set_limit; - rte_malloc_socket; - rte_malloc_validate; - rte_malloc_virt2phy; - rte_mem_lock_page; - rte_mem_phy2mch; - rte_mem_virt2phy; - rte_memdump; - rte_memory_get_nchannel; - rte_memory_get_nrank; - rte_memzone_dump; - rte_memzone_lookup; - rte_memzone_reserve; - rte_memzone_reserve_aligned; - rte_memzone_reserve_bounded; - rte_memzone_walk; - rte_openlog_stream; - rte_realloc; - rte_set_application_usage_hook; - rte_set_log_level; - rte_set_log_type; - rte_socket_id; - rte_strerror; - rte_strsplit; - rte_sys_gettid; - rte_thread_get_affinity; - rte_thread_set_affinity; - rte_vlog; - rte_xen_dom0_memory_attach; - rte_xen_dom0_memory_init; - rte_zmalloc; - rte_zmalloc_socket; - - local: *; -}; - -DPDK_2.1 { - global: - - rte_intr_allow_others; - rte_intr_dp_is_en; - rte_intr_efd_disable; - rte_intr_efd_enable; - rte_intr_rx_ctl; - rte_memzone_free; - -} DPDK_2.0; - -DPDK_2.2 { - global: - - rte_intr_cap_multiple; - rte_keepalive_create; - rte_keepalive_dispatch_pings; - rte_keepalive_mark_alive; - rte_keepalive_register_core; - rte_xen_dom0_supported; - -} DPDK_2.1; - -DPDK_16.04 { - global: - - rte_cpu_get_flag_name; - rte_eal_primary_proc_alive; - -} DPDK_2.2; - -DPDK_16.07 { - global: - - pci_get_sysfs_path; - rte_keepalive_mark_sleep; - rte_keepalive_register_relay_callback; - rte_rtm_supported; - rte_thread_setname; - -} DPDK_16.04; - -DPDK_16.11 { - global: - - rte_delay_us_block; - rte_delay_us_callback_register; - rte_eal_dev_attach; - rte_eal_dev_detach; - -} DPDK_16.07; - -DPDK_17.02 { - global: - - rte_bus_dump; - rte_bus_probe; - rte_bus_register; - rte_bus_scan; - rte_bus_unregister; - -} DPDK_16.11; - -DPDK_17.05 { - global: - - rte_cpu_is_supported; - rte_log_dump; - rte_log_register; - rte_log_get_global_level; - rte_log_set_global_level; - rte_log_set_level; - rte_log_set_level_regexp; - rte_pci_detach; - rte_pci_dump; - rte_pci_ioport_map; - rte_pci_ioport_read; - rte_pci_ioport_unmap; - rte_pci_ioport_write; - rte_pci_map_device; - rte_pci_probe; - rte_pci_probe_one; - rte_pci_read_config; - rte_pci_register; - rte_pci_scan; - rte_pci_unmap_device; - rte_pci_unregister; - rte_pci_write_config; - rte_vdev_init; - rte_vdev_register; - rte_vdev_uninit; - rte_vdev_unregister; - vfio_get_container_fd; - vfio_get_group_fd; - vfio_get_group_no; - -} DPDK_17.02; - -DPDK_17.08 { - global: - - rte_bus_find; - rte_bus_find_by_device; - rte_bus_find_by_name; - rte_log_get_level; - -} DPDK_17.05; - -EXPERIMENTAL { - global: - - rte_eal_devargs_insert; - rte_eal_devargs_parse; - rte_eal_devargs_remove; - rte_eal_hotplug_add; - rte_eal_hotplug_remove; - rte_service_disable_on_lcore; - rte_service_dump; - rte_service_enable_on_lcore; - rte_service_get_by_id; - rte_service_get_by_name; - rte_service_get_count; - rte_service_get_enabled_on_lcore; - rte_service_is_running; - rte_service_lcore_add; - rte_service_lcore_count; - rte_service_lcore_del; - rte_service_lcore_list; - rte_service_lcore_reset_all; - rte_service_lcore_start; - rte_service_lcore_stop; - rte_service_probe_capability; - rte_service_register; - rte_service_reset; - rte_service_set_stats_enable; - rte_service_start; - rte_service_start_with_defaults; - rte_service_stop; - rte_service_unregister; - -} DPDK_17.08; diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index e8fd67a2..9effd0d4 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -32,16 +32,18 @@ include $(RTE_SDK)/mk/rte.vars.mk INC := rte_branch_prediction.h rte_common.h -INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h -INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h +INC += rte_debug.h rte_eal.h rte_eal_interrupts.h +INC += rte_errno.h rte_launch.h rte_lcore.h +INC += rte_log.h rte_memory.h rte_memzone.h INC += rte_per_lcore.h rte_random.h INC += rte_tailq.h rte_interrupts.h rte_alarm.h INC += rte_string_fns.h rte_version.h INC += rte_eal_memconfig.h rte_malloc_heap.h -INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h +INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h INC += rte_service.h rte_service_component.h +INC += rte_bitmap.h rte_vfio.h GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h @@ -49,7 +51,7 @@ GENERIC_INC += rte_vect.h rte_pause.h rte_io.h # defined in mk/arch/$(RTE_ARCH)/rte.vars.mk ARCH_DIR ?= $(RTE_ARCH) -ARCH_INC := $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h)) +ARCH_INC := $(sort $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h))) SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC)) SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \ diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c index 5636e9c1..88f1cbe3 100644 --- a/lib/librte_eal/common/arch/arm/rte_cpuflags.c +++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c @@ -137,7 +137,7 @@ rte_cpu_get_features(hwcap_registers_t out) _Elfx_auxv_t auxv; auxv_fd = open("/proc/self/auxv", O_RDONLY); - assert(auxv_fd); + assert(auxv_fd != -1); while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) { if (auxv.a_type == AT_HWCAP) { out[REG_HWCAP] = auxv.a_un.a_val; diff --git a/lib/librte_eal/common/arch/arm/rte_cycles.c b/lib/librte_eal/common/arch/arm/rte_cycles.c new file mode 100644 index 00000000..3e31e5be --- /dev/null +++ b/lib/librte_eal/common/arch/arm/rte_cycles.c @@ -0,0 +1,45 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "eal_private.h" + +uint64_t +get_tsc_freq_arch(void) +{ +#if defined RTE_ARCH_ARM64 && !defined RTE_ARM_EAL_RDTSC_USE_PMU + uint64_t freq; + asm volatile("mrs %0, cntfrq_el0" : "=r" (freq)); + return freq; +#else + return 0; +#endif +} diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c index fcf96e04..970a61c5 100644 --- a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c +++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c @@ -108,7 +108,7 @@ rte_cpu_get_features(hwcap_registers_t out) Elf64_auxv_t auxv; auxv_fd = open("/proc/self/auxv", O_RDONLY); - assert(auxv_fd); + assert(auxv_fd != -1); while (read(auxv_fd, &auxv, sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) { if (auxv.a_type == AT_HWCAP) diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cycles.c b/lib/librte_eal/common/arch/ppc_64/rte_cycles.c new file mode 100644 index 00000000..69a9f747 --- /dev/null +++ b/lib/librte_eal/common/arch/ppc_64/rte_cycles.c @@ -0,0 +1,52 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_lcore.h> +#include <rte_log.h> +#include "eal_filesystem.h" +#include "eal_private.h" + +static const char sys_cpu_dir[] = "/sys/devices/system/cpu"; + +uint64_t +get_tsc_freq_arch(void) +{ + unsigned long cpu_hz; + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "%s/cpu%d/cpufreq/cpuinfo_cur_freq", + sys_cpu_dir, rte_get_master_lcore()); + if (eal_parse_sysfs_value(path, &cpu_hz) < 0) + RTE_LOG(WARNING, EAL, "Unable to parse %s\n", path); + + return cpu_hz*1000; +} diff --git a/lib/librte_eal/common/arch/x86/rte_cpuflags.c b/lib/librte_eal/common/arch/x86/rte_cpuflags.c index 01382571..7d4a0fef 100644 --- a/lib/librte_eal/common/arch/x86/rte_cpuflags.c +++ b/lib/librte_eal/common/arch/x86/rte_cpuflags.c @@ -36,6 +36,7 @@ #include <stdio.h> #include <errno.h> #include <stdint.h> +#include <cpuid.h> enum cpu_register_t { RTE_REG_EAX = 0, @@ -156,38 +157,12 @@ const struct feature_entry rte_cpu_feature_table[] = { FEAT_DEF(INVTSC, 0x80000007, 0, RTE_REG_EDX, 8) }; -/* - * Execute CPUID instruction and get contents of a specific register - * - * This function, when compiled with GCC, will generate architecture-neutral - * code, as per GCC manual. - */ -static void -rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out) -{ -#if defined(__i386__) && defined(__PIC__) - /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */ - asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0" - : "=r" (out[RTE_REG_EBX]), - "=a" (out[RTE_REG_EAX]), - "=c" (out[RTE_REG_ECX]), - "=d" (out[RTE_REG_EDX]) - : "a" (leaf), "c" (subleaf)); -#else - asm volatile("cpuid" - : "=a" (out[RTE_REG_EAX]), - "=b" (out[RTE_REG_EBX]), - "=c" (out[RTE_REG_ECX]), - "=d" (out[RTE_REG_EDX]) - : "a" (leaf), "c" (subleaf)); -#endif -} - int rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) { const struct feature_entry *feat; cpuid_registers_t regs; + unsigned int maxleaf; if (feature >= RTE_CPUFLAG_NUMFLAGS) /* Flag does not match anything in the feature tables */ @@ -199,13 +174,14 @@ rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) /* This entry in the table wasn't filled out! */ return -EFAULT; - rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs); - if (((regs[RTE_REG_EAX] ^ feat->leaf) & 0xffff0000) || - regs[RTE_REG_EAX] < feat->leaf) + maxleaf = __get_cpuid_max(feat->leaf & 0x80000000, NULL); + + if (maxleaf < feat->leaf) return 0; - /* get the cpuid leaf containing the desired feature */ - rte_cpu_get_features(feat->leaf, feat->subleaf, regs); + __cpuid_count(feat->leaf, feat->subleaf, + regs[RTE_REG_EAX], regs[RTE_REG_EBX], + regs[RTE_REG_ECX], regs[RTE_REG_EDX]); /* check if the feature is enabled */ return (regs[feat->reg] >> feat->bit) & 1; diff --git a/lib/librte_eal/common/arch/x86/rte_cycles.c b/lib/librte_eal/common/arch/x86/rte_cycles.c new file mode 100644 index 00000000..417850ee --- /dev/null +++ b/lib/librte_eal/common/arch/x86/rte_cycles.c @@ -0,0 +1,152 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <fcntl.h> +#include <unistd.h> +#include <cpuid.h> + +#include <rte_common.h> + +#include "eal_private.h" + +static unsigned int +rte_cpu_get_model(uint32_t fam_mod_step) +{ + uint32_t family, model, ext_model; + + family = (fam_mod_step >> 8) & 0xf; + model = (fam_mod_step >> 4) & 0xf; + + if (family == 6 || family == 15) { + ext_model = (fam_mod_step >> 16) & 0xf; + model += (ext_model << 4); + } + + return model; +} + +static int32_t +rdmsr(int msr, uint64_t *val) +{ +#ifdef RTE_EXEC_ENV_LINUXAPP + int fd; + int ret; + + fd = open("/dev/cpu/0/msr", O_RDONLY); + if (fd < 0) + return fd; + + ret = pread(fd, val, sizeof(uint64_t), msr); + + close(fd); + + return ret; +#else + RTE_SET_USED(msr); + RTE_SET_USED(val); + + return -1; +#endif +} + +static uint32_t +check_model_wsm_nhm(uint8_t model) +{ + switch (model) { + /* Westmere */ + case 0x25: + case 0x2C: + case 0x2F: + /* Nehalem */ + case 0x1E: + case 0x1F: + case 0x1A: + case 0x2E: + return 1; + } + + return 0; +} + +static uint32_t +check_model_gdm_dnv(uint8_t model) +{ + switch (model) { + /* Goldmont */ + case 0x5C: + /* Denverton */ + case 0x5F: + return 1; + } + + return 0; +} + +uint64_t +get_tsc_freq_arch(void) +{ + uint64_t tsc_hz = 0; + uint32_t a, b, c, d, maxleaf; + uint8_t mult, model; + int32_t ret; + + /* + * Time Stamp Counter and Nominal Core Crystal Clock + * Information Leaf + */ + maxleaf = __get_cpuid_max(0, NULL); + + if (maxleaf >= 0x15) { + __cpuid(0x15, a, b, c, d); + + /* EBX : TSC/Crystal ratio, ECX : Crystal Hz */ + if (b && c) + return c * (b / a); + } + + __cpuid(0x1, a, b, c, d); + model = rte_cpu_get_model(a); + + if (check_model_wsm_nhm(model)) + mult = 133; + else if ((c & bit_AVX) || check_model_gdm_dnv(model)) + mult = 100; + else + return 0; + + ret = rdmsr(0xCE, &tsc_hz); + if (ret < 0) + return 0; + + return ((tsc_hz >> 8) & 0xff) * mult * 1E6; +} diff --git a/lib/librte_eal/common/arch/x86/rte_memcpy.c b/lib/librte_eal/common/arch/x86/rte_memcpy.c new file mode 100644 index 00000000..174bef15 --- /dev/null +++ b/lib/librte_eal/common/arch/x86/rte_memcpy.c @@ -0,0 +1,58 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_memcpy.h> +#include <rte_cpuflags.h> +#include <rte_log.h> + +void *(*rte_memcpy_ptr)(void *dst, const void *src, size_t n) = NULL; + +RTE_INIT(rte_memcpy_init) +{ +#ifdef CC_SUPPORT_AVX512F + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) { + rte_memcpy_ptr = rte_memcpy_avx512f; + RTE_LOG(DEBUG, EAL, "AVX512 memcpy is using!\n"); + return; + } +#endif +#ifdef CC_SUPPORT_AVX2 + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) { + rte_memcpy_ptr = rte_memcpy_avx2; + RTE_LOG(DEBUG, EAL, "AVX2 memcpy is using!\n"); + return; + } +#endif + rte_memcpy_ptr = rte_memcpy_sse; + RTE_LOG(DEBUG, EAL, "Default SSE/AVX memcpy is using!\n"); +} diff --git a/lib/librte_eal/common/arch/x86/rte_spinlock.c b/lib/librte_eal/common/arch/x86/rte_spinlock.c index c383e9f0..1244a90b 100644 --- a/lib/librte_eal/common/arch/x86/rte_spinlock.c +++ b/lib/librte_eal/common/arch/x86/rte_spinlock.c @@ -38,8 +38,7 @@ uint8_t rte_rtm_supported; /* cache the flag to avoid the overhead of the rte_cpu_get_flag_enabled function */ -static void __attribute__((constructor)) -rte_rtm_init(void) +RTE_INIT(rte_rtm_init) { rte_rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM); } diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c index 08bec2d9..3e022d51 100644 --- a/lib/librte_eal/common/eal_common_bus.c +++ b/lib/librte_eal/common/eal_common_bus.c @@ -35,6 +35,7 @@ #include <sys/queue.h> #include <rte_bus.h> +#include <rte_debug.h> #include "eal_private.h" @@ -73,11 +74,9 @@ rte_bus_scan(void) TAILQ_FOREACH(bus, &rte_bus_list, next) { ret = bus->scan(); - if (ret) { + if (ret) RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n", bus->name); - return ret; - } } return 0; @@ -97,20 +96,16 @@ rte_bus_probe(void) } ret = bus->probe(); - if (ret) { + if (ret) RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n", bus->name); - return ret; - } } if (vbus) { ret = vbus->probe(); - if (ret) { + if (ret) RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n", vbus->name); - return ret; - } } return 0; @@ -152,15 +147,16 @@ struct rte_bus * rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp, const void *data) { - struct rte_bus *bus = NULL; + struct rte_bus *bus; - TAILQ_FOREACH(bus, &rte_bus_list, next) { - if (start && bus == start) { - start = NULL; /* starting point found */ - continue; - } + if (start != NULL) + bus = TAILQ_NEXT(start, next); + else + bus = TAILQ_FIRST(&rte_bus_list); + while (bus != NULL) { if (cmp(bus, data) == 0) break; + bus = TAILQ_NEXT(bus, next); } return bus; } @@ -222,3 +218,26 @@ rte_bus_find_by_device_name(const char *str) c[0] = '\0'; return rte_bus_find(NULL, bus_can_parse, name); } + + +/* + * Get iommu class of devices on the bus. + */ +enum rte_iova_mode +rte_bus_get_iommu_class(void) +{ + int mode = RTE_IOVA_DC; + struct rte_bus *bus; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + + if (bus->get_iommu_class) + mode |= bus->get_iommu_class(); + } + + if (mode != RTE_IOVA_VA) { + /* Use default IOVA mode */ + mode = RTE_IOVA_PA; + } + return mode; +} diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c index e2512755..dda8f583 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -67,7 +67,6 @@ static int cmp_dev_name(const struct rte_device *dev, const void *_name) int rte_eal_dev_attach(const char *name, const char *devargs) { struct rte_bus *bus; - int ret; if (name == NULL || devargs == NULL) { RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n"); @@ -80,22 +79,13 @@ int rte_eal_dev_attach(const char *name, const char *devargs) name); return -EINVAL; } - if (strcmp(bus->name, "pci") == 0) - return rte_eal_hotplug_add("pci", name, devargs); - if (strcmp(bus->name, "vdev") != 0) { - RTE_LOG(ERR, EAL, "Device attach is only supported for PCI and vdev devices.\n"); - return -ENOTSUP; - } + if (strcmp(bus->name, "pci") == 0 || strcmp(bus->name, "vdev") == 0) + return rte_eal_hotplug_add(bus->name, name, devargs); - /* - * If we haven't found a bus device the user meant to "hotplug" a - * virtual device instead. - */ - ret = rte_vdev_init(name, devargs); - if (ret) - RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", - name); - return ret; + RTE_LOG(ERR, EAL, + "Device attach is only supported for PCI and vdev devices.\n"); + + return -ENOTSUP; } int rte_eal_dev_detach(struct rte_device *dev) diff --git a/lib/librte_eal/common/eal_common_errno.c b/lib/librte_eal/common/eal_common_errno.c index de48d8e4..dc5b7c04 100644 --- a/lib/librte_eal/common/eal_common_errno.c +++ b/lib/librte_eal/common/eal_common_errno.c @@ -46,18 +46,20 @@ RTE_DEFINE_PER_LCORE(int, _rte_errno); const char * rte_strerror(int errnum) { + /* BSD puts a colon in the "unknown error" messages, Linux doesn't */ +#ifdef RTE_EXEC_ENV_BSDAPP + static const char *sep = ":"; +#else + static const char *sep = ""; +#endif #define RETVAL_SZ 256 static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval); + char *ret = RTE_PER_LCORE(retval); /* since some implementations of strerror_r throw an error * themselves if errnum is too big, we handle that case here */ - if (errnum > RTE_MAX_ERRNO) - snprintf(RTE_PER_LCORE(retval), RETVAL_SZ, -#ifdef RTE_EXEC_ENV_BSDAPP - "Unknown error: %d", errnum); -#else - "Unknown error %d", errnum); -#endif + if (errnum >= RTE_MAX_ERRNO) + snprintf(ret, RETVAL_SZ, "Unknown error%s %d", sep, errnum); else switch (errnum){ case E_RTE_SECONDARY: @@ -65,8 +67,10 @@ rte_strerror(int errnum) case E_RTE_NO_CONFIG: return "Missing rte_config structure"; default: - strerror_r(errnum, RTE_PER_LCORE(retval), RETVAL_SZ); + if (strerror_r(errnum, ret, RETVAL_SZ) != 0) + snprintf(ret, RETVAL_SZ, "Unknown error%s %d", + sep, errnum); } - return RTE_PER_LCORE(retval); + return ret; } diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c index 137c191d..2d5cae9f 100644 --- a/lib/librte_eal/common/eal_common_launch.c +++ b/lib/librte_eal/common/eal_common_launch.c @@ -38,7 +38,6 @@ #include <rte_launch.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_atomic.h> #include <rte_pause.h> diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c index 0e3b9320..be404136 100644 --- a/lib/librte_eal/common/eal_common_log.c +++ b/lib/librte_eal/common/eal_common_log.c @@ -89,14 +89,6 @@ rte_log_set_global_level(uint32_t level) rte_logs.level = (uint32_t)level; } -/* Set global log level */ -/* replaced by rte_log_set_global_level */ -__rte_deprecated void -rte_set_log_level(uint32_t level) -{ - rte_log_set_global_level(level); -} - /* Get global log level */ uint32_t rte_log_get_global_level(void) @@ -104,14 +96,6 @@ rte_log_get_global_level(void) return rte_logs.level; } -/* Get global log level */ -/* replaced by rte_log_get_global_level */ -uint32_t -rte_get_log_level(void) -{ - return rte_log_get_global_level(); -} - int rte_log_get_level(uint32_t type) { @@ -121,30 +105,6 @@ rte_log_get_level(uint32_t type) return rte_logs.dynamic_types[type].loglevel; } -/* Set global log type */ -__rte_deprecated void -rte_set_log_type(uint32_t type, int enable) -{ - if (type < RTE_LOGTYPE_FIRST_EXT_ID) { - if (enable) - rte_logs.type |= 1 << type; - else - rte_logs.type &= ~(1 << type); - } - - if (enable) - rte_log_set_level(type, 0); - else - rte_log_set_level(type, RTE_LOG_DEBUG); -} - -/* Get global log type */ -__rte_deprecated uint32_t -rte_get_log_type(void) -{ - return rte_logs.type; -} - int rte_log_set_level(uint32_t type, uint32_t level) { @@ -289,7 +249,8 @@ static const struct logtype logtype_strings[] = { {RTE_LOGTYPE_USER8, "user8"} }; -RTE_INIT(rte_log_init); +/* Logging should be first initialzer (before drivers and bus) */ +RTE_INIT_PRIO(rte_log_init, 101); static void rte_log_init(void) { diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 996877ef..fc6c44da 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -41,7 +41,6 @@ #include <sys/queue.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> #include <rte_log.h> @@ -96,11 +95,11 @@ rte_dump_physmem_layout(FILE *f) if (mcfg->memseg[i].addr == NULL) break; - fprintf(f, "Segment %u: phys:0x%"PRIx64", len:%zu, " + fprintf(f, "Segment %u: IOVA:0x%"PRIx64", len:%zu, " "virt:%p, socket_id:%"PRId32", " "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", " "nrank:%"PRIx32"\n", i, - mcfg->memseg[i].phys_addr, + mcfg->memseg[i].iova, mcfg->memseg[i].len, mcfg->memseg[i].addr, mcfg->memseg[i].socket_id, diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index 3026e36b..ea072a25 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -251,7 +251,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, mcfg->memzone_cnt++; snprintf(mz->name, sizeof(mz->name), "%s", name); - mz->phys_addr = rte_malloc_virt2phy(mz_addr); + mz->iova = rte_malloc_virt2iova(mz_addr); mz->addr = mz_addr; mz->len = (requested_len == 0 ? elem->size : requested_len); mz->hugepage_sz = elem->ms->hugepage_sz; @@ -391,10 +391,10 @@ rte_memzone_dump(FILE *f) for (i=0; i<RTE_MAX_MEMZONE; i++) { if (mcfg->memzone[i].addr == NULL) break; - fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx" + fprintf(f, "Zone %u: name:<%s>, IO:0x%"PRIx64", len:0x%zx" ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i, mcfg->memzone[i].name, - mcfg->memzone[i].phys_addr, + mcfg->memzone[i].iova, mcfg->memzone[i].len, mcfg->memzone[i].addr, mcfg->memzone[i].socket_id, diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 1da185e5..996a0342 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -85,6 +85,7 @@ eal_long_options[] = { {OPT_LCORES, 1, NULL, OPT_LCORES_NUM }, {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM }, {OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM }, + {OPT_MBUF_POOL_OPS_NAME, 1, NULL, OPT_MBUF_POOL_OPS_NAME_NUM}, {OPT_NO_HPET, 0, NULL, OPT_NO_HPET_NUM }, {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM }, {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM }, @@ -97,7 +98,6 @@ eal_long_options[] = { {OPT_VDEV, 1, NULL, OPT_VDEV_NUM }, {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM }, {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM }, - {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM }, {0, 0, NULL, 0 } }; @@ -208,8 +208,6 @@ eal_reset_internal_config(struct internal_config *internal_cfg) internal_cfg->syslog_facility = LOG_DAEMON; - internal_cfg->xen_dom0_support = 0; - /* if set to NONE, interrupt mode is determined automatically */ internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE; @@ -220,6 +218,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg) #endif internal_cfg->vmware_tsc_map = 0; internal_cfg->create_uio_dev = 0; + internal_cfg->mbuf_pool_ops_name = RTE_MBUF_DEFAULT_MEMPOOL_OPS; } static int @@ -279,12 +278,13 @@ int eal_plugins_init(void) { struct shared_driver *solib = NULL; + struct stat sb; - if (*default_solib_dir != '\0') + if (*default_solib_dir != '\0' && stat(default_solib_dir, &sb) == 0 && + S_ISDIR(sb.st_mode)) eal_plugin_add(default_solib_dir); TAILQ_FOREACH(solib, &solib_list, next) { - struct stat sb; if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) { if (eal_plugindir_init(solib->name) == -1) { @@ -1279,6 +1279,7 @@ eal_common_usage(void) " '@' can be omitted if cpus and lcores have the same value\n" " -s SERVICE COREMASK Hexadecimal bitmask of cores to be used as service cores\n" " --"OPT_MASTER_LCORE" ID Core ID that is used as master\n" + " --"OPT_MBUF_POOL_OPS_NAME" Pool ops name for mbuf to use\n" " -n CHANNELS Number of memory channels\n" " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n" " -r RANKS Force number of memory ranks (don't detect)\n" diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c deleted file mode 100644 index 52fd38cd..00000000 --- a/lib/librte_eal/common/eal_common_pci.c +++ /dev/null @@ -1,580 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * Copyright 2013-2014 6WIND S.A. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <string.h> -#include <inttypes.h> -#include <stdint.h> -#include <stdlib.h> -#include <stdio.h> -#include <sys/queue.h> -#include <sys/mman.h> - -#include <rte_errno.h> -#include <rte_interrupts.h> -#include <rte_log.h> -#include <rte_bus.h> -#include <rte_pci.h> -#include <rte_per_lcore.h> -#include <rte_memory.h> -#include <rte_memzone.h> -#include <rte_eal.h> -#include <rte_string_fns.h> -#include <rte_common.h> -#include <rte_devargs.h> - -#include "eal_private.h" - -extern struct rte_pci_bus rte_pci_bus; - -#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" - -const char *pci_get_sysfs_path(void) -{ - const char *path = NULL; - - path = getenv("SYSFS_PCI_DEVICES"); - if (path == NULL) - return SYSFS_PCI_DEVICES; - - return path; -} - -static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev) -{ - struct rte_devargs *devargs; - struct rte_pci_addr addr; - struct rte_bus *pbus; - - pbus = rte_bus_find_by_name("pci"); - TAILQ_FOREACH(devargs, &devargs_list, next) { - if (devargs->bus != pbus) - continue; - devargs->bus->parse(devargs->name, &addr); - if (!rte_eal_compare_pci_addr(&dev->addr, &addr)) - return devargs; - } - return NULL; -} - -void -pci_name_set(struct rte_pci_device *dev) -{ - struct rte_devargs *devargs; - - /* Each device has its internal, canonical name set. */ - rte_pci_device_name(&dev->addr, - dev->name, sizeof(dev->name)); - devargs = pci_devargs_lookup(dev); - dev->device.devargs = devargs; - /* In blacklist mode, if the device is not blacklisted, no - * rte_devargs exists for it. - */ - if (devargs != NULL) - /* If an rte_devargs exists, the generic rte_device uses the - * given name as its namea - */ - dev->device.name = dev->device.devargs->name; - else - /* Otherwise, it uses the internal, canonical form. */ - dev->device.name = dev->name; -} - -/* map a particular resource from a file */ -void * -pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, - int additional_flags) -{ - void *mapaddr; - - /* Map the PCI memory resource of device */ - mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, - MAP_SHARED | additional_flags, fd, offset); - if (mapaddr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", - __func__, fd, requested_addr, - (unsigned long)size, (unsigned long)offset, - strerror(errno), mapaddr); - } else - RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); - - return mapaddr; -} - -/* unmap a particular resource */ -void -pci_unmap_resource(void *requested_addr, size_t size) -{ - if (requested_addr == NULL) - return; - - /* Unmap the PCI memory resource of device */ - if (munmap(requested_addr, size)) { - RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n", - __func__, requested_addr, (unsigned long)size, - strerror(errno)); - } else - RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n", - requested_addr); -} - -/* - * Match the PCI Driver and Device using the ID Table - * - * @param pci_drv - * PCI driver from which ID table would be extracted - * @param pci_dev - * PCI device to match against the driver - * @return - * 1 for successful match - * 0 for unsuccessful match - */ -static int -rte_pci_match(const struct rte_pci_driver *pci_drv, - const struct rte_pci_device *pci_dev) -{ - const struct rte_pci_id *id_table; - - for (id_table = pci_drv->id_table; id_table->vendor_id != 0; - id_table++) { - /* check if device's identifiers match the driver's ones */ - if (id_table->vendor_id != pci_dev->id.vendor_id && - id_table->vendor_id != PCI_ANY_ID) - continue; - if (id_table->device_id != pci_dev->id.device_id && - id_table->device_id != PCI_ANY_ID) - continue; - if (id_table->subsystem_vendor_id != - pci_dev->id.subsystem_vendor_id && - id_table->subsystem_vendor_id != PCI_ANY_ID) - continue; - if (id_table->subsystem_device_id != - pci_dev->id.subsystem_device_id && - id_table->subsystem_device_id != PCI_ANY_ID) - continue; - if (id_table->class_id != pci_dev->id.class_id && - id_table->class_id != RTE_CLASS_ANY_ID) - continue; - - return 1; - } - - return 0; -} - -/* - * If vendor/device ID match, call the probe() function of the - * driver. - */ -static int -rte_pci_probe_one_driver(struct rte_pci_driver *dr, - struct rte_pci_device *dev) -{ - int ret; - struct rte_pci_addr *loc; - - if ((dr == NULL) || (dev == NULL)) - return -EINVAL; - - loc = &dev->addr; - - /* The device is not blacklisted; Check if driver supports it */ - if (!rte_pci_match(dr, dev)) - /* Match of device and driver failed */ - return 1; - - RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", - loc->domain, loc->bus, loc->devid, loc->function, - dev->device.numa_node); - - /* no initialization when blacklisted, return without error */ - if (dev->device.devargs != NULL && - dev->device.devargs->policy == - RTE_DEV_BLACKLISTED) { - RTE_LOG(INFO, EAL, " Device is blacklisted, not" - " initializing\n"); - return 1; - } - - if (dev->device.numa_node < 0) { - RTE_LOG(WARNING, EAL, " Invalid NUMA socket, default to 0\n"); - dev->device.numa_node = 0; - } - - RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->driver.name); - - if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { - /* map resources for devices that use igb_uio */ - ret = rte_pci_map_device(dev); - if (ret != 0) - return ret; - } - - /* reference driver structure */ - dev->driver = dr; - dev->device.driver = &dr->driver; - - /* call the driver probe() function */ - ret = dr->probe(dr, dev); - if (ret) { - dev->driver = NULL; - dev->device.driver = NULL; - if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) && - /* Don't unmap if device is unsupported and - * driver needs mapped resources. - */ - !(ret > 0 && - (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES))) - rte_pci_unmap_device(dev); - } - - return ret; -} - -/* - * If vendor/device ID match, call the remove() function of the - * driver. - */ -static int -rte_pci_detach_dev(struct rte_pci_device *dev) -{ - struct rte_pci_addr *loc; - struct rte_pci_driver *dr; - - if (dev == NULL) - return -EINVAL; - - dr = dev->driver; - loc = &dev->addr; - - RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", - loc->domain, loc->bus, loc->devid, - loc->function, dev->device.numa_node); - - RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->driver.name); - - if (dr->remove && (dr->remove(dev) < 0)) - return -1; /* negative value is an error */ - - /* clear driver structure */ - dev->driver = NULL; - - if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) - /* unmap resources for devices that use igb_uio */ - rte_pci_unmap_device(dev); - - return 0; -} - -/* - * If vendor/device ID match, call the probe() function of all - * registered driver for the given device. Return -1 if initialization - * failed, return 1 if no driver is found for this device. - */ -static int -pci_probe_all_drivers(struct rte_pci_device *dev) -{ - struct rte_pci_driver *dr = NULL; - int rc = 0; - - if (dev == NULL) - return -1; - - /* Check if a driver is already loaded */ - if (dev->driver != NULL) - return 0; - - FOREACH_DRIVER_ON_PCIBUS(dr) { - rc = rte_pci_probe_one_driver(dr, dev); - if (rc < 0) - /* negative value is an error */ - return -1; - if (rc > 0) - /* positive value means driver doesn't support it */ - continue; - return 0; - } - return 1; -} - -/* - * Find the pci device specified by pci address, then invoke probe function of - * the driver of the device. - */ -int -rte_pci_probe_one(const struct rte_pci_addr *addr) -{ - struct rte_pci_device *dev = NULL; - - int ret = 0; - - if (addr == NULL) - return -1; - - /* update current pci device in global list, kernel bindings might have - * changed since last time we looked at it. - */ - if (pci_update_device(addr) < 0) - goto err_return; - - FOREACH_DEVICE_ON_PCIBUS(dev) { - if (rte_eal_compare_pci_addr(&dev->addr, addr)) - continue; - - ret = pci_probe_all_drivers(dev); - if (ret) - goto err_return; - return 0; - } - return -1; - -err_return: - RTE_LOG(WARNING, EAL, - "Requested device " PCI_PRI_FMT " cannot be used\n", - addr->domain, addr->bus, addr->devid, addr->function); - return -1; -} - -/* - * Detach device specified by its pci address. - */ -int -rte_pci_detach(const struct rte_pci_addr *addr) -{ - struct rte_pci_device *dev = NULL; - int ret = 0; - - if (addr == NULL) - return -1; - - FOREACH_DEVICE_ON_PCIBUS(dev) { - if (rte_eal_compare_pci_addr(&dev->addr, addr)) - continue; - - ret = rte_pci_detach_dev(dev); - if (ret < 0) - /* negative value is an error */ - goto err_return; - if (ret > 0) - /* positive value means driver doesn't support it */ - continue; - - rte_pci_remove_device(dev); - free(dev); - return 0; - } - return -1; - -err_return: - RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT - " cannot be used\n", dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - return -1; -} - -/* - * Scan the content of the PCI bus, and call the probe() function for - * all registered drivers that have a matching entry in its id_table - * for discovered devices. - */ -int -rte_pci_probe(void) -{ - struct rte_pci_device *dev = NULL; - size_t probed = 0, failed = 0; - struct rte_devargs *devargs; - int probe_all = 0; - int ret = 0; - - if (rte_pci_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST) - probe_all = 1; - - FOREACH_DEVICE_ON_PCIBUS(dev) { - probed++; - - devargs = dev->device.devargs; - /* probe all or only whitelisted devices */ - if (probe_all) - ret = pci_probe_all_drivers(dev); - else if (devargs != NULL && - devargs->policy == RTE_DEV_WHITELISTED) - ret = pci_probe_all_drivers(dev); - if (ret < 0) { - RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT - " cannot be used\n", dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - rte_errno = errno; - failed++; - ret = 0; - } - } - - return (probed && probed == failed) ? -1 : 0; -} - -/* dump one device */ -static int -pci_dump_one_device(FILE *f, struct rte_pci_device *dev) -{ - int i; - - fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id, - dev->id.device_id); - - for (i = 0; i != sizeof(dev->mem_resource) / - sizeof(dev->mem_resource[0]); i++) { - fprintf(f, " %16.16"PRIx64" %16.16"PRIx64"\n", - dev->mem_resource[i].phys_addr, - dev->mem_resource[i].len); - } - return 0; -} - -/* dump devices on the bus */ -void -rte_pci_dump(FILE *f) -{ - struct rte_pci_device *dev = NULL; - - FOREACH_DEVICE_ON_PCIBUS(dev) { - pci_dump_one_device(f, dev); - } -} - -static int -pci_parse(const char *name, void *addr) -{ - struct rte_pci_addr *out = addr; - struct rte_pci_addr pci_addr; - bool parse; - - parse = (eal_parse_pci_BDF(name, &pci_addr) == 0 || - eal_parse_pci_DomBDF(name, &pci_addr) == 0); - if (parse && addr != NULL) - *out = pci_addr; - return parse == false; -} - -/* register a driver */ -void -rte_pci_register(struct rte_pci_driver *driver) -{ - TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next); - driver->bus = &rte_pci_bus; -} - -/* unregister a driver */ -void -rte_pci_unregister(struct rte_pci_driver *driver) -{ - TAILQ_REMOVE(&rte_pci_bus.driver_list, driver, next); - driver->bus = NULL; -} - -/* Add a device to PCI bus */ -void -rte_pci_add_device(struct rte_pci_device *pci_dev) -{ - TAILQ_INSERT_TAIL(&rte_pci_bus.device_list, pci_dev, next); -} - -/* Insert a device into a predefined position in PCI bus */ -void -rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, - struct rte_pci_device *new_pci_dev) -{ - TAILQ_INSERT_BEFORE(exist_pci_dev, new_pci_dev, next); -} - -/* Remove a device from PCI bus */ -void -rte_pci_remove_device(struct rte_pci_device *pci_dev) -{ - TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next); -} - -static struct rte_device * -pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, - const void *data) -{ - struct rte_pci_device *dev; - - FOREACH_DEVICE_ON_PCIBUS(dev) { - if (start && &dev->device == start) { - start = NULL; /* starting point found */ - continue; - } - if (cmp(&dev->device, data) == 0) - return &dev->device; - } - - return NULL; -} - -static int -pci_plug(struct rte_device *dev) -{ - return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev)); -} - -static int -pci_unplug(struct rte_device *dev) -{ - struct rte_pci_device *pdev; - int ret; - - pdev = RTE_DEV_TO_PCI(dev); - ret = rte_pci_detach_dev(pdev); - rte_pci_remove_device(pdev); - free(pdev); - return ret; -} - -struct rte_pci_bus rte_pci_bus = { - .bus = { - .scan = rte_pci_scan, - .probe = rte_pci_probe, - .find_device = pci_find_device, - .plug = pci_plug, - .unplug = pci_unplug, - .parse = pci_parse, - }, - .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), - .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), -}; - -RTE_REGISTER_BUS(pci, rte_pci_bus.bus); diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c deleted file mode 100644 index 367a6816..00000000 --- a/lib/librte_eal/common/eal_common_pci_uio.c +++ /dev/null @@ -1,233 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <fcntl.h> -#include <string.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mman.h> - -#include <rte_eal.h> -#include <rte_tailq.h> -#include <rte_log.h> -#include <rte_malloc.h> - -#include "eal_private.h" - -static struct rte_tailq_elem rte_uio_tailq = { - .name = "UIO_RESOURCE_LIST", -}; -EAL_REGISTER_TAILQ(rte_uio_tailq) - -static int -pci_uio_map_secondary(struct rte_pci_device *dev) -{ - int fd, i, j; - struct mapped_pci_resource *uio_res; - struct mapped_pci_res_list *uio_res_list = - RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); - - TAILQ_FOREACH(uio_res, uio_res_list, next) { - - /* skip this element if it doesn't match our PCI address */ - if (rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr)) - continue; - - for (i = 0; i != uio_res->nb_maps; i++) { - /* - * open devname, to mmap it - */ - fd = open(uio_res->maps[i].path, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - uio_res->maps[i].path, strerror(errno)); - return -1; - } - - void *mapaddr = pci_map_resource(uio_res->maps[i].addr, - fd, (off_t)uio_res->maps[i].offset, - (size_t)uio_res->maps[i].size, 0); - /* fd is not needed in slave process, close it */ - close(fd); - if (mapaddr != uio_res->maps[i].addr) { - RTE_LOG(ERR, EAL, - "Cannot mmap device resource file %s to address: %p\n", - uio_res->maps[i].path, - uio_res->maps[i].addr); - if (mapaddr != MAP_FAILED) { - /* unmap addrs correctly mapped */ - for (j = 0; j < i; j++) - pci_unmap_resource( - uio_res->maps[j].addr, - (size_t)uio_res->maps[j].size); - /* unmap addr wrongly mapped */ - pci_unmap_resource(mapaddr, - (size_t)uio_res->maps[i].size); - } - return -1; - } - } - return 0; - } - - RTE_LOG(ERR, EAL, "Cannot find resource for device\n"); - return 1; -} - -/* map the PCI resource of a PCI device in virtual memory */ -int -pci_uio_map_resource(struct rte_pci_device *dev) -{ - int i, map_idx = 0, ret; - uint64_t phaddr; - struct mapped_pci_resource *uio_res = NULL; - struct mapped_pci_res_list *uio_res_list = - RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); - - dev->intr_handle.fd = -1; - dev->intr_handle.uio_cfg_fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - - /* secondary processes - use already recorded details */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return pci_uio_map_secondary(dev); - - /* allocate uio resource */ - ret = pci_uio_alloc_resource(dev, &uio_res); - if (ret) - return ret; - - /* Map all BARs */ - for (i = 0; i != PCI_MAX_RESOURCE; i++) { - /* skip empty BAR */ - phaddr = dev->mem_resource[i].phys_addr; - if (phaddr == 0) - continue; - - ret = pci_uio_map_resource_by_index(dev, i, - uio_res, map_idx); - if (ret) - goto error; - - map_idx++; - } - - uio_res->nb_maps = map_idx; - - TAILQ_INSERT_TAIL(uio_res_list, uio_res, next); - - return 0; -error: - for (i = 0; i < map_idx; i++) { - pci_unmap_resource(uio_res->maps[i].addr, - (size_t)uio_res->maps[i].size); - rte_free(uio_res->maps[i].path); - } - pci_uio_free_resource(dev, uio_res); - return -1; -} - -static void -pci_uio_unmap(struct mapped_pci_resource *uio_res) -{ - int i; - - if (uio_res == NULL) - return; - - for (i = 0; i != uio_res->nb_maps; i++) { - pci_unmap_resource(uio_res->maps[i].addr, - (size_t)uio_res->maps[i].size); - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(uio_res->maps[i].path); - } -} - -static struct mapped_pci_resource * -pci_uio_find_resource(struct rte_pci_device *dev) -{ - struct mapped_pci_resource *uio_res; - struct mapped_pci_res_list *uio_res_list = - RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); - - if (dev == NULL) - return NULL; - - TAILQ_FOREACH(uio_res, uio_res_list, next) { - - /* skip this element if it doesn't match our PCI address */ - if (!rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr)) - return uio_res; - } - return NULL; -} - -/* unmap the PCI resource of a PCI device in virtual memory */ -void -pci_uio_unmap_resource(struct rte_pci_device *dev) -{ - struct mapped_pci_resource *uio_res; - struct mapped_pci_res_list *uio_res_list = - RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); - - if (dev == NULL) - return; - - /* find an entry for the device */ - uio_res = pci_uio_find_resource(dev); - if (uio_res == NULL) - return; - - /* secondary processes - just free maps */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return pci_uio_unmap(uio_res); - - TAILQ_REMOVE(uio_res_list, uio_res, next); - - /* unmap all resources */ - pci_uio_unmap(uio_res); - - /* free uio resource */ - rte_free(uio_res); - - /* close fd if in primary process */ - close(dev->intr_handle.fd); - if (dev->intr_handle.uio_cfg_fd >= 0) { - close(dev->intr_handle.uio_cfg_fd); - dev->intr_handle.uio_cfg_fd = -1; - } - - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; -} diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c index 55955f9e..6ae09fdb 100644 --- a/lib/librte_eal/common/eal_common_tailqs.c +++ b/lib/librte_eal/common/eal_common_tailqs.c @@ -40,7 +40,6 @@ #include <inttypes.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c index 2405e93f..55e96963 100644 --- a/lib/librte_eal/common/eal_common_thread.c +++ b/lib/librte_eal/common/eal_common_thread.c @@ -53,6 +53,20 @@ unsigned rte_socket_id(void) return RTE_PER_LCORE(_socket_id); } +int +rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + + if (lcore_id >= RTE_MAX_LCORE) + return -EINVAL; + + if (cfg->lcore_role[lcore_id] == role) + return 0; + + return -EINVAL; +} + int eal_cpuset_socket_id(rte_cpuset_t *cpusetp) { unsigned cpu = 0; diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c index ed0b16d0..568ae2fd 100644 --- a/lib/librte_eal/common/eal_common_timer.c +++ b/lib/librte_eal/common/eal_common_timer.c @@ -80,8 +80,11 @@ estimate_tsc_freq(void) void set_tsc_freq(void) { - uint64_t freq = get_tsc_freq(); + uint64_t freq; + freq = get_tsc_freq_arch(); + if (!freq) + freq = get_tsc_freq(); if (!freq) freq = estimate_tsc_freq(); @@ -94,8 +97,7 @@ void rte_delay_us_callback_register(void (*userfunc)(unsigned int)) rte_delay_us = userfunc; } -static void __attribute__((constructor)) -rte_timer_init(void) +RTE_INIT(rte_timer_init) { /* set rte_delay_us_block as a delay function */ rte_delay_us_callback_register(rte_delay_us_block); diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c deleted file mode 100644 index f7e547a6..00000000 --- a/lib/librte_eal/common/eal_common_vdev.c +++ /dev/null @@ -1,342 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2016 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <string.h> -#include <inttypes.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <stdbool.h> -#include <sys/queue.h> - -#include <rte_eal.h> -#include <rte_dev.h> -#include <rte_bus.h> -#include <rte_vdev.h> -#include <rte_common.h> -#include <rte_devargs.h> -#include <rte_memory.h> -#include <rte_errno.h> - -/* Forward declare to access virtual bus name */ -static struct rte_bus rte_vdev_bus; - -/** Double linked list of virtual device drivers. */ -TAILQ_HEAD(vdev_device_list, rte_vdev_device); - -static struct vdev_device_list vdev_device_list = - TAILQ_HEAD_INITIALIZER(vdev_device_list); -struct vdev_driver_list vdev_driver_list = - TAILQ_HEAD_INITIALIZER(vdev_driver_list); - -/* register a driver */ -void -rte_vdev_register(struct rte_vdev_driver *driver) -{ - TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next); -} - -/* unregister a driver */ -void -rte_vdev_unregister(struct rte_vdev_driver *driver) -{ - TAILQ_REMOVE(&vdev_driver_list, driver, next); -} - -static int -vdev_parse(const char *name, void *addr) -{ - struct rte_vdev_driver **out = addr; - struct rte_vdev_driver *driver = NULL; - - TAILQ_FOREACH(driver, &vdev_driver_list, next) { - if (strncmp(driver->driver.name, name, - strlen(driver->driver.name)) == 0) - break; - if (driver->driver.alias && - strncmp(driver->driver.alias, name, - strlen(driver->driver.alias)) == 0) - break; - } - if (driver != NULL && - addr != NULL) - *out = driver; - return driver == NULL; -} - -static int -vdev_probe_all_drivers(struct rte_vdev_device *dev) -{ - const char *name; - struct rte_vdev_driver *driver; - int ret; - - name = rte_vdev_device_name(dev); - - RTE_LOG(DEBUG, EAL, "Search driver %s to probe device %s\n", name, - rte_vdev_device_name(dev)); - - if (vdev_parse(name, &driver)) - return -1; - dev->device.driver = &driver->driver; - ret = driver->probe(dev); - if (ret) - dev->device.driver = NULL; - return ret; -} - -static struct rte_vdev_device * -find_vdev(const char *name) -{ - struct rte_vdev_device *dev; - - if (!name) - return NULL; - - TAILQ_FOREACH(dev, &vdev_device_list, next) { - const char *devname = rte_vdev_device_name(dev); - if (!strncmp(devname, name, strlen(name))) - return dev; - } - - return NULL; -} - -static struct rte_devargs * -alloc_devargs(const char *name, const char *args) -{ - struct rte_devargs *devargs; - int ret; - - devargs = calloc(1, sizeof(*devargs)); - if (!devargs) - return NULL; - - devargs->bus = &rte_vdev_bus; - if (args) - devargs->args = strdup(args); - else - devargs->args = strdup(""); - - ret = snprintf(devargs->name, sizeof(devargs->name), "%s", name); - if (ret < 0 || ret >= (int)sizeof(devargs->name)) { - free(devargs->args); - free(devargs); - return NULL; - } - - return devargs; -} - -int -rte_vdev_init(const char *name, const char *args) -{ - struct rte_vdev_device *dev; - struct rte_devargs *devargs; - int ret; - - if (name == NULL) - return -EINVAL; - - dev = find_vdev(name); - if (dev) - return -EEXIST; - - devargs = alloc_devargs(name, args); - if (!devargs) - return -ENOMEM; - - dev = calloc(1, sizeof(*dev)); - if (!dev) { - ret = -ENOMEM; - goto fail; - } - - dev->device.devargs = devargs; - dev->device.numa_node = SOCKET_ID_ANY; - dev->device.name = devargs->name; - - ret = vdev_probe_all_drivers(dev); - if (ret) { - if (ret > 0) - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); - goto fail; - } - - TAILQ_INSERT_TAIL(&devargs_list, devargs, next); - - TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); - return 0; - -fail: - free(devargs->args); - free(devargs); - free(dev); - return ret; -} - -static int -vdev_remove_driver(struct rte_vdev_device *dev) -{ - const char *name = rte_vdev_device_name(dev); - const struct rte_vdev_driver *driver; - - if (!dev->device.driver) { - RTE_LOG(DEBUG, EAL, "no driver attach to device %s\n", name); - return 1; - } - - driver = container_of(dev->device.driver, const struct rte_vdev_driver, - driver); - return driver->remove(dev); -} - -int -rte_vdev_uninit(const char *name) -{ - struct rte_vdev_device *dev; - struct rte_devargs *devargs; - int ret; - - if (name == NULL) - return -EINVAL; - - dev = find_vdev(name); - if (!dev) - return -ENOENT; - - devargs = dev->device.devargs; - - ret = vdev_remove_driver(dev); - if (ret) - return ret; - - TAILQ_REMOVE(&vdev_device_list, dev, next); - - TAILQ_REMOVE(&devargs_list, devargs, next); - - free(devargs->args); - free(devargs); - free(dev); - return 0; -} - -static int -vdev_scan(void) -{ - struct rte_vdev_device *dev; - struct rte_devargs *devargs; - - /* for virtual devices we scan the devargs_list populated via cmdline */ - TAILQ_FOREACH(devargs, &devargs_list, next) { - - if (devargs->bus != &rte_vdev_bus) - continue; - - dev = find_vdev(devargs->name); - if (dev) - continue; - - dev = calloc(1, sizeof(*dev)); - if (!dev) - return -1; - - dev->device.devargs = devargs; - dev->device.numa_node = SOCKET_ID_ANY; - dev->device.name = devargs->name; - - TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); - } - - return 0; -} - -static int -vdev_probe(void) -{ - struct rte_vdev_device *dev; - - /* call the init function for each virtual device */ - TAILQ_FOREACH(dev, &vdev_device_list, next) { - - if (dev->device.driver) - continue; - - if (vdev_probe_all_drivers(dev)) { - RTE_LOG(ERR, EAL, "failed to initialize %s device\n", - rte_vdev_device_name(dev)); - return -1; - } - } - - return 0; -} - -static struct rte_device * -vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, - const void *data) -{ - struct rte_vdev_device *dev; - - TAILQ_FOREACH(dev, &vdev_device_list, next) { - if (start && &dev->device == start) { - start = NULL; - continue; - } - if (cmp(&dev->device, data) == 0) - return &dev->device; - } - return NULL; -} - -static int -vdev_plug(struct rte_device *dev) -{ - return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev)); -} - -static int -vdev_unplug(struct rte_device *dev) -{ - return rte_vdev_uninit(dev->name); -} - -static struct rte_bus rte_vdev_bus = { - .scan = vdev_scan, - .probe = vdev_probe, - .find_device = vdev_find_device, - .plug = vdev_plug, - .unplug = vdev_unplug, - .parse = vdev_parse, -}; - -RTE_REGISTER_BUS(vdev, rte_vdev_bus); diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 7b7e8c88..fa6ccbec 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -65,7 +65,6 @@ struct internal_config { volatile unsigned force_nrank; /**< force number of ranks */ volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */ unsigned hugepage_unlink; /**< true to unlink backing files */ - volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/ volatile unsigned no_pci; /**< true to disable PCI */ volatile unsigned no_hpet; /**< true to disable HPET */ volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping @@ -82,7 +81,7 @@ struct internal_config { volatile enum rte_intr_mode vfio_intr_mode; const char *hugefile_prefix; /**< the base filename of hugetlbfs files */ const char *hugepage_dir; /**< specific hugetlbfs directory to use */ - + const char *mbuf_pool_ops_name; /**< mbuf pool ops name */ unsigned num_hugepage_sizes; /**< how many sizes on this system */ struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES]; }; diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index 439a2610..30e6bb41 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -61,6 +61,8 @@ enum { OPT_LOG_LEVEL_NUM, #define OPT_MASTER_LCORE "master-lcore" OPT_MASTER_LCORE_NUM, +#define OPT_MBUF_POOL_OPS_NAME "mbuf-pool-ops-name" + OPT_MBUF_POOL_OPS_NAME_NUM, #define OPT_PROC_TYPE "proc-type" OPT_PROC_TYPE_NUM, #define OPT_NO_HPET "no-hpet" @@ -81,8 +83,6 @@ enum { OPT_VFIO_INTR_NUM, #define OPT_VMWARE_TSC_MAP "vmware-tsc-map" OPT_VMWARE_TSC_MAP_NUM, -#define OPT_XEN_DOM0 "xen-dom0" - OPT_XEN_DOM0_NUM, OPT_LONG_MAX_NUM }; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 597d82e4..462226f1 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -35,8 +35,8 @@ #define _EAL_PRIVATE_H_ #include <stdbool.h> +#include <stdint.h> #include <stdio.h> -#include <rte_pci.h> /** * Initialize the memzone subsystem (private to eal). @@ -109,137 +109,6 @@ int rte_eal_timer_init(void); */ int rte_eal_log_init(const char *id, int facility); -struct rte_pci_driver; -struct rte_pci_device; - -/** - * Find the name of a PCI device. - */ -void pci_name_set(struct rte_pci_device *dev); - -/** - * Add a PCI device to the PCI Bus (append to PCI Device list). This function - * also updates the bus references of the PCI Device (and the generic device - * object embedded within. - * - * @param pci_dev - * PCI device to add - * @return void - */ -void rte_pci_add_device(struct rte_pci_device *pci_dev); - -/** - * Insert a PCI device in the PCI Bus at a particular location in the device - * list. It also updates the PCI Bus reference of the new devices to be - * inserted. - * - * @param exist_pci_dev - * Existing PCI device in PCI Bus - * @param new_pci_dev - * PCI device to be added before exist_pci_dev - * @return void - */ -void rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, - struct rte_pci_device *new_pci_dev); - -/** - * Remove a PCI device from the PCI Bus. This sets to NULL the bus references - * in the PCI device object as well as the generic device object. - * - * @param pci_device - * PCI device to be removed from PCI Bus - * @return void - */ -void rte_pci_remove_device(struct rte_pci_device *pci_device); - -/** - * Update a pci device object by asking the kernel for the latest information. - * - * This function is private to EAL. - * - * @param addr - * The PCI Bus-Device-Function address to look for - * @return - * - 0 on success. - * - negative on error. - */ -int pci_update_device(const struct rte_pci_addr *addr); - -/** - * Unbind kernel driver for this device - * - * This function is private to EAL. - * - * @return - * 0 on success, negative on error - */ -int pci_unbind_kernel_driver(struct rte_pci_device *dev); - -/** - * Map the PCI resource of a PCI device in virtual memory - * - * This function is private to EAL. - * - * @return - * 0 on success, negative on error - */ -int pci_uio_map_resource(struct rte_pci_device *dev); - -/** - * Unmap the PCI resource of a PCI device - * - * This function is private to EAL. - */ -void pci_uio_unmap_resource(struct rte_pci_device *dev); - -/** - * Allocate uio resource for PCI device - * - * This function is private to EAL. - * - * @param dev - * PCI device to allocate uio resource - * @param uio_res - * Pointer to uio resource. - * If the function returns 0, the pointer will be filled. - * @return - * 0 on success, negative on error - */ -int pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res); - -/** - * Free uio resource for PCI device - * - * This function is private to EAL. - * - * @param dev - * PCI device to free uio resource - * @param uio_res - * Pointer to uio resource. - */ -void pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res); - -/** - * Map device memory to uio resource - * - * This function is private to EAL. - * - * @param dev - * PCI device that has memory information. - * @param res_idx - * Memory resource index of the PCI device. - * @param uio_res - * uio resource that will keep mapping information. - * @param map_idx - * Mapping information index of the uio resource. - * @return - * 0 on success, negative on error - */ -int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx); - /** * Init tail queues for non-EAL library structures. This is to allow * the rings, mempools, etc. lists to be shared among multiple processes @@ -315,6 +184,17 @@ void set_tsc_freq(void); uint64_t get_tsc_freq(void); /** + * Get TSC frequency if the architecture supports. + * + * This function is private to the EAL. + * + * @return + * The number of TSC cycles in one second. + * Returns zero if the architecture support is not available. + */ +uint64_t get_tsc_freq_arch(void); + +/** * Prepare physical memory mapping * i.e. hugepages on Linux and * contigmem on BSD. @@ -333,17 +213,6 @@ int rte_eal_hugepage_init(void); int rte_eal_hugepage_attach(void); /** - * Returns true if the system is able to obtain - * physical addresses. Return false if using DMA - * addresses through an IOMMU. - * - * Drivers based on uio will not load unless physical - * addresses are obtainable. It is only possible to get - * physical addresses when running as a privileged user. - */ -bool rte_eal_using_phys_addrs(void); - -/** * Find a bus capable of identifying a device. * * @param str diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h index 782350d1..aa887a97 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h @@ -136,7 +136,7 @@ vgetq_lane_p64(poly64x2_t x, const int lane) #endif /* - * If (0 <= index <= 15), then call the ASIMD ext intruction on the + * If (0 <= index <= 15), then call the ASIMD ext instruction on the * 128 bit regs v0 and v1 with the appropriate index. * * Else returns a zero vector. diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h index 2e04c759..fb3abf18 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h @@ -81,7 +81,7 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) : "memory" ); /* no-clobber list */ #else asm volatile ( - "mov %%ebx, %%edi\n" + "xchgl %%ebx, %%edi;\n" MPLOCKED "cmpxchg8b (%[dst]);" "setz %[res];" diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h new file mode 100644 index 00000000..010d752c --- /dev/null +++ b/lib/librte_eal/common/include/rte_bitmap.h @@ -0,0 +1,561 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_BITMAP_H__ +#define __INCLUDE_RTE_BITMAP_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Bitmap + * + * The bitmap component provides a mechanism to manage large arrays of bits + * through bit get/set/clear and bit array scan operations. + * + * The bitmap scan operation is optimized for 64-bit CPUs using 64/128 byte cache + * lines. The bitmap is hierarchically organized using two arrays (array1 and + * array2), with each bit in array1 being associated with a full cache line + * (512/1024 bits) of bitmap bits, which are stored in array2: the bit in array1 + * is set only when there is at least one bit set within its associated array2 + * bits, otherwise the bit in array1 is cleared. The read and write operations + * for array1 and array2 are always done in slabs of 64 bits. + * + * This bitmap is not thread safe. For lock free operation on a specific bitmap + * instance, a single writer thread performing bit set/clear operations is + * allowed, only the writer thread can do bitmap scan operations, while there + * can be several reader threads performing bit get operations in parallel with + * the writer thread. When the use of locking primitives is acceptable, the + * serialization of the bit set/clear and bitmap scan operations needs to be + * enforced by the caller, while the bit get operation does not require locking + * the bitmap. + * + ***/ + +#include <string.h> +#include <rte_common.h> +#include <rte_debug.h> +#include <rte_memory.h> +#include <rte_branch_prediction.h> +#include <rte_prefetch.h> + +#ifndef RTE_BITMAP_OPTIMIZATIONS +#define RTE_BITMAP_OPTIMIZATIONS 1 +#endif + +/* Slab */ +#define RTE_BITMAP_SLAB_BIT_SIZE 64 +#define RTE_BITMAP_SLAB_BIT_SIZE_LOG2 6 +#define RTE_BITMAP_SLAB_BIT_MASK (RTE_BITMAP_SLAB_BIT_SIZE - 1) + +/* Cache line (CL) */ +#define RTE_BITMAP_CL_BIT_SIZE (RTE_CACHE_LINE_SIZE * 8) +#define RTE_BITMAP_CL_BIT_SIZE_LOG2 (RTE_CACHE_LINE_SIZE_LOG2 + 3) +#define RTE_BITMAP_CL_BIT_MASK (RTE_BITMAP_CL_BIT_SIZE - 1) + +#define RTE_BITMAP_CL_SLAB_SIZE (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE) +#define RTE_BITMAP_CL_SLAB_SIZE_LOG2 (RTE_BITMAP_CL_BIT_SIZE_LOG2 - RTE_BITMAP_SLAB_BIT_SIZE_LOG2) +#define RTE_BITMAP_CL_SLAB_MASK (RTE_BITMAP_CL_SLAB_SIZE - 1) + +/** Bitmap data structure */ +struct rte_bitmap { + /* Context for array1 and array2 */ + uint64_t *array1; /**< Bitmap array1 */ + uint64_t *array2; /**< Bitmap array2 */ + uint32_t array1_size; /**< Number of 64-bit slabs in array1 that are actually used */ + uint32_t array2_size; /**< Number of 64-bit slabs in array2 */ + + /* Context for the "scan next" operation */ + uint32_t index1; /**< Bitmap scan: Index of current array1 slab */ + uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */ + uint32_t index2; /**< Bitmap scan: Index of current array2 slab */ + uint32_t go2; /**< Bitmap scan: Go/stop condition for current array2 cache line */ + + /* Storage space for array1 and array2 */ + uint8_t memory[]; +}; + +static inline void +__rte_bitmap_index1_inc(struct rte_bitmap *bmp) +{ + bmp->index1 = (bmp->index1 + 1) & (bmp->array1_size - 1); +} + +static inline uint64_t +__rte_bitmap_mask1_get(struct rte_bitmap *bmp) +{ + return (~1lu) << bmp->offset1; +} + +static inline void +__rte_bitmap_index2_set(struct rte_bitmap *bmp) +{ + bmp->index2 = (((bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1) << RTE_BITMAP_CL_SLAB_SIZE_LOG2); +} + +#if RTE_BITMAP_OPTIMIZATIONS + +static inline int +rte_bsf64(uint64_t slab, uint32_t *pos) +{ + if (likely(slab == 0)) { + return 0; + } + + *pos = __builtin_ctzll(slab); + return 1; +} + +#else + +static inline int +rte_bsf64(uint64_t slab, uint32_t *pos) +{ + uint64_t mask; + uint32_t i; + + if (likely(slab == 0)) { + return 0; + } + + for (i = 0, mask = 1; i < RTE_BITMAP_SLAB_BIT_SIZE; i ++, mask <<= 1) { + if (unlikely(slab & mask)) { + *pos = i; + return 1; + } + } + + return 0; +} + +#endif + +static inline uint32_t +__rte_bitmap_get_memory_footprint(uint32_t n_bits, + uint32_t *array1_byte_offset, uint32_t *array1_slabs, + uint32_t *array2_byte_offset, uint32_t *array2_slabs) +{ + uint32_t n_slabs_context, n_slabs_array1, n_cache_lines_context_and_array1; + uint32_t n_cache_lines_array2; + uint32_t n_bytes_total; + + n_cache_lines_array2 = (n_bits + RTE_BITMAP_CL_BIT_SIZE - 1) / RTE_BITMAP_CL_BIT_SIZE; + n_slabs_array1 = (n_cache_lines_array2 + RTE_BITMAP_SLAB_BIT_SIZE - 1) / RTE_BITMAP_SLAB_BIT_SIZE; + n_slabs_array1 = rte_align32pow2(n_slabs_array1); + n_slabs_context = (sizeof(struct rte_bitmap) + (RTE_BITMAP_SLAB_BIT_SIZE / 8) - 1) / (RTE_BITMAP_SLAB_BIT_SIZE / 8); + n_cache_lines_context_and_array1 = (n_slabs_context + n_slabs_array1 + RTE_BITMAP_CL_SLAB_SIZE - 1) / RTE_BITMAP_CL_SLAB_SIZE; + n_bytes_total = (n_cache_lines_context_and_array1 + n_cache_lines_array2) * RTE_CACHE_LINE_SIZE; + + if (array1_byte_offset) { + *array1_byte_offset = n_slabs_context * (RTE_BITMAP_SLAB_BIT_SIZE / 8); + } + if (array1_slabs) { + *array1_slabs = n_slabs_array1; + } + if (array2_byte_offset) { + *array2_byte_offset = n_cache_lines_context_and_array1 * RTE_CACHE_LINE_SIZE; + } + if (array2_slabs) { + *array2_slabs = n_cache_lines_array2 * RTE_BITMAP_CL_SLAB_SIZE; + } + + return n_bytes_total; +} + +static inline void +__rte_bitmap_scan_init(struct rte_bitmap *bmp) +{ + bmp->index1 = bmp->array1_size - 1; + bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1; + __rte_bitmap_index2_set(bmp); + bmp->index2 += RTE_BITMAP_CL_SLAB_SIZE; + + bmp->go2 = 0; +} + +/** + * Bitmap memory footprint calculation + * + * @param n_bits + * Number of bits in the bitmap + * @return + * Bitmap memory footprint measured in bytes on success, 0 on error + */ +static inline uint32_t +rte_bitmap_get_memory_footprint(uint32_t n_bits) { + /* Check input arguments */ + if (n_bits == 0) { + return 0; + } + + return __rte_bitmap_get_memory_footprint(n_bits, NULL, NULL, NULL, NULL); +} + +/** + * Bitmap initialization + * + * @param mem_size + * Minimum expected size of bitmap. + * @param mem + * Base address of array1 and array2. + * @param n_bits + * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512. + * @return + * Handle to bitmap instance. + */ +static inline struct rte_bitmap * +rte_bitmap_init(uint32_t n_bits, uint8_t *mem, uint32_t mem_size) +{ + struct rte_bitmap *bmp; + uint32_t array1_byte_offset, array1_slabs, array2_byte_offset, array2_slabs; + uint32_t size; + + /* Check input arguments */ + if (n_bits == 0) { + return NULL; + } + + if ((mem == NULL) || (((uintptr_t) mem) & RTE_CACHE_LINE_MASK)) { + return NULL; + } + + size = __rte_bitmap_get_memory_footprint(n_bits, + &array1_byte_offset, &array1_slabs, + &array2_byte_offset, &array2_slabs); + if (size < mem_size) { + return NULL; + } + + /* Setup bitmap */ + memset(mem, 0, size); + bmp = (struct rte_bitmap *) mem; + + bmp->array1 = (uint64_t *) &mem[array1_byte_offset]; + bmp->array1_size = array1_slabs; + bmp->array2 = (uint64_t *) &mem[array2_byte_offset]; + bmp->array2_size = array2_slabs; + + __rte_bitmap_scan_init(bmp); + + return bmp; +} + +/** + * Bitmap free + * + * @param bmp + * Handle to bitmap instance + * @return + * 0 upon success, error code otherwise + */ +static inline int +rte_bitmap_free(struct rte_bitmap *bmp) +{ + /* Check input arguments */ + if (bmp == NULL) { + return -1; + } + + return 0; +} + +/** + * Bitmap reset + * + * @param bmp + * Handle to bitmap instance + */ +static inline void +rte_bitmap_reset(struct rte_bitmap *bmp) +{ + memset(bmp->array1, 0, bmp->array1_size * sizeof(uint64_t)); + memset(bmp->array2, 0, bmp->array2_size * sizeof(uint64_t)); + __rte_bitmap_scan_init(bmp); +} + +/** + * Bitmap location prefetch into CPU L1 cache + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + * @return + * 0 upon success, error code otherwise + */ +static inline void +rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab2; + uint32_t index2; + + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + slab2 = bmp->array2 + index2; + rte_prefetch0((void *) slab2); +} + +/** + * Bitmap bit get + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + * @return + * 0 when bit is cleared, non-zero when bit is set + */ +static inline uint64_t +rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab2; + uint32_t index2, offset2; + + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + return (*slab2) & (1lu << offset2); +} + +/** + * Bitmap bit set + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + */ +static inline void +rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1, offset2; + + /* Set bit in array2 slab and set bit in array1 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + slab1 = bmp->array1 + index1; + + *slab2 |= 1lu << offset2; + *slab1 |= 1lu << offset1; +} + +/** + * Bitmap slab set + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position identifying the array2 slab + * @param slab + * Value to be assigned to the 64-bit slab in array2 + */ +static inline void +rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1; + + /* Set bits in array2 slab and set bit in array1 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + slab1 = bmp->array1 + index1; + + *slab2 |= slab; + *slab1 |= 1lu << offset1; +} + +static inline uint64_t +__rte_bitmap_line_not_empty(uint64_t *slab2) +{ + uint64_t v1, v2, v3, v4; + + v1 = slab2[0] | slab2[1]; + v2 = slab2[2] | slab2[3]; + v3 = slab2[4] | slab2[5]; + v4 = slab2[6] | slab2[7]; + v1 |= v2; + v3 |= v4; + + return v1 | v3; +} + +/** + * Bitmap bit clear + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + */ +static inline void +rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1, offset2; + + /* Clear bit in array2 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + + /* Return if array2 slab is not all-zeros */ + *slab2 &= ~(1lu << offset2); + if (*slab2){ + return; + } + + /* Check the entire cache line of array2 for all-zeros */ + index2 &= ~ RTE_BITMAP_CL_SLAB_MASK; + slab2 = bmp->array2 + index2; + if (__rte_bitmap_line_not_empty(slab2)) { + return; + } + + /* The array2 cache line is all-zeros, so clear bit in array1 slab */ + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab1 = bmp->array1 + index1; + *slab1 &= ~(1lu << offset1); + + return; +} + +static inline int +__rte_bitmap_scan_search(struct rte_bitmap *bmp) +{ + uint64_t value1; + uint32_t i; + + /* Check current array1 slab */ + value1 = bmp->array1[bmp->index1]; + value1 &= __rte_bitmap_mask1_get(bmp); + + if (rte_bsf64(value1, &bmp->offset1)) { + return 1; + } + + __rte_bitmap_index1_inc(bmp); + bmp->offset1 = 0; + + /* Look for another array1 slab */ + for (i = 0; i < bmp->array1_size; i ++, __rte_bitmap_index1_inc(bmp)) { + value1 = bmp->array1[bmp->index1]; + + if (rte_bsf64(value1, &bmp->offset1)) { + return 1; + } + } + + return 0; +} + +static inline void +__rte_bitmap_scan_read_init(struct rte_bitmap *bmp) +{ + __rte_bitmap_index2_set(bmp); + bmp->go2 = 1; + rte_prefetch1((void *)(bmp->array2 + bmp->index2 + 8)); +} + +static inline int +__rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab) +{ + uint64_t *slab2; + + slab2 = bmp->array2 + bmp->index2; + for ( ; bmp->go2 ; bmp->index2 ++, slab2 ++, bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK) { + if (*slab2) { + *pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + *slab = *slab2; + + bmp->index2 ++; + slab2 ++; + bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK; + return 1; + } + } + + return 0; +} + +/** + * Bitmap scan (with automatic wrap-around) + * + * @param bmp + * Handle to bitmap instance + * @param pos + * When function call returns 1, pos contains the position of the next set + * bit, otherwise not modified + * @param slab + * When function call returns 1, slab contains the value of the entire 64-bit + * slab where the bit indicated by pos is located. Slabs are always 64-bit + * aligned, so the position of the first bit of the slab (this bit is not + * necessarily set) is pos / 64. Once a slab has been returned by the bitmap + * scan operation, the internal pointers of the bitmap are updated to point + * after this slab, so the same slab will not be returned again if it + * contains more than one bit which is set. When function call returns 0, + * slab is not modified. + * @return + * 0 if there is no bit set in the bitmap, 1 otherwise + */ +static inline int +rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab) +{ + /* Return data from current array2 line if available */ + if (__rte_bitmap_scan_read(bmp, pos, slab)) { + return 1; + } + + /* Look for non-empty array2 line */ + if (__rte_bitmap_scan_search(bmp)) { + __rte_bitmap_scan_read_init(bmp); + __rte_bitmap_scan_read(bmp, pos, slab); + return 1; + } + + /* Empty bitmap */ + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_BITMAP_H__ */ diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h index c79368d3..6fb08341 100644 --- a/lib/librte_eal/common/include/rte_bus.h +++ b/lib/librte_eal/common/include/rte_bus.h @@ -55,6 +55,21 @@ extern "C" { /** Double linked list of buses */ TAILQ_HEAD(rte_bus_list, rte_bus); + +/** + * IOVA mapping mode. + * + * IOVA mapping mode is iommu programming mode of a device. + * That device (for example: IOMMU backed DMA device) based + * on rte_iova_mode will generate physical or virtual address. + * + */ +enum rte_iova_mode { + RTE_IOVA_DC = 0, /* Don't care mode */ + RTE_IOVA_PA = (1 << 0), /* DMA using physical address */ + RTE_IOVA_VA = (1 << 1) /* DMA using virtual address */ +}; + /** * Bus specific scan for devices attached on the bus. * For each bus object, the scan would be responsible for finding devices and @@ -168,6 +183,20 @@ struct rte_bus_conf { enum rte_bus_scan_mode scan_mode; /**< Scan policy. */ }; + +/** + * Get common iommu class of the all the devices on the bus. The bus may + * check that those devices are attached to iommu driver. + * If no devices are attached to the bus. The bus may return with don't care + * (_DC) value. + * Otherwise, The bus will return appropriate _pa or _va iova mode. + * + * @return + * enum rte_iova_mode value. + */ +typedef enum rte_iova_mode (*rte_bus_get_iommu_class_t)(void); + + /** * A structure describing a generic bus. */ @@ -181,6 +210,7 @@ struct rte_bus { rte_bus_unplug_t unplug; /**< Remove single device from driver */ rte_bus_parse_t parse; /**< Parse a device name */ struct rte_bus_conf conf; /**< Bus configuration */ + rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */ }; /** @@ -280,12 +310,22 @@ struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev); */ struct rte_bus *rte_bus_find_by_name(const char *busname); + +/** + * Get the common iommu class of devices bound on to buses available in the + * system. The default mode is PA. + * + * @return + * enum rte_iova_mode value. + */ +enum rte_iova_mode rte_bus_get_iommu_class(void); + /** * Helper for Bus registration. * The constructor has higher priority than PMD constructors. */ #define RTE_REGISTER_BUS(nm, bus) \ -RTE_INIT_PRIO(businitfn_ ##nm, 101); \ +RTE_INIT_PRIO(businitfn_ ##nm, 110); \ static void businitfn_ ##nm(void) \ {\ (bus).name = RTE_STR(nm);\ diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index 1afc66e3..de853e16 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -109,6 +109,29 @@ typedef uint16_t unaligned_uint16_t; #define RTE_SET_USED(x) (void)(x) /** + * Run function before main() with low priority. + * + * The constructor will be run after prioritized constructors. + * + * @param func + * Constructor function. + */ +#define RTE_INIT(func) \ +static void __attribute__((constructor, used)) func(void) + +/** + * Run function before main() with high priority. + * + * @param func + * Constructor function. + * @param prio + * Priority number must be above 100. + * Lowest number is the first to run. + */ +#define RTE_INIT_PRIO(func, prio) \ +static void __attribute__((constructor(prio), used)) func(void) + +/** * Force a function to be inlined */ #define __rte_always_inline inline __attribute__((always_inline)) diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h index cab6fb4c..79b67b3e 100644 --- a/lib/librte_eal/common/include/rte_debug.h +++ b/lib/librte_eal/common/include/rte_debug.h @@ -79,7 +79,7 @@ void rte_dump_registers(void); #define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy") #define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__) -#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG +#ifdef RTE_ENABLE_ASSERT #define RTE_ASSERT(exp) RTE_VERIFY(exp) #else #define RTE_ASSERT(exp) do {} while (0) diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index 5386d3a2..9342e0cb 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -49,7 +49,6 @@ extern "C" { #include <stdio.h> #include <sys/queue.h> -#include <rte_config.h> #include <rte_log.h> __attribute__((format(printf, 2, 0))) @@ -152,7 +151,11 @@ struct rte_driver { const char *alias; /**< Driver alias. */ }; -#define RTE_DEV_NAME_MAX_LEN (32) +/* + * Internal identifier length + * Sufficiently large to allow for UUID or PCI address + */ +#define RTE_DEV_NAME_MAX_LEN 64 /** * A structure describing a generic device. @@ -166,28 +169,6 @@ struct rte_device { }; /** - * Initialize a driver specified by name. - * - * @param name - * The pointer to a driver name to be initialized. - * @param args - * The pointer to arguments used by driver initialization. - * @return - * 0 on success, negative on error - */ -int rte_vdev_init(const char *name, const char *args); - -/** - * Uninitalize a driver specified by name. - * - * @param name - * The pointer to a driver name to be initialized. - * @return - * 0 on success, negative on error - */ -int rte_vdev_uninit(const char *name); - -/** * Attach a device to a registered driver. * * @param name @@ -312,4 +293,4 @@ __attribute__((used)) = str } #endif -#endif /* _RTE_VDEV_H_ */ +#endif /* _RTE_DEV_H_ */ diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index 0e7363d7..09b66819 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -44,7 +44,9 @@ #include <sched.h> #include <rte_per_lcore.h> -#include <rte_config.h> +#include <rte_bus.h> + +#include <rte_pci_dev_feature_defs.h> #ifdef __cplusplus extern "C" { @@ -87,6 +89,9 @@ struct rte_config { /** Primary or secondary configuration */ enum rte_proc_type_t process_type; + /** PA or VA mapping mode */ + enum rte_iova_mode iova_mode; + /** * Pointer to memory configuration, which may be shared across multiple * DPDK instances @@ -264,6 +269,32 @@ rte_set_application_usage_hook(rte_usage_hook_t usage_func); int rte_eal_has_hugepages(void); /** + * Whether EAL is using PCI bus. + * Disabled by --no-pci option. + * + * @return + * Nonzero if the PCI bus is enabled. + */ +int rte_eal_has_pci(void); + +/** + * Whether the EAL was asked to create UIO device. + * + * @return + * Nonzero if true. + */ +int rte_eal_create_uio_dev(void); + +/** + * The user-configured vfio interrupt mode. + * + * @return + * Interrupt mode configured with the command line, + * RTE_INTR_MODE_NONE by default. + */ +enum rte_intr_mode rte_eal_vfio_intr_mode(void); + +/** * A wrap API for syscall gettid. * * @return @@ -287,11 +318,22 @@ static inline int rte_gettid(void) return RTE_PER_LCORE(_thread_id); } -#define RTE_INIT(func) \ -static void __attribute__((constructor, used)) func(void) +/** + * Get the iova mode + * + * @return + * enum rte_iova_mode value. + */ +enum rte_iova_mode rte_eal_iova_mode(void); -#define RTE_INIT_PRIO(func, prio) \ -static void __attribute__((constructor(prio), used)) func(void) +/** + * Get default pool ops name for mbuf + * + * @return + * returns default pool ops name. + */ +const char * +rte_eal_mbuf_default_mempool_ops(void); #ifdef __cplusplus } diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h index 6daffebf..031f78cc 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_eal_interrupts.h @@ -35,15 +35,26 @@ #error "don't include this file directly, please include generic <rte_interrupts.h>" #endif -#ifndef _RTE_LINUXAPP_INTERRUPTS_H_ -#define _RTE_LINUXAPP_INTERRUPTS_H_ +/** + * @file rte_eal_interrupts.h + * @internal + * + * Contains function prototypes exposed by the EAL for interrupt handling by + * drivers and other DPDK internal consumers. + */ + +#ifndef _RTE_EAL_INTERRUPTS_H_ +#define _RTE_EAL_INTERRUPTS_H_ #define RTE_MAX_RXTX_INTR_VEC_ID 32 #define RTE_INTR_VEC_ZERO_OFFSET 0 #define RTE_INTR_VEC_RXTX_OFFSET 1 +/** + * The interrupt source type, e.g. UIO, VFIO, ALARM etc. + */ enum rte_intr_handle_type { - RTE_INTR_HANDLE_UNKNOWN = 0, + RTE_INTR_HANDLE_UNKNOWN = 0, /**< generic unknown handle */ RTE_INTR_HANDLE_UIO, /**< uio device handle */ RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */ RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ @@ -52,7 +63,7 @@ enum rte_intr_handle_type { RTE_INTR_HANDLE_ALARM, /**< alarm handle */ RTE_INTR_HANDLE_EXT, /**< external handler */ RTE_INTR_HANDLE_VDEV, /**< virtual device */ - RTE_INTR_HANDLE_MAX + RTE_INTR_HANDLE_MAX /**< count of elements */ }; #define RTE_INTR_EVENT_ADD 1UL @@ -86,13 +97,13 @@ struct rte_intr_handle { RTE_STD_C11 union { int vfio_dev_fd; /**< VFIO device file descriptor */ - int uio_cfg_fd; /**< UIO config file descriptor - for uio_pci_generic */ + int uio_cfg_fd; /**< UIO cfg file desc for uio_pci_generic */ }; int fd; /**< interrupt event file descriptor */ enum rte_intr_handle_type type; /**< handle type */ uint32_t max_intr; /**< max interrupt requested */ uint32_t nb_efd; /**< number of available efd(event fd) */ + uint8_t efd_counter_size; /**< size of efd counter, used for vdev */ int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */ struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vector epoll event */ @@ -236,4 +247,4 @@ rte_intr_allow_others(struct rte_intr_handle *intr_handle); int rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); -#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */ +#endif /* _RTE_EAL_INTERRUPTS_H_ */ diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h index 5d06ed79..43177c7a 100644 --- a/lib/librte_eal/common/include/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -53,7 +53,7 @@ struct rte_intr_handle; /** Function to be registered for the specific interrupt */ typedef void (*rte_intr_callback_fn)(void *cb_arg); -#include <exec-env/rte_interrupts.h> +#include "rte_eal_interrupts.h" /** * It registers the callback for the specific interrupt. Multiple diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h index 50e0d0fe..c89e6bab 100644 --- a/lib/librte_eal/common/include/rte_lcore.h +++ b/lib/librte_eal/common/include/rte_lcore.h @@ -262,6 +262,20 @@ void rte_thread_get_affinity(rte_cpuset_t *cpusetp); */ int rte_thread_setname(pthread_t id, const char *name); +/** + * Test if the core supplied has a specific role + * + * @param lcore_id + * The identifier of the lcore, which MUST be between 0 and + * RTE_MAX_LCORE-1. + * @param role + * The role to be checked against. + * @return + * On success, return 0; otherwise return a negative value. + */ +int +rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index ec8dba79..16564d41 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -87,6 +87,7 @@ extern struct rte_logs rte_logs; #define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */ #define RTE_LOGTYPE_EFD 18 /**< Log related to EFD. */ #define RTE_LOGTYPE_EVENTDEV 19 /**< Log related to eventdev. */ +#define RTE_LOGTYPE_GSO 20 /**< Log related to GSO. */ /* these log types can be used in an application */ #define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */ @@ -138,12 +139,6 @@ int rte_openlog_stream(FILE *f); void rte_log_set_global_level(uint32_t level); /** - * Deprecated, replaced by rte_log_set_global_level(). - */ -__rte_deprecated -void rte_set_log_level(uint32_t level); - -/** * Get the global log level. * * @return @@ -152,29 +147,6 @@ void rte_set_log_level(uint32_t level); uint32_t rte_log_get_global_level(void); /** - * Deprecated, replaced by rte_log_get_global_level(). - */ -__rte_deprecated -uint32_t rte_get_log_level(void); - -/** - * Enable or disable the log type. - * - * @param type - * Log type, for example, RTE_LOGTYPE_EAL. - * @param enable - * True for enable; false for disable. - */ -__rte_deprecated -void rte_set_log_type(uint32_t type, int enable); - -/** - * Get the global log type. - */ -__rte_deprecated -uint32_t rte_get_log_type(void); - -/** * Get the log level for a given type. * * @param logtype diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h index 3d37f79b..5d4c11a7 100644 --- a/lib/librte_eal/common/include/rte_malloc.h +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -323,17 +323,24 @@ int rte_malloc_set_limit(const char *type, size_t max); /** - * Return the physical address of a virtual address obtained through + * Return the IO address of a virtual address obtained through * rte_malloc * * @param addr * Address obtained from a previous rte_malloc call * @return - * RTE_BAD_PHYS_ADDR on error - * otherwise return physical address of the buffer + * RTE_BAD_IOVA on error + * otherwise return an address suitable for IO */ -phys_addr_t -rte_malloc_virt2phy(const void *addr); +rte_iova_t +rte_malloc_virt2iova(const void *addr); + +__rte_deprecated +static inline phys_addr_t +rte_malloc_virt2phy(const void *addr) +{ + return rte_malloc_virt2iova(addr); +} #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index 4aa5d1f7..14aacea5 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -44,12 +44,6 @@ #include <stddef.h> #include <stdio.h> -#include <rte_config.h> - -#ifdef RTE_EXEC_ENV_LINUXAPP -#include <exec-env/rte_dom0_common.h> -#endif - #ifdef __cplusplus extern "C" { #endif @@ -98,14 +92,27 @@ enum rte_page_sizes { */ #define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE) -typedef uint64_t phys_addr_t; /**< Physical address definition. */ +typedef uint64_t phys_addr_t; /**< Physical address. */ #define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1) +/** + * IO virtual address type. + * When the physical addressing mode (IOVA as PA) is in use, + * the translation from an IO virtual address (IOVA) to a physical address + * is a direct mapping, i.e. the same value. + * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation. + */ +typedef uint64_t rte_iova_t; +#define RTE_BAD_IOVA ((rte_iova_t)-1) /** * Physical memory segment descriptor. */ struct rte_memseg { - phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 + union { + phys_addr_t phys_addr; /**< deprecated - Start physical address. */ + rte_iova_t iova; /**< Start IO address. */ + }; RTE_STD_C11 union { void *addr; /**< Start virtual address. */ @@ -116,10 +123,6 @@ struct rte_memseg { int32_t socket_id; /**< NUMA socket ID. */ uint32_t nchannel; /**< Number of channels. */ uint32_t nrank; /**< Number of ranks. */ -#ifdef RTE_LIBRTE_XEN_DOM0 - /**< store segment MFNs */ - uint64_t mfn[DOM0_NUM_MEMBLOCK]; -#endif } __rte_packed; /** @@ -140,11 +143,21 @@ int rte_mem_lock_page(const void *virt); * @param virt * The virtual address. * @return - * The physical address or RTE_BAD_PHYS_ADDR on error. + * The physical address or RTE_BAD_IOVA on error. */ phys_addr_t rte_mem_virt2phy(const void *virt); /** + * Get IO virtual address of any mapped virtual address in the current process. + * + * @param virt + * The virtual address. + * @return + * The IO address or RTE_BAD_IOVA on error. + */ +rte_iova_t rte_mem_virt2iova(const void *virt); + +/** * Get the layout of the available physical memory. * * It can be useful for an application to have the full physical @@ -195,68 +208,16 @@ unsigned rte_memory_get_nchannel(void); */ unsigned rte_memory_get_nrank(void); -#ifdef RTE_LIBRTE_XEN_DOM0 - -/**< Internal use only - should DOM0 memory mapping be used */ -int rte_xen_dom0_supported(void); - -/**< Internal use only - phys to virt mapping for xen */ -phys_addr_t rte_xen_mem_phy2mch(int32_t, const phys_addr_t); - /** - * Return the physical address of elt, which is an element of the pool mp. - * - * @param memseg_id - * Identifier of the memory segment owning the physical address. If - * set to -1, find it automatically. - * @param phy_addr - * physical address of elt. - * - * @return - * The physical address or RTE_BAD_PHYS_ADDR on error. - */ -static inline phys_addr_t -rte_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr) -{ - if (rte_xen_dom0_supported()) - return rte_xen_mem_phy2mch(memseg_id, phy_addr); - else - return phy_addr; -} - -/** - * Memory init for supporting application running on Xen domain0. - * - * @param void + * Drivers based on uio will not load unless physical + * addresses are obtainable. It is only possible to get + * physical addresses when running as a privileged user. * * @return - * 0: successfully - * negative: error + * 1 if the system is able to obtain physical addresses. + * 0 if using DMA addresses through an IOMMU. */ -int rte_xen_dom0_memory_init(void); - -/** - * Attach to memory setments of primary process on Xen domain0. - * - * @param void - * - * @return - * 0: successfully - * negative: error - */ -int rte_xen_dom0_memory_attach(void); -#else -static inline int rte_xen_dom0_supported(void) -{ - return 0; -} - -static inline phys_addr_t -rte_mem_phy2mch(int32_t memseg_id __rte_unused, const phys_addr_t phy_addr) -{ - return phy_addr; -} -#endif +int rte_eal_using_phys_addrs(void); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h index 1d0827f4..6f0ba182 100644 --- a/lib/librte_eal/common/include/rte_memzone.h +++ b/lib/librte_eal/common/include/rte_memzone.h @@ -78,7 +78,11 @@ struct rte_memzone { #define RTE_MEMZONE_NAMESIZE 32 /**< Maximum length of memory zone name.*/ char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */ - phys_addr_t phys_addr; /**< Start physical address. */ + RTE_STD_C11 + union { + phys_addr_t phys_addr; /**< deprecated - Start physical address. */ + rte_iova_t iova; /**< Start IO address. */ + }; RTE_STD_C11 union { void *addr; /**< Start virtual address. */ diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h deleted file mode 100644 index 8b123391..00000000 --- a/lib/librte_eal/common/include/rte_pci.h +++ /dev/null @@ -1,598 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * Copyright 2013-2014 6WIND S.A. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_PCI_H_ -#define _RTE_PCI_H_ - -/** - * @file - * - * RTE PCI Interface - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include <stdio.h> -#include <stdlib.h> -#include <limits.h> -#include <errno.h> -#include <sys/queue.h> -#include <stdint.h> -#include <inttypes.h> - -#include <rte_debug.h> -#include <rte_interrupts.h> -#include <rte_dev.h> -#include <rte_bus.h> - -/** Pathname of PCI devices directory. */ -const char *pci_get_sysfs_path(void); - -/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ -#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 -#define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X") - -/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */ -#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 - -/** Nb. of values in PCI device identifier format string. */ -#define PCI_FMT_NVAL 4 - -/** Nb. of values in PCI resource format. */ -#define PCI_RESOURCE_FMT_NVAL 3 - -/** Maximum number of PCI resources. */ -#define PCI_MAX_RESOURCE 6 - -/* Forward declarations */ -struct rte_pci_device; -struct rte_pci_driver; - -/** List of PCI devices */ -TAILQ_HEAD(rte_pci_device_list, rte_pci_device); -/** List of PCI drivers */ -TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver); - -/* PCI Bus iterators */ -#define FOREACH_DEVICE_ON_PCIBUS(p) \ - TAILQ_FOREACH(p, &(rte_pci_bus.device_list), next) - -#define FOREACH_DRIVER_ON_PCIBUS(p) \ - TAILQ_FOREACH(p, &(rte_pci_bus.driver_list), next) - -/** - * A structure describing an ID for a PCI driver. Each driver provides a - * table of these IDs for each device that it supports. - */ -struct rte_pci_id { - uint32_t class_id; /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */ - uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */ - uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ - uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */ - uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */ -}; - -/** - * A structure describing the location of a PCI device. - */ -struct rte_pci_addr { - uint32_t domain; /**< Device domain */ - uint8_t bus; /**< Device bus */ - uint8_t devid; /**< Device ID */ - uint8_t function; /**< Device function. */ -}; - -struct rte_devargs; - -/** - * A structure describing a PCI device. - */ -struct rte_pci_device { - TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ - struct rte_device device; /**< Inherit core device */ - struct rte_pci_addr addr; /**< PCI location. */ - struct rte_pci_id id; /**< PCI ID. */ - struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; - /**< PCI Memory Resource */ - struct rte_intr_handle intr_handle; /**< Interrupt handle */ - struct rte_pci_driver *driver; /**< Associated driver */ - uint16_t max_vfs; /**< sriov enable if not zero */ - enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ - char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */ -}; - -/** - * @internal - * Helper macro for drivers that need to convert to struct rte_pci_device. - */ -#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device) - -/** Any PCI device identifier (vendor, device, ...) */ -#define PCI_ANY_ID (0xffff) -#define RTE_CLASS_ANY_ID (0xffffff) - -#ifdef __cplusplus -/** C++ macro used to help building up tables of device IDs */ -#define RTE_PCI_DEVICE(vend, dev) \ - RTE_CLASS_ANY_ID, \ - (vend), \ - (dev), \ - PCI_ANY_ID, \ - PCI_ANY_ID -#else -/** Macro used to help building up tables of device IDs */ -#define RTE_PCI_DEVICE(vend, dev) \ - .class_id = RTE_CLASS_ANY_ID, \ - .vendor_id = (vend), \ - .device_id = (dev), \ - .subsystem_vendor_id = PCI_ANY_ID, \ - .subsystem_device_id = PCI_ANY_ID -#endif - -/** - * Initialisation function for the driver called during PCI probing. - */ -typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *); - -/** - * Uninitialisation function for the driver called during hotplugging. - */ -typedef int (pci_remove_t)(struct rte_pci_device *); - -/** - * A structure describing a PCI driver. - */ -struct rte_pci_driver { - TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ - struct rte_driver driver; /**< Inherit core driver. */ - struct rte_pci_bus *bus; /**< PCI bus reference. */ - pci_probe_t *probe; /**< Device Probe function. */ - pci_remove_t *remove; /**< Device Remove function. */ - const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ - uint32_t drv_flags; /**< Flags contolling handling of device. */ -}; - -/** - * Structure describing the PCI bus - */ -struct rte_pci_bus { - struct rte_bus bus; /**< Inherit the generic class */ - struct rte_pci_device_list device_list; /**< List of PCI devices */ - struct rte_pci_driver_list driver_list; /**< List of PCI drivers */ -}; - -/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ -#define RTE_PCI_DRV_NEED_MAPPING 0x0001 -/** Device driver supports link state interrupt */ -#define RTE_PCI_DRV_INTR_LSC 0x0008 -/** Device driver supports device removal interrupt */ -#define RTE_PCI_DRV_INTR_RMV 0x0010 -/** Device driver needs to keep mapped resources if unsupported dev detected */ -#define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020 - -/** - * A structure describing a PCI mapping. - */ -struct pci_map { - void *addr; - char *path; - uint64_t offset; - uint64_t size; - uint64_t phaddr; -}; - -/** - * A structure describing a mapped PCI resource. - * For multi-process we need to reproduce all PCI mappings in secondary - * processes, so save them in a tailq. - */ -struct mapped_pci_resource { - TAILQ_ENTRY(mapped_pci_resource) next; - - struct rte_pci_addr pci_addr; - char path[PATH_MAX]; - int nb_maps; - struct pci_map maps[PCI_MAX_RESOURCE]; -}; - -/** mapped pci device list */ -TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); - -/**< Internal use only - Macro used by pci addr parsing functions **/ -#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \ -do { \ - unsigned long val; \ - char *end; \ - errno = 0; \ - val = strtoul((in), &end, 16); \ - if (errno != 0 || end[0] != (dlm) || val > (lim)) \ - return -EINVAL; \ - (fd) = (typeof (fd))val; \ - (in) = end + 1; \ -} while(0) - -/** - * Utility function to produce a PCI Bus-Device-Function value - * given a string representation. Assumes that the BDF is provided without - * a domain prefix (i.e. domain returned is always 0) - * - * @param input - * The input string to be parsed. Should have the format XX:XX.X - * @param dev_addr - * The PCI Bus-Device-Function address to be returned. Domain will always be - * returned as 0 - * @return - * 0 on success, negative on error. - */ -static inline int -eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr) -{ - dev_addr->domain = 0; - GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); - GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); - GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); - return 0; -} - -/** - * Utility function to produce a PCI Bus-Device-Function value - * given a string representation. Assumes that the BDF is provided including - * a domain prefix. - * - * @param input - * The input string to be parsed. Should have the format XXXX:XX:XX.X - * @param dev_addr - * The PCI Bus-Device-Function address to be returned - * @return - * 0 on success, negative on error. - */ -static inline int -eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) -{ - GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':'); - GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); - GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); - GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); - return 0; -} -#undef GET_PCIADDR_FIELD - -/** - * Utility function to write a pci device name, this device name can later be - * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_* - * BDF helpers. - * - * @param addr - * The PCI Bus-Device-Function address - * @param output - * The output buffer string - * @param size - * The output buffer size - */ -static inline void -rte_pci_device_name(const struct rte_pci_addr *addr, - char *output, size_t size) -{ - RTE_VERIFY(size >= PCI_PRI_STR_SIZE); - RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT, - addr->domain, addr->bus, - addr->devid, addr->function) >= 0); -} - -/* Compare two PCI device addresses. */ -/** - * Utility function to compare two PCI device addresses. - * - * @param addr - * The PCI Bus-Device-Function address to compare - * @param addr2 - * The PCI Bus-Device-Function address to compare - * @return - * 0 on equal PCI address. - * Positive on addr is greater than addr2. - * Negative on addr is less than addr2, or error. - */ -static inline int -rte_eal_compare_pci_addr(const struct rte_pci_addr *addr, - const struct rte_pci_addr *addr2) -{ - uint64_t dev_addr, dev_addr2; - - if ((addr == NULL) || (addr2 == NULL)) - return -1; - - dev_addr = ((uint64_t)addr->domain << 24) | - (addr->bus << 16) | (addr->devid << 8) | addr->function; - dev_addr2 = ((uint64_t)addr2->domain << 24) | - (addr2->bus << 16) | (addr2->devid << 8) | addr2->function; - - if (dev_addr > dev_addr2) - return 1; - else if (dev_addr < dev_addr2) - return -1; - else - return 0; -} - -/** - * Scan the content of the PCI bus, and the devices in the devices - * list - * - * @return - * 0 on success, negative on error - */ -int rte_pci_scan(void); - -/** - * Probe the PCI bus - * - * @return - * - 0 on success. - * - !0 on error. - */ -int -rte_pci_probe(void); - -/** - * Map the PCI device resources in user space virtual memory address - * - * Note that driver should not call this function when flag - * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for - * you when it's on. - * - * @param dev - * A pointer to a rte_pci_device structure describing the device - * to use - * - * @return - * 0 on success, negative on error and positive if no driver - * is found for the device. - */ -int rte_pci_map_device(struct rte_pci_device *dev); - -/** - * Unmap this device - * - * @param dev - * A pointer to a rte_pci_device structure describing the device - * to use - */ -void rte_pci_unmap_device(struct rte_pci_device *dev); - -/** - * @internal - * Map a particular resource from a file. - * - * @param requested_addr - * The starting address for the new mapping range. - * @param fd - * The file descriptor. - * @param offset - * The offset for the mapping range. - * @param size - * The size for the mapping range. - * @param additional_flags - * The additional flags for the mapping range. - * @return - * - On success, the function returns a pointer to the mapped area. - * - On error, the value MAP_FAILED is returned. - */ -void *pci_map_resource(void *requested_addr, int fd, off_t offset, - size_t size, int additional_flags); - -/** - * @internal - * Unmap a particular resource. - * - * @param requested_addr - * The address for the unmapping range. - * @param size - * The size for the unmapping range. - */ -void pci_unmap_resource(void *requested_addr, size_t size); - -/** - * Probe the single PCI device. - * - * Scan the content of the PCI bus, and find the pci device specified by pci - * address, then call the probe() function for registered driver that has a - * matching entry in its id_table for discovered device. - * - * @param addr - * The PCI Bus-Device-Function address to probe. - * @return - * - 0 on success. - * - Negative on error. - */ -int rte_pci_probe_one(const struct rte_pci_addr *addr); - -/** - * Close the single PCI device. - * - * Scan the content of the PCI bus, and find the pci device specified by pci - * address, then call the remove() function for registered driver that has a - * matching entry in its id_table for discovered device. - * - * @param addr - * The PCI Bus-Device-Function address to close. - * @return - * - 0 on success. - * - Negative on error. - */ -int rte_pci_detach(const struct rte_pci_addr *addr); - -/** - * Dump the content of the PCI bus. - * - * @param f - * A pointer to a file for output - */ -void rte_pci_dump(FILE *f); - -/** - * Register a PCI driver. - * - * @param driver - * A pointer to a rte_pci_driver structure describing the driver - * to be registered. - */ -void rte_pci_register(struct rte_pci_driver *driver); - -/** Helper for PCI device registration from driver (eth, crypto) instance */ -#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ -RTE_INIT(pciinitfn_ ##nm); \ -static void pciinitfn_ ##nm(void) \ -{\ - (pci_drv).driver.name = RTE_STR(nm);\ - rte_pci_register(&pci_drv); \ -} \ -RTE_PMD_EXPORT_NAME(nm, __COUNTER__) - -/** - * Unregister a PCI driver. - * - * @param driver - * A pointer to a rte_pci_driver structure describing the driver - * to be unregistered. - */ -void rte_pci_unregister(struct rte_pci_driver *driver); - -/** - * Read PCI config space. - * - * @param device - * A pointer to a rte_pci_device structure describing the device - * to use - * @param buf - * A data buffer where the bytes should be read into - * @param len - * The length of the data buffer. - * @param offset - * The offset into PCI config space - */ -int rte_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset); - -/** - * Write PCI config space. - * - * @param device - * A pointer to a rte_pci_device structure describing the device - * to use - * @param buf - * A data buffer containing the bytes should be written - * @param len - * The length of the data buffer. - * @param offset - * The offset into PCI config space - */ -int rte_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset); - -/** - * A structure used to access io resources for a pci device. - * rte_pci_ioport is arch, os, driver specific, and should not be used outside - * of pci ioport api. - */ -struct rte_pci_ioport { - struct rte_pci_device *dev; - uint64_t base; - uint64_t len; /* only filled for memory mapped ports */ -}; - -/** - * Initialize a rte_pci_ioport object for a pci device io resource. - * - * This object is then used to gain access to those io resources (see below). - * - * @param dev - * A pointer to a rte_pci_device structure describing the device - * to use. - * @param bar - * Index of the io pci resource we want to access. - * @param p - * The rte_pci_ioport object to be initialized. - * @return - * 0 on success, negative on error. - */ -int rte_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p); - -/** - * Release any resources used in a rte_pci_ioport object. - * - * @param p - * The rte_pci_ioport object to be uninitialized. - * @return - * 0 on success, negative on error. - */ -int rte_pci_ioport_unmap(struct rte_pci_ioport *p); - -/** - * Read from a io pci resource. - * - * @param p - * The rte_pci_ioport object from which we want to read. - * @param data - * A data buffer where the bytes should be read into - * @param len - * The length of the data buffer. - * @param offset - * The offset into the pci io resource. - */ -void rte_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset); - -/** - * Write to a io pci resource. - * - * @param p - * The rte_pci_ioport object to which we want to write. - * @param data - * A data buffer where the bytes should be read into - * @param len - * The length of the data buffer. - * @param offset - * The offset into the pci io resource. - */ -void rte_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset); - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_PCI_H_ */ diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h index 7c6f7383..92724406 100644 --- a/lib/librte_eal/common/include/rte_service.h +++ b/lib/librte_eal/common/include/rte_service.h @@ -61,9 +61,6 @@ extern "C" { #include <rte_lcore.h> -/* forward declaration only. Definition in rte_service_private.h */ -struct rte_service_spec; - #define RTE_SERVICE_NAME_MAX 32 /* Capabilities of a service. @@ -89,40 +86,32 @@ struct rte_service_spec; */ uint32_t rte_service_get_count(void); - /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Return the specification of a service by integer id. + * Return the id of a service by name. * - * This function provides the specification of a service. This can be used by - * the application to understand what the service represents. The service - * must not be modified by the application directly, only passed to the various - * rte_service_* functions. - * - * @param id The integer id of the service to retrieve - * @retval non-zero A valid pointer to the service_spec - * @retval NULL Invalid *id* provided. - */ -struct rte_service_spec *rte_service_get_by_id(uint32_t id); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice + * This function provides the id of the service using the service name as + * lookup key. The service id is to be passed to other functions in the + * rte_service_* API. * - * Return the specification of a service by name. - * - * This function provides the specification of a service using the service name - * as lookup key. This can be used by the application to understand what the - * service represents. The service must not be modified by the application - * directly, only passed to the various rte_service_* functions. + * Example usage: + * @code + * uint32_t service_id; + * int32_t ret = rte_service_get_by_name("service_X", &service_id); + * if (ret) { + * // handle error + * } + * @endcode * * @param name The name of the service to retrieve - * @retval non-zero A valid pointer to the service_spec - * @retval NULL Invalid *name* provided. + * @param[out] service_id A pointer to a uint32_t, to be filled in with the id. + * @retval 0 Success. The service id is provided in *service_id*. + * @retval -EINVAL Null *service_id* pointer provided + * @retval -ENODEV No such service registered */ -struct rte_service_spec *rte_service_get_by_name(const char *name); +int32_t rte_service_get_by_name(const char *name, uint32_t *service_id); /** * @warning @@ -133,7 +122,7 @@ struct rte_service_spec *rte_service_get_by_name(const char *name); * @return A pointer to the name of the service. The returned pointer remains * in ownership of the service, and the application must not free it. */ -const char *rte_service_get_name(const struct rte_service_spec *service); +const char *rte_service_get_name(uint32_t id); /** * @warning @@ -146,17 +135,16 @@ const char *rte_service_get_name(const struct rte_service_spec *service); * @retval 1 Capability supported by this service instance * @retval 0 Capability not supported by this service instance */ -int32_t rte_service_probe_capability(const struct rte_service_spec *service, - uint32_t capability); +int32_t rte_service_probe_capability(uint32_t id, uint32_t capability); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Enable a core to run a service. + * Map or unmap a lcore to a service. * - * Each core can be added or removed from running specific services. This - * functions adds *lcore* to the set of cores that will run *service*. + * Each core can be added or removed from running a specific service. This + * function enables or disables *lcore* to run *service_id*. * * If multiple cores are enabled on a service, an atomic is used to ensure that * only one cores runs the service at a time. The exception to this is when @@ -164,82 +152,120 @@ int32_t rte_service_probe_capability(const struct rte_service_spec *service, * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set, * the service function can be run on multiple threads at the same time. * - * @retval 0 lcore added successfully + * @param service_id the service to apply the lcore to + * @param lcore The lcore that will be mapped to service + * @param enable Zero to unmap or disable the core, non-zero to enable + * + * @retval 0 lcore map updated successfully * @retval -EINVAL An invalid service or lcore was provided. */ -int32_t rte_service_enable_on_lcore(struct rte_service_spec *service, - uint32_t lcore); +int32_t rte_service_map_lcore_set(uint32_t service_id, uint32_t lcore, + uint32_t enable); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Disable a core to run a service. + * Retrieve the mapping of an lcore to a service. * - * Each core can be added or removed from running specific services. This - * functions removes *lcore* to the set of cores that will run *service*. + * @param service_id the service to apply the lcore to + * @param lcore The lcore that will be mapped to service * - * @retval 0 Lcore removed successfully + * @retval 1 lcore is mapped to service + * @retval 0 lcore is not mapped to service * @retval -EINVAL An invalid service or lcore was provided. */ -int32_t rte_service_disable_on_lcore(struct rte_service_spec *service, - uint32_t lcore); +int32_t rte_service_map_lcore_get(uint32_t service_id, uint32_t lcore); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Return if an lcore is enabled for the service. + * Set the runstate of the service. * - * This function allows the application to query if *lcore* is currently set to - * run *service*. + * Each service is either running or stopped. Setting a non-zero runstate + * enables the service to run, while setting runstate zero disables it. * - * @retval 1 Lcore enabled on this lcore - * @retval 0 Lcore disabled on this lcore - * @retval -EINVAL An invalid service or lcore was provided. + * @param id The id of the service + * @param runstate The run state to apply to the service + * + * @retval 0 The service was successfully started + * @retval -EINVAL Invalid service id */ -int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service, - uint32_t lcore); - +int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Enable *service* to run. - * - * This function switches on a service during runtime. - * @retval 0 The service was successfully started + * Get the runstate for the service with *id*. See *rte_service_runstate_set* + * for details of runstates. A service can call this function to ensure that + * the application has indicated that it will receive CPU cycles. Either a + * service-core is mapped (default case), or the application has explicitly + * disabled the check that a service-cores is mapped to the service and takes + * responsibility to run the service manually using the available function + * *rte_service_run_iter_on_app_lcore* to do so. + * + * @retval 1 Service is running + * @retval 0 Service is stopped + * @retval -EINVAL Invalid service id */ -int32_t rte_service_start(struct rte_service_spec *service); +int32_t rte_service_runstate_get(uint32_t id); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Disable *service*. + * Enable or disable the check for a service-core being mapped to the service. + * An application can disable the check when takes the responsibility to run a + * service itself using *rte_service_run_iter_on_app_lcore*. + * + * @param id The id of the service to set the check on + * @param enable When zero, the check is disabled. Non-zero enables the check. * - * Switch off a service, so it is not run until it is *rte_service_start* is - * called on it. - * @retval 0 Service successfully switched off + * @retval 0 Success + * @retval -EINVAL Invalid service ID */ -int32_t rte_service_stop(struct rte_service_spec *service); +int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enable); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Returns if *service* is currently running. - * - * This function returns true if the service has been started using - * *rte_service_start*, AND a service core is mapped to the service. This - * function can be used to ensure that the service will be run. - * - * @retval 1 Service is currently running, and has a service lcore mapped - * @retval 0 Service is currently stopped, or no service lcore is mapped - * @retval -EINVAL Invalid service pointer provided + * This function runs a service callback from a non-service lcore. + * + * This function is designed to enable gradual porting to service cores, and + * to enable unit tests to verify a service behaves as expected. + * + * When called, this function ensures that the service identified by *id* is + * safe to run on this lcore. Multi-thread safe services are invoked even if + * other cores are simultaneously running them as they are multi-thread safe. + * + * Multi-thread unsafe services are handled depending on the variable + * *serialize_multithread_unsafe*: + * - When set, the function will check if a service is already being invoked + * on another lcore, refusing to run it and returning -EBUSY. + * - When zero, the application takes responsibility to ensure that the service + * indicated by *id* is not going to be invoked by another lcore. This setting + * avoids atomic operations, so is likely to be more performant. + * + * @param id The ID of the service to run + * @param serialize_multithread_unsafe This parameter indicates to the service + * cores library if it is required to use atomics to serialize access + * to mult-thread unsafe services. As there is an overhead in using + * atomics, applications can choose to enable or disable this feature + * + * Note that any thread calling this function MUST be a DPDK EAL thread, as + * the *rte_lcore_id* function is used to access internal data structures. + * + * @retval 0 Service was run on the calling thread successfully + * @retval -EBUSY Another lcore is executing the service, and it is not a + * multi-thread safe service, so the service was not run on this lcore + * @retval -ENOEXEC Service is not in a run-able state + * @retval -EINVAL Invalid service id */ -int32_t rte_service_is_running(const struct rte_service_spec *service); +int32_t rte_service_run_iter_on_app_lcore(uint32_t id, + uint32_t serialize_multithread_unsafe); /** * @warning @@ -341,13 +367,12 @@ int32_t rte_service_lcore_reset_all(void); * Enable or disable statistics collection for *service*. * * This function enables per core, per-service cycle count collection. - * @param service The service to enable statistics gathering on. + * @param id The service to enable statistics gathering on. * @param enable Zero to disable statistics, non-zero to enable. * @retval 0 Success * @retval -EINVAL Invalid service pointer passed */ -int32_t rte_service_set_stats_enable(struct rte_service_spec *service, - int32_t enable); +int32_t rte_service_set_stats_enable(uint32_t id, int32_t enable); /** * @warning @@ -374,10 +399,26 @@ int32_t rte_service_lcore_list(uint32_t array[], uint32_t n); * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Dumps any information available about the service. If service is NULL, - * dumps info for all services. + * Get the numer of services running on the supplied lcore. + * + * @param lcore Id of the service core. + * @retval >=0 Number of services registered to this core. + * @retval -EINVAL Invalid lcore provided + * @retval -ENOTSUP The provided lcore is not a service core. + */ +int32_t rte_service_lcore_count_services(uint32_t lcore); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Dumps any information available about the service. When id is UINT32_MAX, + * this function dumps info for all services. + * + * @retval 0 Statistics have been successfully dumped + * @retval -EINVAL Invalid service id provided */ -int32_t rte_service_dump(FILE *f, struct rte_service_spec *service); +int32_t rte_service_dump(FILE *f, uint32_t id); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_service_component.h b/lib/librte_eal/common/include/rte_service_component.h index 7a946a1e..ac965cb4 100644 --- a/lib/librte_eal/common/include/rte_service_component.h +++ b/lib/librte_eal/common/include/rte_service_component.h @@ -85,21 +85,30 @@ struct rte_service_spec { * * For example the eventdev SW PMD requires CPU cycles to perform its * scheduling. This can be achieved by registering it as a service, and the - * application can then assign CPU resources to it using - * *rte_service_set_coremask*. + * application can then assign CPU resources to that service. + * + * Note that when a service component registers itself, it is not permitted to + * add or remove service-core threads, or modify lcore-to-service mappings. The + * only API that may be called by the service-component is + * *rte_service_component_runstate_set*, which indicates that the service + * component is ready to be executed. * * @param spec The specification of the service to register + * @param[out] service_id A pointer to a uint32_t, which will be filled in + * during registration of the service. It is set to the integers + * service number given to the service. This parameter may be NULL. * @retval 0 Successfully registered the service. * -EINVAL Attempted to register an invalid service (eg, no callback * set) */ -int32_t rte_service_register(const struct rte_service_spec *spec); +int32_t rte_service_component_register(const struct rte_service_spec *spec, + uint32_t *service_id); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Unregister a service. + * Unregister a service component. * * The service being removed must be stopped before calling this function. * @@ -107,7 +116,7 @@ int32_t rte_service_register(const struct rte_service_spec *spec); * @retval -EBUSY The service is currently running, stop the service before * calling unregister. No action has been taken. */ -int32_t rte_service_unregister(struct rte_service_spec *service); +int32_t rte_service_component_unregister(uint32_t id); /** * @warning @@ -131,6 +140,23 @@ int32_t rte_service_start_with_defaults(void); * @warning * @b EXPERIMENTAL: this API may change without prior notice * + * Set the backend runstate of a component. + * + * This function allows services to be registered at startup, but not yet + * enabled to run by default. When the service has been configured (via the + * usual method; eg rte_eventdev_configure, the service can mark itself as + * ready to run. The differentiation between backend runstate and + * service_runstate is that the backend runstate is set by the service + * component while the service runstate is reserved for application usage. + * + * @retval 0 Success + */ +int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * * Initialize the service library. * * In order to use the service library, it must be initialized. EAL initializes diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h deleted file mode 100644 index 29f5a523..00000000 --- a/lib/librte_eal/common/include/rte_vdev.h +++ /dev/null @@ -1,131 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2016 RehiveTech. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of RehiveTech nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef RTE_VDEV_H -#define RTE_VDEV_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/queue.h> -#include <rte_dev.h> -#include <rte_devargs.h> - -struct rte_vdev_device { - TAILQ_ENTRY(rte_vdev_device) next; /**< Next attached vdev */ - struct rte_device device; /**< Inherit core device */ -}; - -/** - * @internal - * Helper macro for drivers that need to convert to struct rte_vdev_device. - */ -#define RTE_DEV_TO_VDEV(ptr) \ - container_of(ptr, struct rte_vdev_device, device) - -static inline const char * -rte_vdev_device_name(const struct rte_vdev_device *dev) -{ - if (dev && dev->device.name) - return dev->device.name; - return NULL; -} - -static inline const char * -rte_vdev_device_args(const struct rte_vdev_device *dev) -{ - if (dev && dev->device.devargs) - return dev->device.devargs->args; - return ""; -} - -/** Double linked list of virtual device drivers. */ -TAILQ_HEAD(vdev_driver_list, rte_vdev_driver); - -/** - * Probe function called for each virtual device driver once. - */ -typedef int (rte_vdev_probe_t)(struct rte_vdev_device *dev); - -/** - * Remove function called for each virtual device driver once. - */ -typedef int (rte_vdev_remove_t)(struct rte_vdev_device *dev); - -/** - * A virtual device driver abstraction. - */ -struct rte_vdev_driver { - TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */ - struct rte_driver driver; /**< Inherited general driver. */ - rte_vdev_probe_t *probe; /**< Virtual device probe function. */ - rte_vdev_remove_t *remove; /**< Virtual device remove function. */ -}; - -/** - * Register a virtual device driver. - * - * @param driver - * A pointer to a rte_vdev_driver structure describing the driver - * to be registered. - */ -void rte_vdev_register(struct rte_vdev_driver *driver); - -/** - * Unregister a virtual device driver. - * - * @param driver - * A pointer to a rte_vdev_driver structure describing the driver - * to be unregistered. - */ -void rte_vdev_unregister(struct rte_vdev_driver *driver); - -#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\ -RTE_INIT(vdrvinitfn_ ##vdrv);\ -static const char *vdrvinit_ ## nm ## _alias;\ -static void vdrvinitfn_ ##vdrv(void)\ -{\ - (vdrv).driver.name = RTE_STR(nm);\ - (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\ - rte_vdev_register(&vdrv);\ -} \ -RTE_PMD_EXPORT_NAME(nm, __COUNTER__) - -#define RTE_PMD_REGISTER_ALIAS(nm, alias)\ -static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index a69a7075..d08cf48a 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -61,7 +61,7 @@ extern "C" { /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 8 +#define RTE_VER_MONTH 11 /** * Patch level number i.e. the z in yy.mm.z @@ -71,14 +71,14 @@ extern "C" { /** * Extra string to be appended to version number */ -#define RTE_VER_SUFFIX "" +#define RTE_VER_SUFFIX "-rc" /** * Patch release number * 0-15 = release candidates * 16 = release */ -#define RTE_VER_RELEASE 16 +#define RTE_VER_RELEASE 3 /** * Macro to compute a version number usable for comparisons diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h new file mode 100644 index 00000000..a69c4ff6 --- /dev/null +++ b/lib/librte_eal/common/include/rte_vfio.h @@ -0,0 +1,153 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 6WIND S.A. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_VFIO_H_ +#define _RTE_VFIO_H_ + +/* + * determine if VFIO is present on the system + */ +#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO) +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) +#define VFIO_PRESENT +#endif /* kernel version >= 3.6.0 */ +#endif /* RTE_EAL_VFIO */ + +#ifdef VFIO_PRESENT + +#include <linux/vfio.h> + +#define VFIO_DIR "/dev/vfio" +#define VFIO_CONTAINER_PATH "/dev/vfio/vfio" +#define VFIO_GROUP_FMT "/dev/vfio/%u" +#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" +#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) +#define VFIO_GET_REGION_IDX(x) (x >> 40) +#define VFIO_NOIOMMU_MODE \ + "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode" + +/** + * Setup vfio_cfg for the device identified by its address. + * It discovers the configured I/O MMU groups or sets a new one for the device. + * If a new groups is assigned, the DMA mapping is performed. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param sysfs_base + * sysfs path prefix. + * + * @param dev_addr + * device location. + * + * @param vfio_dev_fd + * VFIO fd. + * + * @param device_info + * Device information. + * + * @return + * 0 on success. + * <0 on failure. + * >1 if the device cannot be managed this way. + */ +int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, + int *vfio_dev_fd, struct vfio_device_info *device_info); + +/** + * Release a device mapped to a VFIO-managed I/O MMU group. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param sysfs_base + * sysfs path prefix. + * + * @param dev_addr + * device location. + * + * @param fd + * VFIO fd. + * + * @return + * 0 on success. + * <0 on failure. + */ +int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); + +/** + * Enable a VFIO-related kmod. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param modname + * kernel module name. + * + * @return + * 0 on success. + * <0 on failure. + */ +int rte_vfio_enable(const char *modname); + +/** + * Check whether a VFIO-related kmod is enabled. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @param modname + * kernel module name. + * + * @return + * !0 if true. + * 0 otherwise. + */ +int rte_vfio_is_enabled(const char *modname); + +/** + * Whether VFIO NOIOMMU mode is enabled. + * + * This function is only relevant to linux and will return + * an error on BSD. + * + * @return + * !0 if true. + * 0 otherwise. + */ +int rte_vfio_noiommu_is_enabled(void); + +#endif /* VFIO_PRESENT */ + +#endif /* _RTE_VFIO_H_ */ diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c index 15076905..889dffd2 100644 --- a/lib/librte_eal/common/malloc_elem.c +++ b/lib/librte_eal/common/malloc_elem.c @@ -275,14 +275,14 @@ malloc_elem_free(struct malloc_elem *elem) return -1; rte_spinlock_lock(&(elem->heap->lock)); - size_t sz = elem->size - sizeof(*elem); + size_t sz = elem->size - sizeof(*elem) - MALLOC_ELEM_TRAILER_LEN; uint8_t *ptr = (uint8_t *)&elem[1]; struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size); if (next->state == ELEM_FREE){ /* remove from free list, join to this one */ elem_free_list_remove(next); join_elem(elem, next); - sz += sizeof(*elem); + sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN); } /* check if previous element is free, if so join with it and return, @@ -291,8 +291,8 @@ malloc_elem_free(struct malloc_elem *elem) if (elem->prev != NULL && elem->prev->state == ELEM_FREE) { elem_free_list_remove(elem->prev); join_elem(elem->prev, elem); - sz += sizeof(*elem); - ptr -= sizeof(*elem); + sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN); + ptr -= (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN); elem = elem->prev; } malloc_elem_free_list_insert(elem); diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h index f04b2d1e..ce39129d 100644 --- a/lib/librte_eal/common/malloc_elem.h +++ b/lib/librte_eal/common/malloc_elem.h @@ -53,13 +53,13 @@ struct malloc_elem { volatile enum elem_state state; uint32_t pad; size_t size; -#ifdef RTE_LIBRTE_MALLOC_DEBUG +#ifdef RTE_MALLOC_DEBUG uint64_t header_cookie; /* Cookie marking start of data */ /* trailer cookie at start + size */ #endif } __rte_cache_aligned; -#ifndef RTE_LIBRTE_MALLOC_DEBUG +#ifndef RTE_MALLOC_DEBUG static const unsigned MALLOC_ELEM_TRAILER_LEN = 0; /* dummy function - just check if pointer is non-null */ diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index 5c0627bf..fe2278bc 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -246,15 +246,22 @@ rte_malloc_set_limit(__rte_unused const char *type, } /* - * Return the physical address of a virtual address obtained through rte_malloc + * Return the IO address of a virtual address obtained through rte_malloc */ -phys_addr_t -rte_malloc_virt2phy(const void *addr) +rte_iova_t +rte_malloc_virt2iova(const void *addr) { + rte_iova_t iova; const struct malloc_elem *elem = malloc_elem_from_data(addr); if (elem == NULL) - return RTE_BAD_PHYS_ADDR; - if (elem->ms->phys_addr == RTE_BAD_PHYS_ADDR) - return RTE_BAD_PHYS_ADDR; - return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr); + return RTE_BAD_IOVA; + if (elem->ms->iova == RTE_BAD_IOVA) + return RTE_BAD_IOVA; + + if (rte_eal_iova_mode() == RTE_IOVA_VA) + iova = (uintptr_t)addr; + else + iova = elem->ms->iova + + RTE_PTR_DIFF(addr, elem->ms->addr); + return iova; } diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c index 7efb76dc..09b758c9 100644 --- a/lib/librte_eal/common/rte_service.c +++ b/lib/librte_eal/common/rte_service.c @@ -54,6 +54,7 @@ #define SERVICE_F_REGISTERED (1 << 0) #define SERVICE_F_STATS_ENABLED (1 << 1) +#define SERVICE_F_START_CHECK (1 << 2) /* runstates for services and lcores, denoting if they are active or not */ #define RUNSTATE_STOPPED 0 @@ -71,11 +72,12 @@ struct rte_service_spec_impl { rte_atomic32_t execute_lock; /* API set/get-able variables */ - int32_t runstate; + int8_t app_runstate; + int8_t comp_runstate; uint8_t internal_flags; /* per service statistics */ - uint32_t num_mapped_cores; + rte_atomic32_t num_mapped_cores; uint64_t calls; uint64_t cycles_spent; } __rte_cache_aligned; @@ -144,6 +146,13 @@ service_valid(uint32_t id) return !!(rte_services[id].internal_flags & SERVICE_F_REGISTERED); } +/* validate ID and retrieve service pointer, or return error value */ +#define SERVICE_VALID_GET_OR_ERR_RET(id, service, retval) do { \ + if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id)) \ + return retval; \ + service = &rte_services[id]; \ +} while (0) + /* returns 1 if statistics should be colleced for service * Returns 0 if statistics should not be collected for service */ @@ -156,21 +165,31 @@ service_stats_enabled(struct rte_service_spec_impl *impl) static inline int service_mt_safe(struct rte_service_spec_impl *s) { - return s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE; + return !!(s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE); } -int32_t rte_service_set_stats_enable(struct rte_service_spec *service, - int32_t enabled) +int32_t rte_service_set_stats_enable(uint32_t id, int32_t enabled) { - struct rte_service_spec_impl *impl = - (struct rte_service_spec_impl *)service; - if (!impl) - return -EINVAL; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, 0); if (enabled) - impl->internal_flags |= SERVICE_F_STATS_ENABLED; + s->internal_flags |= SERVICE_F_STATS_ENABLED; else - impl->internal_flags &= ~(SERVICE_F_STATS_ENABLED); + s->internal_flags &= ~(SERVICE_F_STATS_ENABLED); + + return 0; +} + +int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, 0); + + if (enabled) + s->internal_flags |= SERVICE_F_START_CHECK; + else + s->internal_flags &= ~(SERVICE_F_START_CHECK); return 0; } @@ -181,58 +200,42 @@ rte_service_get_count(void) return rte_service_count; } -struct rte_service_spec * -rte_service_get_by_id(uint32_t id) +int32_t rte_service_get_by_name(const char *name, uint32_t *service_id) { - struct rte_service_spec *service = NULL; - if (id < rte_service_count) - service = (struct rte_service_spec *)&rte_services[id]; - - return service; -} + if (!service_id) + return -EINVAL; -struct rte_service_spec *rte_service_get_by_name(const char *name) -{ - struct rte_service_spec *service = NULL; int i; for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { if (service_valid(i) && strcmp(name, rte_services[i].spec.name) == 0) { - service = (struct rte_service_spec *)&rte_services[i]; - break; + *service_id = i; + return 0; } } - return service; + return -ENODEV; } const char * -rte_service_get_name(const struct rte_service_spec *service) +rte_service_get_name(uint32_t id) { - return service->name; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, 0); + return s->spec.name; } int32_t -rte_service_probe_capability(const struct rte_service_spec *service, - uint32_t capability) +rte_service_probe_capability(uint32_t id, uint32_t capability) { - return service->capabilities & capability; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + return !!(s->spec.capabilities & capability); } int32_t -rte_service_is_running(const struct rte_service_spec *spec) -{ - const struct rte_service_spec_impl *impl = - (const struct rte_service_spec_impl *)spec; - if (!impl) - return -EINVAL; - - return (impl->runstate == RUNSTATE_RUNNING) && - (impl->num_mapped_cores > 0); -} - -int32_t -rte_service_register(const struct rte_service_spec *spec) +rte_service_component_register(const struct rte_service_spec *spec, + uint32_t *id_ptr) { uint32_t i; int32_t free_slot = -1; @@ -252,68 +255,161 @@ rte_service_register(const struct rte_service_spec *spec) struct rte_service_spec_impl *s = &rte_services[free_slot]; s->spec = *spec; - s->internal_flags |= SERVICE_F_REGISTERED; + s->internal_flags |= SERVICE_F_REGISTERED | SERVICE_F_START_CHECK; rte_smp_wmb(); rte_service_count++; + if (id_ptr) + *id_ptr = free_slot; + return 0; } int32_t -rte_service_unregister(struct rte_service_spec *spec) +rte_service_component_unregister(uint32_t id) { - struct rte_service_spec_impl *s = NULL; - struct rte_service_spec_impl *spec_impl = - (struct rte_service_spec_impl *)spec; - uint32_t i; - uint32_t service_id; - for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { - if (&rte_services[i] == spec_impl) { - s = spec_impl; - service_id = i; - break; - } - } - - if (!s) - return -EINVAL; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); rte_service_count--; rte_smp_wmb(); s->internal_flags &= ~(SERVICE_F_REGISTERED); + /* clear the run-bit in all cores */ for (i = 0; i < RTE_MAX_LCORE; i++) - lcore_states[i].service_mask &= ~(UINT64_C(1) << service_id); + lcore_states[i].service_mask &= ~(UINT64_C(1) << id); - memset(&rte_services[service_id], 0, - sizeof(struct rte_service_spec_impl)); + memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl)); return 0; } int32_t -rte_service_start(struct rte_service_spec *service) +rte_service_component_runstate_set(uint32_t id, uint32_t runstate) { - struct rte_service_spec_impl *s = - (struct rte_service_spec_impl *)service; - s->runstate = RUNSTATE_RUNNING; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + if (runstate) + s->comp_runstate = RUNSTATE_RUNNING; + else + s->comp_runstate = RUNSTATE_STOPPED; + rte_smp_wmb(); return 0; } int32_t -rte_service_stop(struct rte_service_spec *service) +rte_service_runstate_set(uint32_t id, uint32_t runstate) { - struct rte_service_spec_impl *s = - (struct rte_service_spec_impl *)service; - s->runstate = RUNSTATE_STOPPED; + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + if (runstate) + s->app_runstate = RUNSTATE_RUNNING; + else + s->app_runstate = RUNSTATE_STOPPED; + rte_smp_wmb(); return 0; } +int32_t +rte_service_runstate_get(uint32_t id) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + rte_smp_rmb(); + + int check_disabled = !(s->internal_flags & SERVICE_F_START_CHECK); + int lcore_mapped = (rte_atomic32_read(&s->num_mapped_cores) > 0); + + return (s->app_runstate == RUNSTATE_RUNNING) && + (s->comp_runstate == RUNSTATE_RUNNING) && + (check_disabled | lcore_mapped); +} + +static inline void +rte_service_runner_do_callback(struct rte_service_spec_impl *s, + struct core_state *cs, uint32_t service_idx) +{ + void *userdata = s->spec.callback_userdata; + + if (service_stats_enabled(s)) { + uint64_t start = rte_rdtsc(); + s->spec.callback(userdata); + uint64_t end = rte_rdtsc(); + s->cycles_spent += end - start; + cs->calls_per_service[service_idx]++; + s->calls++; + } else + s->spec.callback(userdata); +} + + +static inline int32_t +service_run(uint32_t i, struct core_state *cs, uint64_t service_mask) +{ + if (!service_valid(i)) + return -EINVAL; + struct rte_service_spec_impl *s = &rte_services[i]; + if (s->comp_runstate != RUNSTATE_RUNNING || + s->app_runstate != RUNSTATE_RUNNING || + !(service_mask & (UINT64_C(1) << i))) + return -ENOEXEC; + + /* check do we need cmpset, if MT safe or <= 1 core + * mapped, atomic ops are not required. + */ + const int use_atomics = (service_mt_safe(s) == 0) && + (rte_atomic32_read(&s->num_mapped_cores) > 1); + if (use_atomics) { + if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1)) + return -EBUSY; + + rte_service_runner_do_callback(s, cs, i); + rte_atomic32_clear(&s->execute_lock); + } else + rte_service_runner_do_callback(s, cs, i); + + return 0; +} + +int32_t rte_service_run_iter_on_app_lcore(uint32_t id, + uint32_t serialize_mt_unsafe) +{ + /* run service on calling core, using all-ones as the service mask */ + if (!service_valid(id)) + return -EINVAL; + + struct core_state *cs = &lcore_states[rte_lcore_id()]; + struct rte_service_spec_impl *s = &rte_services[id]; + + /* Atomically add this core to the mapped cores first, then examine if + * we can run the service. This avoids a race condition between + * checking the value, and atomically adding to the mapped count. + */ + if (serialize_mt_unsafe) + rte_atomic32_inc(&s->num_mapped_cores); + + if (service_mt_safe(s) == 0 && + rte_atomic32_read(&s->num_mapped_cores) > 1) { + if (serialize_mt_unsafe) + rte_atomic32_dec(&s->num_mapped_cores); + return -EBUSY; + } + + int ret = service_run(id, cs, UINT64_MAX); + + if (serialize_mt_unsafe) + rte_atomic32_dec(&s->num_mapped_cores); + + return ret; +} + static int32_t rte_service_runner_func(void *arg) { @@ -324,35 +420,10 @@ rte_service_runner_func(void *arg) while (lcore_states[lcore].runstate == RUNSTATE_RUNNING) { const uint64_t service_mask = cs->service_mask; - for (i = 0; i < rte_service_count; i++) { - struct rte_service_spec_impl *s = &rte_services[i]; - if (s->runstate != RUNSTATE_RUNNING || - !(service_mask & (UINT64_C(1) << i))) - continue; - /* check do we need cmpset, if MT safe or <= 1 core - * mapped, atomic ops are not required. - */ - const int need_cmpset = !((service_mt_safe(s) == 0) && - (s->num_mapped_cores > 1)); - uint32_t *lock = (uint32_t *)&s->execute_lock; - - if (need_cmpset || rte_atomic32_cmpset(lock, 0, 1)) { - void *userdata = s->spec.callback_userdata; - - if (service_stats_enabled(s)) { - uint64_t start = rte_rdtsc(); - s->spec.callback(userdata); - uint64_t end = rte_rdtsc(); - s->cycles_spent += end - start; - cs->calls_per_service[i]++; - s->calls++; - } else - s->spec.callback(userdata); - - if (need_cmpset) - rte_atomic32_clear(&s->execute_lock); - } + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + /* return value ignored as no change to code flow */ + service_run(i, cs, service_mask); } rte_smp_rmb(); @@ -397,6 +468,19 @@ rte_service_lcore_list(uint32_t array[], uint32_t n) } int32_t +rte_service_lcore_count_services(uint32_t lcore) +{ + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + struct core_state *cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -ENOTSUP; + + return __builtin_popcountll(cs->service_mask); +} + +int32_t rte_service_start_with_defaults(void) { /* create a default mapping from cores to services, then start the @@ -407,7 +491,7 @@ rte_service_start_with_defaults(void) uint32_t count = rte_service_get_count(); int32_t lcore_iter = 0; - uint32_t ids[RTE_MAX_LCORE]; + uint32_t ids[RTE_MAX_LCORE] = {0}; int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE); if (lcore_count == 0) @@ -417,16 +501,12 @@ rte_service_start_with_defaults(void) rte_service_lcore_start(ids[i]); for (i = 0; i < count; i++) { - struct rte_service_spec *s = rte_service_get_by_id(i); - if (!s) - return -EINVAL; - /* do 1:1 core mapping here, with each service getting * assigned a single core by default. Adding multiple services * should multiplex to a single core, or 1:1 if there are the * same amount of services as service-cores */ - ret = rte_service_enable_on_lcore(s, ids[lcore_iter]); + ret = rte_service_map_lcore_set(i, ids[lcore_iter], 1); if (ret) return -ENODEV; @@ -434,7 +514,7 @@ rte_service_start_with_defaults(void) if (lcore_iter >= lcore_count) lcore_iter = 0; - ret = rte_service_start(s); + ret = rte_service_runstate_set(i, 1); if (ret) return -ENOEXEC; } @@ -467,43 +547,40 @@ service_update(struct rte_service_spec *service, uint32_t lcore, if (set) { if (*set) { lcore_states[lcore].service_mask |= sid_mask; - rte_services[sid].num_mapped_cores++; + rte_atomic32_inc(&rte_services[sid].num_mapped_cores); } else { lcore_states[lcore].service_mask &= ~(sid_mask); - rte_services[sid].num_mapped_cores--; + rte_atomic32_dec(&rte_services[sid].num_mapped_cores); } } if (enabled) - *enabled = (lcore_states[lcore].service_mask & (sid_mask)); + *enabled = !!(lcore_states[lcore].service_mask & (sid_mask)); rte_smp_wmb(); return 0; } -int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service, - uint32_t lcore) -{ - uint32_t enabled; - int ret = service_update(service, lcore, 0, &enabled); - if (ret == 0) - return enabled; - return -EINVAL; -} - int32_t -rte_service_enable_on_lcore(struct rte_service_spec *service, uint32_t lcore) +rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled) { - uint32_t on = 1; - return service_update(service, lcore, &on, 0); + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + uint32_t on = enabled > 0; + return service_update(&s->spec, lcore, &on, 0); } int32_t -rte_service_disable_on_lcore(struct rte_service_spec *service, uint32_t lcore) +rte_service_map_lcore_get(uint32_t id, uint32_t lcore) { - uint32_t off = 0; - return service_update(service, lcore, &off, 0); + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + uint32_t enabled; + int ret = service_update(&s->spec, lcore, 0, &enabled); + if (ret == 0) + return enabled; + return ret; } int32_t rte_service_lcore_reset_all(void) @@ -516,7 +593,7 @@ int32_t rte_service_lcore_reset_all(void) lcore_states[i].runstate = RUNSTATE_STOPPED; } for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) - rte_services[i].num_mapped_cores = 0; + rte_atomic32_set(&rte_services[i].num_mapped_cores, 0); rte_smp_wmb(); @@ -552,7 +629,8 @@ rte_service_lcore_add(uint32_t lcore) lcore_states[lcore].runstate = RUNSTATE_STOPPED; rte_smp_wmb(); - return 0; + + return rte_eal_wait_lcore(lcore); } int32_t @@ -607,12 +685,12 @@ rte_service_lcore_stop(uint32_t lcore) return -EALREADY; uint32_t i; + uint64_t service_mask = lcore_states[lcore].service_mask; for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { - int32_t enabled = - lcore_states[i].service_mask & (UINT64_C(1) << i); - int32_t service_running = rte_services[i].runstate != - RUNSTATE_STOPPED; - int32_t only_core = rte_services[i].num_mapped_cores == 1; + int32_t enabled = service_mask & (UINT64_C(1) << i); + int32_t service_running = rte_service_runstate_get(i); + int32_t only_core = (1 == + rte_atomic32_read(&rte_services[i].num_mapped_cores)); /* if the core is mapped, and the service is running, and this * is the only core that is mapped, the service would cease to @@ -667,28 +745,34 @@ service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset) fprintf(f, "\n"); } -int32_t rte_service_dump(FILE *f, struct rte_service_spec *service) +int32_t rte_service_dump(FILE *f, uint32_t id) { uint32_t i; + int print_one = (id != UINT32_MAX); uint64_t total_cycles = 0; - for (i = 0; i < rte_service_count; i++) { + + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { if (!service_valid(i)) continue; total_cycles += rte_services[i].cycles_spent; } - if (service) { - struct rte_service_spec_impl *s = - (struct rte_service_spec_impl *)service; + /* print only the specified service */ + if (print_one) { + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); fprintf(f, "Service %s Summary\n", s->spec.name); uint32_t reset = 0; rte_service_dump_one(f, s, total_cycles, reset); return 0; } + /* print all services, as UINT32_MAX was passed as id */ fprintf(f, "Services Summary\n"); - for (i = 0; i < rte_service_count; i++) { + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (!service_valid(i)) + continue; uint32_t reset = 1; rte_service_dump_one(f, &rte_services[i], total_cycles, reset); } @@ -698,7 +782,7 @@ int32_t rte_service_dump(FILE *f, struct rte_service_spec *service) if (lcore_config[i].core_role != ROLE_SERVICE) continue; - uint32_t reset = 0; + uint32_t reset = 1; service_dump_calls_per_lcore(f, i, reset); } diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile index 4794696b..2ebdf313 100644 --- a/lib/librte_eal/linuxapp/Makefile +++ b/lib/librte_eal/linuxapp/Makefile @@ -35,7 +35,5 @@ DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio DIRS-$(CONFIG_RTE_KNI_KMOD) += kni DEPDIRS-kni := eal -DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0 -DEPDIRS-xen_dom0 := eal include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 90bca4d6..5a7b8b2a 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -34,10 +34,10 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_eal.a ARCH_DIR ?= $(RTE_ARCH) -EXPORT_MAP := rte_eal_version.map +EXPORT_MAP := ../../rte_eal_version.map VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) -LIBABIVER := 5 +LIBABIVER := 6 VPATH += $(RTE_SDK)/lib/librte_eal/common @@ -58,16 +58,10 @@ endif SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c -ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y) -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_xen_memory.c -endif SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_mp_sync.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c @@ -80,9 +74,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_vdev.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_tailqs.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_errno.c @@ -104,6 +95,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c SRCS-$(CONFIG_RTE_ARCH_X86) += rte_spinlock.c +SRCS-y += rte_cycles.c CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST) @@ -116,13 +108,11 @@ CFLAGS_eal_thread.o := -D_GNU_SOURCE CFLAGS_eal_log.o := -D_GNU_SOURCE CFLAGS_eal_common_log.o := -D_GNU_SOURCE CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE -CFLAGS_eal_pci.o := -D_GNU_SOURCE -CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE -CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE CFLAGS_eal_common_options.o := -D_GNU_SOURCE CFLAGS_eal_common_thread.o := -D_GNU_SOURCE CFLAGS_eal_common_lcore.o := -D_GNU_SOURCE +CFLAGS_rte_cycles.o := -D_GNU_SOURCE # workaround for a gcc bug with noreturn attribute # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 @@ -130,7 +120,7 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) CFLAGS_eal_thread.o += -Wno-return-type endif -INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h +INC := rte_kni_common.h SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ $(addprefix include/exec-env/,$(INC)) diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 48f12f44..229eec9f 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -56,7 +56,6 @@ #include <rte_common.h> #include <rte_debug.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> @@ -71,12 +70,12 @@ #include <rte_cpuflags.h> #include <rte_interrupts.h> #include <rte_bus.h> -#include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> #include <rte_version.h> #include <rte_atomic.h> #include <malloc_heap.h> +#include <rte_vfio.h> #include "eal_private.h" #include "eal_thread.h" @@ -121,6 +120,13 @@ struct internal_config internal_config; /* used by rte_rdtsc() */ int rte_cycles_vmware_tsc_map; +/* Return mbuf pool ops name */ +const char * +rte_eal_mbuf_default_mempool_ops(void) +{ + return internal_config.mbuf_pool_ops_name; +} + /* Return a pointer to the configuration structure */ struct rte_config * rte_eal_get_configuration(void) @@ -128,6 +134,12 @@ rte_eal_get_configuration(void) return &rte_config; } +enum rte_iova_mode +rte_eal_iova_mode(void) +{ + return rte_eal_get_configuration()->iova_mode; +} + /* parse a sysfs (or other) file containing one integer value */ int eal_parse_sysfs_value(const char *filename, unsigned long *val) @@ -354,7 +366,6 @@ eal_usage(const char *prgname) " --"OPT_BASE_VIRTADDR" Base virtual address\n" " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" - " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n" "\n"); /* Allow the application to print its usage message too if hook is set */ if ( rte_application_usage_hook ) { @@ -555,25 +566,12 @@ eal_parse_args(int argc, char **argv) eal_usage(prgname); exit(EXIT_SUCCESS); - /* long options */ - case OPT_XEN_DOM0_NUM: -#ifdef RTE_LIBRTE_XEN_DOM0 - internal_config.xen_dom0_support = 1; -#else - RTE_LOG(ERR, EAL, "Can't support DPDK app " - "running on Dom0, please configure" - " RTE_LIBRTE_XEN_DOM0=y\n"); - ret = -1; - goto out; -#endif - break; - case OPT_HUGE_DIR_NUM: - internal_config.hugepage_dir = optarg; + internal_config.hugepage_dir = strdup(optarg); break; case OPT_FILE_PREFIX_NUM: - internal_config.hugefile_prefix = optarg; + internal_config.hugefile_prefix = strdup(optarg); break; case OPT_SOCKET_MEM_NUM: @@ -610,6 +608,10 @@ eal_parse_args(int argc, char **argv) internal_config.create_uio_dev = 1; break; + case OPT_MBUF_POOL_OPS_NAME_NUM: + internal_config.mbuf_pool_ops_name = optarg; + break; + default: if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { RTE_LOG(ERR, EAL, "Option %c is not supported " @@ -641,15 +643,6 @@ eal_parse_args(int argc, char **argv) goto out; } - /* --xen-dom0 doesn't make sense with --socket-mem */ - if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) { - RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified " - "together with --"OPT_XEN_DOM0"\n"); - eal_usage(prgname); - ret = -1; - goto out; - } - if (optind >= 0) argv[optind-1] = prgname; ret = optind-1; @@ -716,10 +709,9 @@ static int rte_eal_vfio_setup(void) { int vfio_enabled = 0; - if (!internal_config.no_pci) { - pci_vfio_enable(); - vfio_enabled |= pci_vfio_is_enabled(); - } + if (rte_vfio_enable("vfio")) + return -1; + vfio_enabled = rte_vfio_is_enabled("vfio"); if (vfio_enabled) { @@ -792,9 +784,40 @@ rte_eal_init(int argc, char **argv) return -1; } + if (eal_plugins_init() < 0) { + rte_eal_init_alert("Cannot init plugins\n"); + rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } + + if (eal_option_device_parse()) { + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } + + if (rte_bus_scan()) { + rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } + + /* autodetect the iova mapping mode (default is iova_pa) */ + rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class(); + + /* Workaround for KNI which requires physical address to work */ + if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA && + rte_eal_check_module("rte_kni") == 1) { + rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA; + RTE_LOG(WARNING, EAL, + "Some devices want IOVA as VA but PA will be used because.. " + "KNI module inserted\n"); + } + if (internal_config.no_hugetlbfs == 0 && internal_config.process_type != RTE_PROC_SECONDARY && - internal_config.xen_dom0_support == 0 && eal_hugepage_info_init() < 0) { rte_eal_init_alert("Cannot get hugepage information."); rte_errno = EACCES; @@ -873,9 +896,6 @@ rte_eal_init(int argc, char **argv) eal_check_mem_on_local_socket(); - if (eal_plugins_init() < 0) - rte_eal_init_alert("Cannot init plugins\n"); - eal_thread_init_master(rte_config.master_lcore); ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); @@ -889,17 +909,6 @@ rte_eal_init(int argc, char **argv) return -1; } - if (eal_option_device_parse()) { - rte_errno = ENODEV; - return -1; - } - - if (rte_bus_scan()) { - rte_eal_init_alert("Cannot scan the buses for devices\n"); - rte_errno = ENODEV; - return -1; - } - RTE_LCORE_FOREACH_SLAVE(i) { /* @@ -983,6 +992,22 @@ int rte_eal_has_hugepages(void) return ! internal_config.no_hugetlbfs; } +int rte_eal_has_pci(void) +{ + return !internal_config.no_pci; +} + +int rte_eal_create_uio_dev(void) +{ + return internal_config.create_uio_dev; +} + +enum rte_intr_mode +rte_eal_vfio_intr_mode(void) +{ + return internal_config.vfio_intr_mode; +} + int rte_eal_check_module(const char *module_name) { diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c index fbae4613..8e4a775b 100644 --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c @@ -40,7 +40,6 @@ #include <sys/timerfd.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_interrupts.h> #include <rte_alarm.h> #include <rte_common.h> diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 7a21e8f6..86e174fc 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -46,7 +46,6 @@ #include <sys/queue.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_launch.h> #include <rte_per_lcore.h> diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 3e9ac41e..1c20693d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -51,7 +51,6 @@ #include <rte_common.h> #include <rte_interrupts.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_per_lcore.h> @@ -60,7 +59,6 @@ #include <rte_branch_prediction.h> #include <rte_debug.h> #include <rte_log.h> -#include <rte_pci.h> #include <rte_malloc.h> #include <rte_errno.h> #include <rte_spinlock.h> @@ -914,7 +912,7 @@ static void eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) { union rte_intr_read_buffer buf; - int bytes_read = 1; + int bytes_read = 0; int nbytes; switch (intr_handle->type) { @@ -930,11 +928,9 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) break; #endif case RTE_INTR_HANDLE_VDEV: - /* for vdev, fd points to: - * a. eventfd which does not need to read out; - * b. datapath fd which needs PMD to read out. - */ - return; + bytes_read = intr_handle->efd_counter_size; + /* For vdev, number of bytes to read is set by driver */ + break; case RTE_INTR_HANDLE_EXT: return; default: @@ -947,6 +943,8 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) * read out to clear the ready-to-be-read flag * for epoll_wait. */ + if (bytes_read == 0) + return; do { nbytes = read(fd, &buf, bytes_read); if (nbytes < 0) { @@ -1206,7 +1204,12 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) intr_handle->nb_efd = n; intr_handle->max_intr = NB_OTHER_INTR + n; } else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) { - /* do nothing, and let vdev driver to initialize this struct */ + /* only check, initialization would be done in vdev driver.*/ + if (intr_handle->efd_counter_size > + sizeof(union rte_intr_read_buffer)) { + RTE_LOG(ERR, EAL, "the efd_counter_size is oversized"); + return -EINVAL; + } } else { intr_handle->efds[0] = intr_handle->fd; intr_handle->nb_efd = RTE_MIN(nb_efd, 1U); diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c index e3a50aa3..c088bd9b 100644 --- a/lib/librte_eal/linuxapp/eal/eal_log.c +++ b/lib/librte_eal/linuxapp/eal/eal_log.c @@ -39,7 +39,6 @@ #include <sys/queue.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_launch.h> #include <rte_per_lcore.h> diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 52791282..a54b822a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -59,7 +59,6 @@ #include <rte_log.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_launch.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> @@ -75,13 +74,6 @@ #define PFN_MASK_SIZE 8 -#ifdef RTE_LIBRTE_XEN_DOM0 -int rte_xen_dom0_supported(void) -{ - return internal_config.xen_dom0_support; -} -#endif - /** * @file * Huge page mapping under linux @@ -106,10 +98,6 @@ test_phys_addrs_available(void) uint64_t tmp; phys_addr_t physaddr; - /* For dom0, phys addresses can always be available */ - if (rte_xen_dom0_supported()) - return; - if (!rte_eal_has_hugepages()) { RTE_LOG(ERR, EAL, "Started without hugepages support, physical addresses not available\n"); @@ -119,10 +107,11 @@ test_phys_addrs_available(void) physaddr = rte_mem_virt2phy(&tmp); if (physaddr == RTE_BAD_PHYS_ADDR) { - RTE_LOG(ERR, EAL, - "Cannot obtain physical addresses: %s. " - "Only vfio will function.\n", - strerror(errno)); + if (rte_eal_iova_mode() == RTE_IOVA_PA) + RTE_LOG(ERR, EAL, + "Cannot obtain physical addresses: %s. " + "Only vfio will function.\n", + strerror(errno)); phys_addrs_available = false; } } @@ -139,32 +128,9 @@ rte_mem_virt2phy(const void *virtaddr) int page_size; off_t offset; - /* when using dom0, /proc/self/pagemap always returns 0, check in - * dpdk memory by browsing the memsegs */ - if (rte_xen_dom0_supported()) { - struct rte_mem_config *mcfg; - struct rte_memseg *memseg; - unsigned i; - - mcfg = rte_eal_get_configuration()->mem_config; - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - memseg = &mcfg->memseg[i]; - if (memseg->addr == NULL) - break; - if (virtaddr > memseg->addr && - virtaddr < RTE_PTR_ADD(memseg->addr, - memseg->len)) { - return memseg->phys_addr + - RTE_PTR_DIFF(virtaddr, memseg->addr); - } - } - - return RTE_BAD_PHYS_ADDR; - } - /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ if (!phys_addrs_available) - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; /* standard page size */ page_size = getpagesize(); @@ -173,7 +139,7 @@ rte_mem_virt2phy(const void *virtaddr) if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", __func__, strerror(errno)); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } virt_pfn = (unsigned long)virtaddr / page_size; @@ -182,7 +148,7 @@ rte_mem_virt2phy(const void *virtaddr) RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", __func__, strerror(errno)); close(fd); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } retval = read(fd, &page, PFN_MASK_SIZE); @@ -190,12 +156,12 @@ rte_mem_virt2phy(const void *virtaddr) if (retval < 0) { RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", __func__, strerror(errno)); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } else if (retval != PFN_MASK_SIZE) { RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap " "but expected %d:\n", __func__, retval, PFN_MASK_SIZE); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } /* @@ -203,7 +169,7 @@ rte_mem_virt2phy(const void *virtaddr) * pagemap.txt in linux Documentation) */ if ((page & 0x7fffffffffffffULL) == 0) - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; physaddr = ((page & 0x7fffffffffffffULL) * page_size) + ((unsigned long)virtaddr % page_size); @@ -211,6 +177,14 @@ rte_mem_virt2phy(const void *virtaddr) return physaddr; } +rte_iova_t +rte_mem_virt2iova(const void *virtaddr) +{ + if (rte_eal_iova_mode() == RTE_IOVA_VA) + return (uintptr_t)virtaddr; + return rte_mem_virt2phy(virtaddr); +} + /* * For each hugepage in hugepg_tbl, fill the physaddr value. We find * it by browsing the /proc/self/pagemap special file. @@ -716,6 +690,8 @@ create_shared_memory(const char *filename, const size_t mem_size) } retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); close(fd); + if (retval == MAP_FAILED) + return NULL; return retval; } @@ -1059,7 +1035,10 @@ rte_eal_hugepage_init(void) strerror(errno)); return -1; } - mcfg->memseg[0].phys_addr = RTE_BAD_PHYS_ADDR; + if (rte_eal_iova_mode() == RTE_IOVA_VA) + mcfg->memseg[0].iova = (uintptr_t)addr; + else + mcfg->memseg[0].iova = RTE_BAD_IOVA; mcfg->memseg[0].addr = addr; mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; mcfg->memseg[0].len = internal_config.memory; @@ -1067,17 +1046,6 @@ rte_eal_hugepage_init(void) return 0; } -/* check if app runs on Xen Dom0 */ - if (internal_config.xen_dom0_support) { -#ifdef RTE_LIBRTE_XEN_DOM0 - /* use dom0_mm kernel driver to init memory */ - if (rte_xen_dom0_memory_init() < 0) - return -1; - else - return 0; -#endif - } - /* calculate total number of hugepages available. at this point we haven't * yet started sorting them so they all are on socket 0 */ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { @@ -1319,7 +1287,7 @@ rte_eal_hugepage_init(void) if (j == RTE_MAX_MEMSEG) break; - mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].iova = hugepage[i].physaddr; mcfg->memseg[j].addr = hugepage[i].final_va; mcfg->memseg[j].len = hugepage[i].size; mcfg->memseg[j].socket_id = hugepage[i].socket_id; @@ -1330,7 +1298,7 @@ rte_eal_hugepage_init(void) #ifdef RTE_ARCH_PPC_64 /* Use the phy and virt address of the last page as segment * address for IBM Power architecture */ - mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].iova = hugepage[i].physaddr; mcfg->memseg[j].addr = hugepage[i].final_va; #endif mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz; @@ -1400,17 +1368,6 @@ rte_eal_hugepage_attach(void) test_phys_addrs_available(); - if (internal_config.xen_dom0_support) { -#ifdef RTE_LIBRTE_XEN_DOM0 - if (rte_xen_dom0_memory_attach() < 0) { - RTE_LOG(ERR, EAL, "Failed to attach memory segments of primary " - "process\n"); - return -1; - } - return 0; -#endif - } - fd_zero = open("/dev/zero", O_RDONLY); if (fd_zero < 0) { RTE_LOG(ERR, EAL, "Could not open /dev/zero\n"); @@ -1542,7 +1499,7 @@ error: return -1; } -bool +int rte_eal_using_phys_addrs(void) { return phys_addrs_available; diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c deleted file mode 100644 index 8951ce74..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ /dev/null @@ -1,722 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <string.h> -#include <dirent.h> - -#include <rte_log.h> -#include <rte_bus.h> -#include <rte_pci.h> -#include <rte_eal_memconfig.h> -#include <rte_malloc.h> -#include <rte_devargs.h> -#include <rte_memcpy.h> - -#include "eal_filesystem.h" -#include "eal_private.h" -#include "eal_pci_init.h" - -/** - * @file - * PCI probing under linux - * - * This code is used to simulate a PCI probe by parsing information in sysfs. - * When a registered device matches a driver, it is then initialized with - * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). - */ - -extern struct rte_pci_bus rte_pci_bus; - -static int -pci_get_kernel_driver_by_path(const char *filename, char *dri_name) -{ - int count; - char path[PATH_MAX]; - char *name; - - if (!filename || !dri_name) - return -1; - - count = readlink(filename, path, PATH_MAX); - if (count >= PATH_MAX) - return -1; - - /* For device does not have a driver */ - if (count < 0) - return 1; - - path[count] = '\0'; - - name = strrchr(path, '/'); - if (name) { - strncpy(dri_name, name + 1, strlen(name + 1) + 1); - return 0; - } - - return -1; -} - -/* Map pci device */ -int -rte_pci_map_device(struct rte_pci_device *dev) -{ - int ret = -1; - - /* try mapping the NIC resources using VFIO if it exists */ - switch (dev->kdrv) { - case RTE_KDRV_VFIO: -#ifdef VFIO_PRESENT - if (pci_vfio_is_enabled()) - ret = pci_vfio_map_resource(dev); -#endif - break; - case RTE_KDRV_IGB_UIO: - case RTE_KDRV_UIO_GENERIC: - if (rte_eal_using_phys_addrs()) { - /* map resources for devices that use uio */ - ret = pci_uio_map_resource(dev); - } - break; - default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - ret = 1; - break; - } - - return ret; -} - -/* Unmap pci device */ -void -rte_pci_unmap_device(struct rte_pci_device *dev) -{ - /* try unmapping the NIC resources using VFIO if it exists */ - switch (dev->kdrv) { - case RTE_KDRV_VFIO: -#ifdef VFIO_PRESENT - if (pci_vfio_is_enabled()) - pci_vfio_unmap_resource(dev); -#endif - break; - case RTE_KDRV_IGB_UIO: - case RTE_KDRV_UIO_GENERIC: - /* unmap resources for devices that use uio */ - pci_uio_unmap_resource(dev); - break; - default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - break; - } -} - -void * -pci_find_max_end_va(void) -{ - const struct rte_memseg *seg = rte_eal_get_physmem_layout(); - const struct rte_memseg *last = seg; - unsigned i = 0; - - for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { - if (seg->addr == NULL) - break; - - if (seg->addr > last->addr) - last = seg; - - } - return RTE_PTR_ADD(last->addr, last->len); -} - -/* parse one line of the "resource" sysfs file (note that the 'line' - * string is modified) - */ -int -pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, - uint64_t *end_addr, uint64_t *flags) -{ - union pci_resource_info { - struct { - char *phys_addr; - char *end_addr; - char *flags; - }; - char *ptrs[PCI_RESOURCE_FMT_NVAL]; - } res_info; - - if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); - return -1; - } - errno = 0; - *phys_addr = strtoull(res_info.phys_addr, NULL, 16); - *end_addr = strtoull(res_info.end_addr, NULL, 16); - *flags = strtoull(res_info.flags, NULL, 16); - if (errno != 0) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); - return -1; - } - - return 0; -} - -/* parse the "resource" sysfs file */ -static int -pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) -{ - FILE *f; - char buf[BUFSIZ]; - int i; - uint64_t phys_addr, end_addr, flags; - - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n"); - return -1; - } - - for (i = 0; i<PCI_MAX_RESOURCE; i++) { - - if (fgets(buf, sizeof(buf), f) == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot read resource\n", __func__); - goto error; - } - if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, - &end_addr, &flags) < 0) - goto error; - - if (flags & IORESOURCE_MEM) { - dev->mem_resource[i].phys_addr = phys_addr; - dev->mem_resource[i].len = end_addr - phys_addr + 1; - /* not mapped for now */ - dev->mem_resource[i].addr = NULL; - } - } - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} - -/* Scan one pci sysfs entry, and fill the devices list from it. */ -static int -pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) -{ - char filename[PATH_MAX]; - unsigned long tmp; - struct rte_pci_device *dev; - char driver[PATH_MAX]; - int ret; - - dev = malloc(sizeof(*dev)); - if (dev == NULL) - return -1; - - memset(dev, 0, sizeof(*dev)); - dev->addr = *addr; - - /* get vendor id */ - snprintf(filename, sizeof(filename), "%s/vendor", dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.vendor_id = (uint16_t)tmp; - - /* get device id */ - snprintf(filename, sizeof(filename), "%s/device", dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.device_id = (uint16_t)tmp; - - /* get subsystem_vendor id */ - snprintf(filename, sizeof(filename), "%s/subsystem_vendor", - dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.subsystem_vendor_id = (uint16_t)tmp; - - /* get subsystem_device id */ - snprintf(filename, sizeof(filename), "%s/subsystem_device", - dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.subsystem_device_id = (uint16_t)tmp; - - /* get class_id */ - snprintf(filename, sizeof(filename), "%s/class", - dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - /* the least 24 bits are valid: class, subclass, program interface */ - dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID; - - /* get max_vfs */ - dev->max_vfs = 0; - snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); - if (!access(filename, F_OK) && - eal_parse_sysfs_value(filename, &tmp) == 0) - dev->max_vfs = (uint16_t)tmp; - else { - /* for non igb_uio driver, need kernel version >= 3.8 */ - snprintf(filename, sizeof(filename), - "%s/sriov_numvfs", dirname); - if (!access(filename, F_OK) && - eal_parse_sysfs_value(filename, &tmp) == 0) - dev->max_vfs = (uint16_t)tmp; - } - - /* get numa node, default to 0 if not present */ - snprintf(filename, sizeof(filename), "%s/numa_node", - dirname); - - if (access(filename, F_OK) != -1) { - if (eal_parse_sysfs_value(filename, &tmp) == 0) - dev->device.numa_node = tmp; - else - dev->device.numa_node = -1; - } else { - dev->device.numa_node = 0; - } - - pci_name_set(dev); - - /* parse resources */ - snprintf(filename, sizeof(filename), "%s/resource", dirname); - if (pci_parse_sysfs_resource(filename, dev) < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__); - free(dev); - return -1; - } - - /* parse driver */ - snprintf(filename, sizeof(filename), "%s/driver", dirname); - ret = pci_get_kernel_driver_by_path(filename, driver); - if (ret < 0) { - RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); - free(dev); - return -1; - } - - if (!ret) { - if (!strcmp(driver, "vfio-pci")) - dev->kdrv = RTE_KDRV_VFIO; - else if (!strcmp(driver, "igb_uio")) - dev->kdrv = RTE_KDRV_IGB_UIO; - else if (!strcmp(driver, "uio_pci_generic")) - dev->kdrv = RTE_KDRV_UIO_GENERIC; - else - dev->kdrv = RTE_KDRV_UNKNOWN; - } else - dev->kdrv = RTE_KDRV_NONE; - - /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { - rte_pci_add_device(dev); - } else { - struct rte_pci_device *dev2; - int ret; - - TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { - ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); - if (ret > 0) - continue; - - if (ret < 0) { - rte_pci_insert_device(dev2, dev); - } else { /* already registered */ - dev2->kdrv = dev->kdrv; - dev2->max_vfs = dev->max_vfs; - pci_name_set(dev2); - memmove(dev2->mem_resource, dev->mem_resource, - sizeof(dev->mem_resource)); - free(dev); - } - return 0; - } - - rte_pci_add_device(dev); - } - - return 0; -} - -int -pci_update_device(const struct rte_pci_addr *addr) -{ - char filename[PATH_MAX]; - - snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT, - pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, - addr->function); - - return pci_scan_one(filename, addr); -} - -/* - * split up a pci address into its constituent parts. - */ -static int -parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) -{ - /* first split on ':' */ - union splitaddr { - struct { - char *domain; - char *bus; - char *devid; - char *function; - }; - char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */ - } splitaddr; - - char *buf_copy = strndup(buf, bufsize); - if (buf_copy == NULL) - return -1; - - if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':') - != PCI_FMT_NVAL - 1) - goto error; - /* final split is on '.' between devid and function */ - splitaddr.function = strchr(splitaddr.devid,'.'); - if (splitaddr.function == NULL) - goto error; - *splitaddr.function++ = '\0'; - - /* now convert to int values */ - errno = 0; - addr->domain = strtoul(splitaddr.domain, NULL, 16); - addr->bus = strtoul(splitaddr.bus, NULL, 16); - addr->devid = strtoul(splitaddr.devid, NULL, 16); - addr->function = strtoul(splitaddr.function, NULL, 10); - if (errno != 0) - goto error; - - free(buf_copy); /* free the copy made with strdup */ - return 0; -error: - free(buf_copy); - return -1; -} - -/* - * Scan the content of the PCI bus, and the devices in the devices - * list - */ -int -rte_pci_scan(void) -{ - struct dirent *e; - DIR *dir; - char dirname[PATH_MAX]; - struct rte_pci_addr addr; - - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - dir = opendir(pci_get_sysfs_path()); - if (dir == NULL) { - RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", - __func__, strerror(errno)); - return -1; - } - - while ((e = readdir(dir)) != NULL) { - if (e->d_name[0] == '.') - continue; - - if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0) - continue; - - snprintf(dirname, sizeof(dirname), "%s/%s", - pci_get_sysfs_path(), e->d_name); - - if (pci_scan_one(dirname, &addr) < 0) - goto error; - } - closedir(dir); - return 0; - -error: - closedir(dir); - return -1; -} - -/* Read PCI config space. */ -int rte_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &device->intr_handle; - - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: - return pci_uio_read_config(intr_handle, buf, len, offset); - -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: - return pci_vfio_read_config(intr_handle, buf, len, offset); -#endif - default: - RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); - return -1; - } -} - -/* Write PCI config space. */ -int rte_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &device->intr_handle; - - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: - return pci_uio_write_config(intr_handle, buf, len, offset); - -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: - return pci_vfio_write_config(intr_handle, buf, len, offset); -#endif - default: - RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); - return -1; - } -} - -#if defined(RTE_ARCH_X86) -static int -pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, - struct rte_pci_ioport *p) -{ - uint16_t start, end; - FILE *fp; - char *line = NULL; - char pci_id[16]; - int found = 0; - size_t linesz; - - snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT, - dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - - fp = fopen("/proc/ioports", "r"); - if (fp == NULL) { - RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__); - return -1; - } - - while (getdelim(&line, &linesz, '\n', fp) > 0) { - char *ptr = line; - char *left; - int n; - - n = strcspn(ptr, ":"); - ptr[n] = 0; - left = &ptr[n + 1]; - - while (*left && isspace(*left)) - left++; - - if (!strncmp(left, pci_id, strlen(pci_id))) { - found = 1; - - while (*ptr && isspace(*ptr)) - ptr++; - - sscanf(ptr, "%04hx-%04hx", &start, &end); - - break; - } - } - - free(line); - fclose(fp); - - if (!found) - return -1; - - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - p->base = start; - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start); - - return 0; -} -#endif - -int -rte_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - int ret = -1; - - switch (dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - if (pci_vfio_is_enabled()) - ret = pci_vfio_ioport_map(dev, bar, p); - break; -#endif - case RTE_KDRV_IGB_UIO: - ret = pci_uio_ioport_map(dev, bar, p); - break; - case RTE_KDRV_UIO_GENERIC: -#if defined(RTE_ARCH_X86) - ret = pci_ioport_map(dev, bar, p); -#else - ret = pci_uio_ioport_map(dev, bar, p); -#endif - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - ret = pci_ioport_map(dev, bar, p); -#endif - break; - default: - break; - } - - if (!ret) - p->dev = dev; - - return ret; -} - -void -rte_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ - switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - pci_vfio_ioport_read(p, data, len, offset); - break; -#endif - case RTE_KDRV_IGB_UIO: - pci_uio_ioport_read(p, data, len, offset); - break; - case RTE_KDRV_UIO_GENERIC: - pci_uio_ioport_read(p, data, len, offset); - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - pci_uio_ioport_read(p, data, len, offset); -#endif - break; - default: - break; - } -} - -void -rte_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ - switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - pci_vfio_ioport_write(p, data, len, offset); - break; -#endif - case RTE_KDRV_IGB_UIO: - pci_uio_ioport_write(p, data, len, offset); - break; - case RTE_KDRV_UIO_GENERIC: - pci_uio_ioport_write(p, data, len, offset); - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - pci_uio_ioport_write(p, data, len, offset); -#endif - break; - default: - break; - } -} - -int -rte_pci_ioport_unmap(struct rte_pci_ioport *p) -{ - int ret = -1; - - switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - if (pci_vfio_is_enabled()) - ret = pci_vfio_ioport_unmap(p); - break; -#endif - case RTE_KDRV_IGB_UIO: - ret = pci_uio_ioport_unmap(p); - break; - case RTE_KDRV_UIO_GENERIC: -#if defined(RTE_ARCH_X86) - ret = 0; -#else - ret = pci_uio_ioport_unmap(p); -#endif - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - ret = 0; -#endif - break; - default: - break; - } - - return ret; -} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h deleted file mode 100644 index ae2980d6..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h +++ /dev/null @@ -1,97 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef EAL_PCI_INIT_H_ -#define EAL_PCI_INIT_H_ - -#include "eal_vfio.h" - -/** IO resource type: */ -#define IORESOURCE_IO 0x00000100 -#define IORESOURCE_MEM 0x00000200 - -/* - * Helper function to map PCI resources right after hugepages in virtual memory - */ -extern void *pci_map_addr; -void *pci_find_max_end_va(void); - -/* parse one line of the "resource" sysfs file (note that the 'line' - * string is modified) - */ -int pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, - uint64_t *end_addr, uint64_t *flags); - -int pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res); -void pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res); -int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx); - -int pci_uio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs); -int pci_uio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs); - -int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p); -void pci_uio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset); -void pci_uio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset); -int pci_uio_ioport_unmap(struct rte_pci_ioport *p); - -#ifdef VFIO_PRESENT - -/* access config space */ -int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs); -int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs); - -int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p); -void pci_vfio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset); -void pci_vfio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset); -int pci_vfio_ioport_unmap(struct rte_pci_ioport *p); - -/* map/unmap VFIO resource prototype */ -int pci_vfio_map_resource(struct rte_pci_device *dev); -int pci_vfio_unmap_resource(struct rte_pci_device *dev); - -#endif - -#endif /* EAL_PCI_INIT_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c deleted file mode 100644 index fa10329f..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ /dev/null @@ -1,567 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <string.h> -#include <unistd.h> -#include <fcntl.h> -#include <dirent.h> -#include <inttypes.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <sys/sysmacros.h> -#include <linux/pci_regs.h> - -#if defined(RTE_ARCH_X86) -#include <sys/io.h> -#endif - -#include <rte_log.h> -#include <rte_pci.h> -#include <rte_eal_memconfig.h> -#include <rte_common.h> -#include <rte_malloc.h> - -#include "eal_filesystem.h" -#include "eal_pci_init.h" - -void *pci_map_addr = NULL; - -#define OFF_MAX ((uint64_t)(off_t)-1) - -int -pci_uio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offset) -{ - return pread(intr_handle->uio_cfg_fd, buf, len, offset); -} - -int -pci_uio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offset) -{ - return pwrite(intr_handle->uio_cfg_fd, buf, len, offset); -} - -static int -pci_uio_set_bus_master(int dev_fd) -{ - uint16_t reg; - int ret; - - ret = pread(dev_fd, ®, sizeof(reg), PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, - "Cannot read command from PCI config space!\n"); - return -1; - } - - /* return if bus mastering is already on */ - if (reg & PCI_COMMAND_MASTER) - return 0; - - reg |= PCI_COMMAND_MASTER; - - ret = pwrite(dev_fd, ®, sizeof(reg), PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, - "Cannot write command to PCI config space!\n"); - return -1; - } - - return 0; -} - -static int -pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) -{ - FILE *f; - char filename[PATH_MAX]; - int ret; - unsigned major, minor; - dev_t dev; - - /* get the name of the sysfs file that contains the major and minor - * of the uio device and read its content */ - snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path); - - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n", - __func__); - return -1; - } - - ret = fscanf(f, "%u:%u", &major, &minor); - if (ret != 2) { - RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n", - __func__); - fclose(f); - return -1; - } - fclose(f); - - /* create the char device "mknod /dev/uioX c major minor" */ - snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); - dev = makedev(major, minor); - ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); - if (ret != 0) { - RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", - __func__, strerror(errno)); - return -1; - } - - return ret; -} - -/* - * Return the uioX char device used for a pci device. On success, return - * the UIO number and fill dstbuf string with the path of the device in - * sysfs. On error, return a negative value. In this case dstbuf is - * invalid. - */ -static int -pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, - unsigned int buflen, int create) -{ - struct rte_pci_addr *loc = &dev->addr; - unsigned int uio_num; - struct dirent *e; - DIR *dir; - char dirname[PATH_MAX]; - - /* depending on kernel version, uio can be located in uio/uioX - * or uio:uioX */ - - snprintf(dirname, sizeof(dirname), - "%s/" PCI_PRI_FMT "/uio", pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - - dir = opendir(dirname); - if (dir == NULL) { - /* retry with the parent directory */ - snprintf(dirname, sizeof(dirname), - "%s/" PCI_PRI_FMT, pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - dir = opendir(dirname); - - if (dir == NULL) { - RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname); - return -1; - } - } - - /* take the first file starting with "uio" */ - while ((e = readdir(dir)) != NULL) { - /* format could be uio%d ...*/ - int shortprefix_len = sizeof("uio") - 1; - /* ... or uio:uio%d */ - int longprefix_len = sizeof("uio:uio") - 1; - char *endptr; - - if (strncmp(e->d_name, "uio", 3) != 0) - continue; - - /* first try uio%d */ - errno = 0; - uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10); - if (errno == 0 && endptr != (e->d_name + shortprefix_len)) { - snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); - break; - } - - /* then try uio:uio%d */ - errno = 0; - uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10); - if (errno == 0 && endptr != (e->d_name + longprefix_len)) { - snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num); - break; - } - } - closedir(dir); - - /* No uio resource found */ - if (e == NULL) - return -1; - - /* create uio device if we've been asked to */ - if (internal_config.create_uio_dev && create && - pci_mknod_uio_dev(dstbuf, uio_num) < 0) - RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num); - - return uio_num; -} - -void -pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res) -{ - rte_free(uio_res); - - if (dev->intr_handle.uio_cfg_fd >= 0) { - close(dev->intr_handle.uio_cfg_fd); - dev->intr_handle.uio_cfg_fd = -1; - } - if (dev->intr_handle.fd >= 0) { - close(dev->intr_handle.fd); - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - } -} - -int -pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res) -{ - char dirname[PATH_MAX]; - char cfgname[PATH_MAX]; - char devname[PATH_MAX]; /* contains the /dev/uioX */ - int uio_num; - struct rte_pci_addr *loc; - - loc = &dev->addr; - - /* find uio resource */ - uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1); - if (uio_num < 0) { - RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " - "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); - return 1; - } - snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); - - /* save fd if in primary process */ - dev->intr_handle.fd = open(devname, O_RDWR); - if (dev->intr_handle.fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - - snprintf(cfgname, sizeof(cfgname), - "/sys/class/uio/uio%u/device/config", uio_num); - dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR); - if (dev->intr_handle.uio_cfg_fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - cfgname, strerror(errno)); - goto error; - } - - if (dev->kdrv == RTE_KDRV_IGB_UIO) - dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - else { - dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; - - /* set bus master that is not done by uio_pci_generic */ - if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) { - RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); - goto error; - } - } - - /* allocate the mapping details for secondary processes*/ - *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); - if (*uio_res == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot store uio mmap details\n", __func__); - goto error; - } - - snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); - memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); - - return 0; - -error: - pci_uio_free_resource(dev, *uio_res); - return -1; -} - -int -pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx) -{ - int fd; - char devname[PATH_MAX]; - void *mapaddr; - struct rte_pci_addr *loc; - struct pci_map *maps; - - loc = &dev->addr; - maps = uio_res->maps; - - /* update devname for mmap */ - snprintf(devname, sizeof(devname), - "%s/" PCI_PRI_FMT "/resource%d", - pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, - loc->function, res_idx); - - /* allocate memory to keep path */ - maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); - if (maps[map_idx].path == NULL) { - RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", - strerror(errno)); - return -1; - } - - /* - * open resource file, to mmap it - */ - fd = open(devname, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - - /* try mapping somewhere close to the end of hugepages */ - if (pci_map_addr == NULL) - pci_map_addr = pci_find_max_end_va(); - - mapaddr = pci_map_resource(pci_map_addr, fd, 0, - (size_t)dev->mem_resource[res_idx].len, 0); - close(fd); - if (mapaddr == MAP_FAILED) - goto error; - - pci_map_addr = RTE_PTR_ADD(mapaddr, - (size_t)dev->mem_resource[res_idx].len); - - maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; - maps[map_idx].size = dev->mem_resource[res_idx].len; - maps[map_idx].addr = mapaddr; - maps[map_idx].offset = 0; - strcpy(maps[map_idx].path, devname); - dev->mem_resource[res_idx].addr = mapaddr; - - return 0; - -error: - rte_free(maps[map_idx].path); - return -1; -} - -#if defined(RTE_ARCH_X86) -int -pci_uio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - char dirname[PATH_MAX]; - char filename[PATH_MAX]; - int uio_num; - unsigned long start; - - uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0); - if (uio_num < 0) - return -1; - - /* get portio start */ - snprintf(filename, sizeof(filename), - "%s/portio/port%d/start", dirname, bar); - if (eal_parse_sysfs_value(filename, &start) < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n", - __func__); - return -1; - } - /* ensure we don't get anything funny here, read/write will cast to - * uin16_t */ - if (start > UINT16_MAX) - return -1; - - /* FIXME only for primary process ? */ - if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) { - - snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); - dev->intr_handle.fd = open(filename, O_RDWR); - if (dev->intr_handle.fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - filename, strerror(errno)); - return -1; - } - dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - } - - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start); - - p->base = start; - p->len = 0; - return 0; -} -#else -int -pci_uio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - FILE *f; - char buf[BUFSIZ]; - char filename[PATH_MAX]; - uint64_t phys_addr, end_addr, flags; - int fd, i; - void *addr; - - /* open and read addresses of the corresponding resource in sysfs */ - snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource", - pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n", - strerror(errno)); - return -1; - } - for (i = 0; i < bar + 1; i++) { - if (fgets(buf, sizeof(buf), f) == NULL) { - RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n"); - goto error; - } - } - if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, - &end_addr, &flags) < 0) - goto error; - if ((flags & IORESOURCE_IO) == 0) { - RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar); - goto error; - } - snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d", - pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function, bar); - - /* mmap the pci resource */ - fd = open(filename, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, - strerror(errno)); - goto error; - } - addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - close(fd); - if (addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Cannot mmap IO port resource: %s\n", - strerror(errno)); - goto error; - } - - /* strangely, the base address is mmap addr + phys_addr */ - p->base = (uintptr_t)addr + phys_addr; - p->len = end_addr + 1; - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%"PRIx64"\n", p->base); - fclose(f); - - return 0; - -error: - fclose(f); - return -1; -} -#endif - -void -pci_uio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ - uint8_t *d; - int size; - uintptr_t reg = p->base + offset; - - for (d = data; len > 0; d += size, reg += size, len -= size) { - if (len >= 4) { - size = 4; -#if defined(RTE_ARCH_X86) - *(uint32_t *)d = inl(reg); -#else - *(uint32_t *)d = *(volatile uint32_t *)reg; -#endif - } else if (len >= 2) { - size = 2; -#if defined(RTE_ARCH_X86) - *(uint16_t *)d = inw(reg); -#else - *(uint16_t *)d = *(volatile uint16_t *)reg; -#endif - } else { - size = 1; -#if defined(RTE_ARCH_X86) - *d = inb(reg); -#else - *d = *(volatile uint8_t *)reg; -#endif - } - } -} - -void -pci_uio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ - const uint8_t *s; - int size; - uintptr_t reg = p->base + offset; - - for (s = data; len > 0; s += size, reg += size, len -= size) { - if (len >= 4) { - size = 4; -#if defined(RTE_ARCH_X86) - outl_p(*(const uint32_t *)s, reg); -#else - *(volatile uint32_t *)reg = *(const uint32_t *)s; -#endif - } else if (len >= 2) { - size = 2; -#if defined(RTE_ARCH_X86) - outw_p(*(const uint16_t *)s, reg); -#else - *(volatile uint16_t *)reg = *(const uint16_t *)s; -#endif - } else { - size = 1; -#if defined(RTE_ARCH_X86) - outb_p(*s, reg); -#else - *(volatile uint8_t *)reg = *s; -#endif - } - } -} - -int -pci_uio_ioport_unmap(struct rte_pci_ioport *p) -{ -#if defined(RTE_ARCH_X86) - RTE_SET_USED(p); - /* FIXME close intr fd ? */ - return 0; -#else - return munmap((void *)(uintptr_t)p->base, p->len); -#endif -} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c deleted file mode 100644 index aa9d96ed..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ /dev/null @@ -1,674 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <string.h> -#include <fcntl.h> -#include <linux/pci_regs.h> -#include <sys/eventfd.h> -#include <sys/socket.h> -#include <sys/ioctl.h> -#include <sys/mman.h> -#include <stdbool.h> - -#include <rte_log.h> -#include <rte_pci.h> -#include <rte_eal_memconfig.h> -#include <rte_malloc.h> - -#include "eal_filesystem.h" -#include "eal_pci_init.h" -#include "eal_vfio.h" -#include "eal_private.h" - -/** - * @file - * PCI probing under linux (VFIO version) - * - * This code tries to determine if the PCI device is bound to VFIO driver, - * and initialize it (map BARs, set up interrupts) if that's the case. - * - * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". - */ - -#ifdef VFIO_PRESENT - -#define PAGE_SIZE (sysconf(_SC_PAGESIZE)) -#define PAGE_MASK (~(PAGE_SIZE - 1)) - -static struct rte_tailq_elem rte_vfio_tailq = { - .name = "VFIO_RESOURCE_LIST", -}; -EAL_REGISTER_TAILQ(rte_vfio_tailq) - -int -pci_vfio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs) -{ - return pread64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); -} - -int -pci_vfio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs) -{ - return pwrite64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); -} - -/* get PCI BAR number where MSI-X interrupts are */ -static int -pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, - uint32_t *msix_table_size) -{ - int ret; - uint32_t reg; - uint16_t flags; - uint8_t cap_id, cap_offset; - - /* read PCI capability pointer from config space */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_CAPABILITY_LIST); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " - "config space!\n"); - return -1; - } - - /* we need first byte */ - cap_offset = reg & 0xFF; - - while (cap_offset) { - - /* read PCI capability ID */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " - "config space!\n"); - return -1; - } - - /* we need first byte */ - cap_id = reg & 0xFF; - - /* if we haven't reached MSI-X, check next capability */ - if (cap_id != PCI_CAP_ID_MSIX) { - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " - "config space!\n"); - return -1; - } - - /* we need second byte */ - cap_offset = (reg & 0xFF00) >> 8; - - continue; - } - /* else, read table offset */ - else { - /* table offset resides in the next 4 bytes */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 4); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " - "space!\n"); - return -1; - } - - ret = pread64(fd, &flags, sizeof(flags), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 2); - if (ret != sizeof(flags)) { - RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " - "space!\n"); - return -1; - } - - *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR; - *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; - *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); - - return 0; - } - } - return 0; -} - -/* set PCI bus mastering */ -static int -pci_vfio_set_bus_master(int dev_fd, bool op) -{ - uint16_t reg; - int ret; - - ret = pread64(dev_fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); - return -1; - } - - if (op) - /* set the master bit */ - reg |= PCI_COMMAND_MASTER; - else - reg &= ~(PCI_COMMAND_MASTER); - - ret = pwrite64(dev_fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); - - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); - return -1; - } - - return 0; -} - -/* set up interrupt support (but not enable interrupts) */ -static int -pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) -{ - int i, ret, intr_idx; - - /* default to invalid index */ - intr_idx = VFIO_PCI_NUM_IRQS; - - /* get interrupt type from internal config (MSI-X by default, can be - * overridden from the command line - */ - switch (internal_config.vfio_intr_mode) { - case RTE_INTR_MODE_MSIX: - intr_idx = VFIO_PCI_MSIX_IRQ_INDEX; - break; - case RTE_INTR_MODE_MSI: - intr_idx = VFIO_PCI_MSI_IRQ_INDEX; - break; - case RTE_INTR_MODE_LEGACY: - intr_idx = VFIO_PCI_INTX_IRQ_INDEX; - break; - /* don't do anything if we want to automatically determine interrupt type */ - case RTE_INTR_MODE_NONE: - break; - default: - RTE_LOG(ERR, EAL, " unknown default interrupt type!\n"); - return -1; - } - - /* start from MSI-X interrupt type */ - for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { - struct vfio_irq_info irq = { .argsz = sizeof(irq) }; - int fd = -1; - - /* skip interrupt modes we don't want */ - if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE && - i != intr_idx) - continue; - - irq.index = i; - - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); - if (ret < 0) { - RTE_LOG(ERR, EAL, " cannot get IRQ info, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } - - /* if this vector cannot be used with eventfd, fail if we explicitly - * specified interrupt type, otherwise continue */ - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { - if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) { - RTE_LOG(ERR, EAL, - " interrupt vector does not support eventfd!\n"); - return -1; - } else - continue; - } - - /* set up an eventfd for interrupts */ - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, " cannot set up eventfd, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } - - dev->intr_handle.fd = fd; - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - - switch (i) { - case VFIO_PCI_MSIX_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; - break; - case VFIO_PCI_MSI_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; - break; - case VFIO_PCI_INTX_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_LEGACY; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; - break; - default: - RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); - return -1; - } - - return 0; - } - - /* if we're here, we haven't found a suitable interrupt vector */ - return -1; -} - -/* - * map the PCI resources of a PCI device in virtual memory (VFIO version). - * primary and secondary processes follow almost exactly the same path - */ -int -pci_vfio_map_resource(struct rte_pci_device *dev) -{ - struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; - char pci_addr[PATH_MAX] = {0}; - int vfio_dev_fd; - struct rte_pci_addr *loc = &dev->addr; - int i, ret, msix_bar; - struct mapped_pci_resource *vfio_res = NULL; - struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); - - struct pci_map *maps; - uint32_t msix_table_offset = 0; - uint32_t msix_table_size = 0; - uint32_t ioport_bar; - - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - - /* store PCI address string */ - snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, - loc->domain, loc->bus, loc->devid, loc->function); - - if ((ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr, - &vfio_dev_fd, &device_info))) - return ret; - - /* get MSI-X BAR, if any (we have to know where it is because we can't - * easily mmap it when using VFIO) */ - msix_bar = -1; - ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar, - &msix_table_offset, &msix_table_size); - if (ret < 0) { - RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); - close(vfio_dev_fd); - return -1; - } - - /* if we're in a primary process, allocate vfio_res and get region info */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); - if (vfio_res == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot store uio mmap details\n", __func__); - close(vfio_dev_fd); - return -1; - } - memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr)); - - /* get number of registers (up to BAR5) */ - vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions, - VFIO_PCI_BAR5_REGION_INDEX + 1); - } else { - /* if we're in a secondary process, just find our tailq entry */ - TAILQ_FOREACH(vfio_res, vfio_res_list, next) { - if (rte_eal_compare_pci_addr(&vfio_res->pci_addr, - &dev->addr)) - continue; - break; - } - /* if we haven't found our tailq entry, something's wrong */ - if (vfio_res == NULL) { - RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", - pci_addr); - close(vfio_dev_fd); - return -1; - } - } - - /* map BARs */ - maps = vfio_res->maps; - - for (i = 0; i < (int) vfio_res->nb_maps; i++) { - struct vfio_region_info reg = { .argsz = sizeof(reg) }; - void *bar_addr; - struct memreg { - unsigned long offset, size; - } memreg[2] = {}; - - reg.index = i; - - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®); - - if (ret) { - RTE_LOG(ERR, EAL, " %s cannot get device region info " - "error %i (%s)\n", pci_addr, errno, strerror(errno)); - close(vfio_dev_fd); - if (internal_config.process_type == RTE_PROC_PRIMARY) - rte_free(vfio_res); - return -1; - } - - /* chk for io port region */ - ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) - + PCI_BASE_ADDRESS_0 + i*4); - - if (ret != sizeof(ioport_bar)) { - RTE_LOG(ERR, EAL, - "Cannot read command (%x) from config space!\n", - PCI_BASE_ADDRESS_0 + i*4); - return -1; - } - - if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) { - RTE_LOG(INFO, EAL, - "Ignore mapping IO port bar(%d) addr: %x\n", - i, ioport_bar); - continue; - } - - /* skip non-mmapable BARs */ - if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) - continue; - - if (i == msix_bar) { - /* - * VFIO will not let us map the MSI-X table, - * but we can map around it. - */ - uint32_t table_start = msix_table_offset; - uint32_t table_end = table_start + msix_table_size; - table_end = (table_end + ~PAGE_MASK) & PAGE_MASK; - table_start &= PAGE_MASK; - - if (table_start == 0 && table_end >= reg.size) { - /* Cannot map this BAR */ - RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i); - continue; - } else { - memreg[0].offset = reg.offset; - memreg[0].size = table_start; - memreg[1].offset = reg.offset + table_end; - memreg[1].size = reg.size - table_end; - - RTE_LOG(DEBUG, EAL, - "Trying to map BAR %d that contains the MSI-X " - "table. Trying offsets: " - "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i, - memreg[0].offset, memreg[0].size, - memreg[1].offset, memreg[1].size); - } - } else { - memreg[0].offset = reg.offset; - memreg[0].size = reg.size; - } - - /* try to figure out an address */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - /* try mapping somewhere close to the end of hugepages */ - if (pci_map_addr == NULL) - pci_map_addr = pci_find_max_end_va(); - - bar_addr = pci_map_addr; - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); - } else { - bar_addr = maps[i].addr; - } - - /* reserve the address using an inaccessible mapping */ - bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE | - MAP_ANONYMOUS, -1, 0); - if (bar_addr != MAP_FAILED) { - void *map_addr = NULL; - if (memreg[0].size) { - /* actual map of first part */ - map_addr = pci_map_resource(bar_addr, vfio_dev_fd, - memreg[0].offset, - memreg[0].size, - MAP_FIXED); - } - - /* if there's a second part, try to map it */ - if (map_addr != MAP_FAILED - && memreg[1].offset && memreg[1].size) { - void *second_addr = RTE_PTR_ADD(bar_addr, - memreg[1].offset - - (uintptr_t)reg.offset); - map_addr = pci_map_resource(second_addr, - vfio_dev_fd, memreg[1].offset, - memreg[1].size, - MAP_FIXED); - } - - if (map_addr == MAP_FAILED || !map_addr) { - munmap(bar_addr, reg.size); - bar_addr = MAP_FAILED; - } - } - - if (bar_addr == MAP_FAILED || - (internal_config.process_type == RTE_PROC_SECONDARY && - bar_addr != maps[i].addr)) { - RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, - strerror(errno)); - close(vfio_dev_fd); - if (internal_config.process_type == RTE_PROC_PRIMARY) - rte_free(vfio_res); - return -1; - } - - maps[i].addr = bar_addr; - maps[i].offset = reg.offset; - maps[i].size = reg.size; - maps[i].path = NULL; /* vfio doesn't have per-resource paths */ - dev->mem_resource[i].addr = bar_addr; - } - - /* if secondary process, do not set up interrupts */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) { - RTE_LOG(ERR, EAL, " %s error setting up interrupts!\n", pci_addr); - close(vfio_dev_fd); - rte_free(vfio_res); - return -1; - } - - /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { - RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr); - close(vfio_dev_fd); - rte_free(vfio_res); - return -1; - } - - /* Reset the device */ - ioctl(vfio_dev_fd, VFIO_DEVICE_RESET); - } - - if (internal_config.process_type == RTE_PROC_PRIMARY) - TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); - - return 0; -} - -int -pci_vfio_unmap_resource(struct rte_pci_device *dev) -{ - char pci_addr[PATH_MAX] = {0}; - struct rte_pci_addr *loc = &dev->addr; - int i, ret; - struct mapped_pci_resource *vfio_res = NULL; - struct mapped_pci_res_list *vfio_res_list; - - struct pci_map *maps; - - /* store PCI address string */ - snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, - loc->domain, loc->bus, loc->devid, loc->function); - - - if (close(dev->intr_handle.fd) < 0) { - RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n", - pci_addr); - return -1; - } - - if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) { - RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n", - pci_addr); - return -1; - } - - ret = vfio_release_device(pci_get_sysfs_path(), pci_addr, - dev->intr_handle.vfio_dev_fd); - if (ret < 0) { - RTE_LOG(ERR, EAL, - "%s(): cannot release device\n", __func__); - return ret; - } - - vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); - /* Get vfio_res */ - TAILQ_FOREACH(vfio_res, vfio_res_list, next) { - if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) - continue; - break; - } - /* if we haven't found our tailq entry, something's wrong */ - if (vfio_res == NULL) { - RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", - pci_addr); - return -1; - } - - /* unmap BARs */ - maps = vfio_res->maps; - - RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n", - pci_addr); - for (i = 0; i < (int) vfio_res->nb_maps; i++) { - - /* - * We do not need to be aware of MSI-X table BAR mappings as - * when mapping. Just using current maps array is enough - */ - if (maps[i].addr) { - RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n", - pci_addr, maps[i].addr); - pci_unmap_resource(maps[i].addr, maps[i].size); - } - } - - TAILQ_REMOVE(vfio_res_list, vfio_res, next); - - return 0; -} - -int -pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - if (bar < VFIO_PCI_BAR0_REGION_INDEX || - bar > VFIO_PCI_BAR5_REGION_INDEX) { - RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar); - return -1; - } - - p->dev = dev; - p->base = VFIO_GET_REGION_ADDR(bar); - return 0; -} - -void -pci_vfio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; - - if (pread64(intr_handle->vfio_dev_fd, data, - len, p->base + offset) <= 0) - RTE_LOG(ERR, EAL, - "Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n", - VFIO_GET_REGION_IDX(p->base), (int)offset); -} - -void -pci_vfio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; - - if (pwrite64(intr_handle->vfio_dev_fd, data, - len, p->base + offset) <= 0) - RTE_LOG(ERR, EAL, - "Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n", - VFIO_GET_REGION_IDX(p->base), (int)offset); -} - -int -pci_vfio_ioport_unmap(struct rte_pci_ioport *p) -{ - RTE_SET_USED(p); - return -1; -} - -int -pci_vfio_enable(void) -{ - return vfio_enable("vfio_pci"); -} - -int -pci_vfio_is_enabled(void) -{ - return vfio_is_enabled("vfio_pci"); -} -#endif diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c index 6481eeea..e9a579e4 100644 --- a/lib/librte_eal/linuxapp/eal/eal_thread.c +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -46,7 +46,6 @@ #include <rte_launch.h> #include <rte_log.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_per_lcore.h> #include <rte_eal.h> #include <rte_lcore.h> diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c index afa32f5c..24349dab 100644 --- a/lib/librte_eal/linuxapp/eal/eal_timer.c +++ b/lib/librte_eal/linuxapp/eal/eal_timer.c @@ -49,7 +49,6 @@ #include <rte_cycles.h> #include <rte_lcore.h> #include <rte_memory.h> -#include <rte_memzone.h> #include <rte_eal.h> #include <rte_debug.h> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 946df7e3..58f0123e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -39,6 +39,7 @@ #include <rte_log.h> #include <rte_memory.h> #include <rte_eal_memconfig.h> +#include <rte_vfio.h> #include "eal_filesystem.h" #include "eal_vfio.h" @@ -68,8 +69,8 @@ vfio_get_group_fd(int iommu_group_no) { int i; int vfio_group_fd; - int group_idx = -1; char filename[PATH_MAX]; + struct vfio_group *cur_grp; /* check if we already have the group descriptor open */ for (i = 0; i < VFIO_MAX_GROUPS; i++) @@ -85,12 +86,12 @@ vfio_get_group_fd(int iommu_group_no) /* Now lets get an index for the new group */ for (i = 0; i < VFIO_MAX_GROUPS; i++) if (vfio_cfg.vfio_groups[i].group_no == -1) { - group_idx = i; + cur_grp = &vfio_cfg.vfio_groups[i]; break; } /* This should not happen */ - if (group_idx == -1) { + if (i == VFIO_MAX_GROUPS) { RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); return -1; } @@ -123,8 +124,8 @@ vfio_get_group_fd(int iommu_group_no) /* noiommu group found */ } - vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd; + cur_grp->group_no = iommu_group_no; + cur_grp->fd = vfio_group_fd; vfio_cfg.vfio_active_groups++; return vfio_group_fd; } @@ -157,9 +158,12 @@ vfio_get_group_fd(int iommu_group_no) return 0; case SOCKET_OK: vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); - /* if we got the fd, return it */ + /* if we got the fd, store it and return it */ if (vfio_group_fd > 0) { close(socket_fd); + cur_grp->group_no = iommu_group_no; + cur_grp->fd = vfio_group_fd; + vfio_cfg.vfio_active_groups++; return vfio_group_fd; } /* fall-through on error */ @@ -280,7 +284,7 @@ clear_group(int vfio_group_fd) } int -vfio_setup_device(const char *sysfs_base, const char *dev_addr, +rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, int *vfio_dev_fd, struct vfio_device_info *device_info) { struct vfio_group_status group_status = { @@ -412,7 +416,7 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr, } int -vfio_release_device(const char *sysfs_base, const char *dev_addr, +rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int vfio_dev_fd) { struct vfio_group_status group_status = { @@ -474,7 +478,7 @@ vfio_release_device(const char *sysfs_base, const char *dev_addr, } int -vfio_enable(const char *modname) +rte_vfio_enable(const char *modname) { /* initialize group list */ int i; @@ -489,7 +493,7 @@ vfio_enable(const char *modname) /* inform the user that we are probing for VFIO */ RTE_LOG(INFO, EAL, "Probing VFIO support...\n"); - /* check if vfio-pci module is loaded */ + /* check if vfio module is loaded */ vfio_available = rte_eal_check_module(modname); /* return error directly */ @@ -519,7 +523,7 @@ vfio_enable(const char *modname) } int -vfio_is_enabled(const char *modname) +rte_vfio_is_enabled(const char *modname) { const int mod_available = rte_eal_check_module(modname); return vfio_cfg.vfio_enabled && mod_available; @@ -706,7 +710,10 @@ vfio_type1_dma_map(int vfio_container_fd) dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = ms[i].addr_64; dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + if (rte_eal_iova_mode() == RTE_IOVA_VA) + dma_map.iova = dma_map.vaddr; + else + dma_map.iova = ms[i].iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); @@ -759,10 +766,19 @@ vfio_spapr_dma_map(int vfio_container_fd) return -1; } - /* calculate window size based on number of hugepages configured */ - create.window_size = rte_eal_get_physmem_size(); + /* create DMA window from 0 to max(phys_addr + len) */ + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + if (ms[i].addr == NULL) + break; + + create.window_size = RTE_MAX(create.window_size, + ms[i].iova + ms[i].len); + } + + /* sPAPR requires window size to be a power of 2 */ + create.window_size = rte_align64pow2(create.window_size); create.page_shift = __builtin_ctzll(ms->hugepage_sz); - create.levels = 2; + create.levels = 1; ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create); if (ret) { @@ -771,6 +787,11 @@ vfio_spapr_dma_map(int vfio_container_fd) return -1; } + if (create.start_addr != 0) { + RTE_LOG(ERR, EAL, " DMA window start address != 0\n"); + return -1; + } + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ for (i = 0; i < RTE_MAX_MEMSEG; i++) { struct vfio_iommu_type1_dma_map dma_map; @@ -792,7 +813,10 @@ vfio_spapr_dma_map(int vfio_container_fd) dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = ms[i].addr_64; dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + if (rte_eal_iova_mode() == RTE_IOVA_VA) + dma_map.iova = dma_map.vaddr; + else + dma_map.iova = ms[i].iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; @@ -816,4 +840,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) return 0; } +int +rte_vfio_noiommu_is_enabled(void) +{ + int fd, ret, cnt __rte_unused; + char c; + + ret = -1; + fd = open(VFIO_NOIOMMU_MODE, O_RDONLY); + if (fd < 0) + return -1; + + cnt = read(fd, &c, 1); + if (c == 'Y') + ret = 1; + + close(fd); + return ret; +} + #endif diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 5ff63e5d..ba7892b7 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -37,20 +37,18 @@ /* * determine if VFIO is present on the system */ -#ifdef RTE_EAL_VFIO +#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO) #include <linux/version.h> #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) -#include <linux/vfio.h> - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) -#define RTE_PCI_MSIX_TABLE_BIR 0x7 -#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 -#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff +#define VFIO_PRESENT #else -#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR -#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET -#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE -#endif +#pragma message("VFIO configured but not supported by this kernel, disabling.") +#endif /* kernel version >= 3.6.0 */ +#endif /* RTE_EAL_VFIO */ + +#ifdef VFIO_PRESENT + +#include <linux/vfio.h> #define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU @@ -144,13 +142,6 @@ struct vfio_config { struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; }; -#define VFIO_DIR "/dev/vfio" -#define VFIO_CONTAINER_PATH "/dev/vfio/vfio" -#define VFIO_GROUP_FMT "/dev/vfio/%u" -#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" -#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) -#define VFIO_GET_REGION_IDX(x) (x >> 40) - /* DMA mapping function prototype. * Takes VFIO container fd as a parameter. * Returns 0 on success, -1 on error. @@ -190,24 +181,6 @@ vfio_get_group_fd(int iommu_group_no); int clear_group(int vfio_group_fd); -/** - * Setup vfio_cfg for the device identified by its address. It discovers - * the configured I/O MMU groups or sets a new one for the device. If a new - * groups is assigned, the DMA mapping is performed. - * Returns 0 on success, a negative value on failure and a positive value in - * case the given device cannot be managed this way. - */ -int vfio_setup_device(const char *sysfs_base, const char *dev_addr, - int *vfio_dev_fd, struct vfio_device_info *device_info); - -int vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); - -int vfio_enable(const char *modname); -int vfio_is_enabled(const char *modname); - -int pci_vfio_enable(void); -int pci_vfio_is_enabled(void); - int vfio_mp_sync_setup(void); #define SOCKET_REQ_CONTAINER 0x100 @@ -217,8 +190,6 @@ int vfio_mp_sync_setup(void); #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF -#define VFIO_PRESENT -#endif /* kernel version */ -#endif /* RTE_EAL_VFIO */ +#endif /* VFIO_PRESENT */ #endif /* EAL_VFIO_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c index 7e8095cb..b53ed7eb 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c @@ -49,12 +49,12 @@ #endif #include <rte_log.h> -#include <rte_pci.h> #include <rte_eal_memconfig.h> #include <rte_malloc.h> +#include <rte_vfio.h> #include "eal_filesystem.h" -#include "eal_pci_init.h" +#include "eal_vfio.h" #include "eal_thread.h" /** @@ -301,7 +301,8 @@ vfio_mp_sync_thread(void __rte_unused * arg) vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); else vfio_mp_sync_send_fd(conn_sock, fd); - close(fd); + if (fd >= 0) + close(fd); break; case SOCKET_REQ_GROUP: /* wait for group number */ diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c deleted file mode 100644 index 19db1cb5..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c +++ /dev/null @@ -1,381 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <errno.h> -#include <stdarg.h> -#include <stdlib.h> -#include <stdio.h> -#include <stdint.h> -#include <inttypes.h> -#include <string.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> -#include <sys/file.h> -#include <unistd.h> -#include <limits.h> -#include <sys/ioctl.h> -#include <sys/time.h> - -#include <rte_log.h> -#include <rte_memory.h> -#include <rte_memzone.h> -#include <rte_launch.h> -#include <rte_eal.h> -#include <rte_eal_memconfig.h> -#include <rte_per_lcore.h> -#include <rte_lcore.h> -#include <rte_common.h> -#include <rte_string_fns.h> - -#include "eal_private.h" -#include "eal_internal_cfg.h" -#include "eal_filesystem.h" -#include <exec-env/rte_dom0_common.h> - -#define PAGE_SIZE RTE_PGSIZE_4K -#define DEFAUL_DOM0_NAME "dom0-mem" - -static int xen_fd = -1; -static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB"; - -/* - * Try to mmap *size bytes in /dev/zero. If it is successful, return the - * pointer to the mmap'd area and keep *size unmodified. Else, retry - * with a smaller zone: decrease *size by mem_size until it reaches - * 0. In this case, return NULL. Note: this function returns an address - * which is a multiple of mem_size size. - */ -static void * -xen_get_virtual_area(size_t *size, size_t mem_size) -{ - void *addr; - int fd; - long aligned_addr; - - RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zu bytes\n", *size); - - fd = open("/dev/zero", O_RDONLY); - if (fd < 0){ - RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n"); - return NULL; - } - do { - addr = mmap(NULL, (*size) + mem_size, PROT_READ, - MAP_PRIVATE, fd, 0); - if (addr == MAP_FAILED) - *size -= mem_size; - } while (addr == MAP_FAILED && *size > 0); - - if (addr == MAP_FAILED) { - close(fd); - RTE_LOG(ERR, EAL, "Cannot get a virtual area\n"); - return NULL; - } - - munmap(addr, (*size) + mem_size); - close(fd); - - /* align addr to a mem_size boundary */ - aligned_addr = (uintptr_t)addr; - aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size); - addr = (void *)(aligned_addr); - - RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", - addr, *size); - - return addr; -} - -/** - * Get memory size configuration from /sys/devices/virtual/misc/dom0_mm - * /memsize-mB/memsize file, and the size unit is mB. - */ -static int -get_xen_memory_size(void) -{ - char path[PATH_MAX]; - unsigned long mem_size = 0; - static const char *file_name; - - file_name = "memsize"; - snprintf(path, sizeof(path), "%s/%s", - sys_dir_path, file_name); - - if (eal_parse_sysfs_value(path, &mem_size) < 0) - return -1; - - if (mem_size == 0) - rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not" - " configured.\n",sys_dir_path, file_name); - if (mem_size % 2) - rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be" - " even number.\n",sys_dir_path, file_name); - - if (mem_size > DOM0_CONFIG_MEMSIZE) - rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger" - " than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE); - - return mem_size; -} - -/** - * Based on physical address to caculate MFN in Xen Dom0. - */ -phys_addr_t -rte_xen_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr) -{ - int mfn_id, i; - uint64_t mfn, mfn_offset; - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct rte_memseg *memseg = mcfg->memseg; - - /* find the memory segment owning the physical address */ - if (memseg_id == -1) { - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - if ((phy_addr >= memseg[i].phys_addr) && - (phy_addr < memseg[i].phys_addr + - memseg[i].len)) { - memseg_id = i; - break; - } - } - if (memseg_id == -1) - return RTE_BAD_PHYS_ADDR; - } - - mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M; - - /*the MFN is contiguous in 2M */ - mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) % - RTE_PGSIZE_2M / PAGE_SIZE; - mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id]; - - /** return mechine address */ - return mfn * PAGE_SIZE + phy_addr % PAGE_SIZE; -} - -int -rte_xen_dom0_memory_init(void) -{ - void *vir_addr, *vma_addr = NULL; - int err, ret = 0; - uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0; - size_t vma_len = 0; - struct memory_info meminfo; - struct memseg_info seginfo[RTE_MAX_MEMSEG]; - int flags, page_size = getpagesize(); - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct rte_memseg *memseg = mcfg->memseg; - uint64_t total_mem = internal_config.memory; - - memset(seginfo, 0, sizeof(seginfo)); - memset(&meminfo, 0, sizeof(struct memory_info)); - - mem_size = get_xen_memory_size(); - requested = (unsigned) (total_mem / 0x100000); - if (requested > mem_size) - /* if we didn't satisfy total memory requirements */ - rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB," - " available: %uMB\n", requested, mem_size); - else if (total_mem != 0) - mem_size = requested; - - /* Check FD and open once */ - if (xen_fd < 0) { - xen_fd = open(DOM0_MM_DEV, O_RDWR); - if (xen_fd < 0) { - RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV); - return -1; - } - } - - meminfo.size = mem_size; - - /* construct memory mangement name for Dom0 */ - snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s", - internal_config.hugefile_prefix, DEFAUL_DOM0_NAME); - - /* Notify kernel driver to allocate memory */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo); - if (ret < 0) { - RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n"); - err = -EIO; - goto fail; - } - - /* Get number of memory segment from driver */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg); - if (ret < 0) { - RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n"); - err = -EIO; - goto fail; - } - - if(num_memseg > RTE_MAX_MEMSEG){ - RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater" - " than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG); - err = -EIO; - goto fail; - } - - /* get all memory segements information */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo); - if (ret < 0) { - RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n"); - err = -EIO; - goto fail; - } - - /* map all memory segments to contiguous user space */ - for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++) - { - vma_len = seginfo[memseg_idx].size; - - /** - * get the biggest virtual memory area up to vma_len. If it fails, - * vma_addr is NULL, so let the kernel provide the address. - */ - vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M); - if (vma_addr == NULL) { - flags = MAP_SHARED; - vma_len = RTE_PGSIZE_2M; - } else - flags = MAP_SHARED | MAP_FIXED; - - seginfo[memseg_idx].size = vma_len; - vir_addr = mmap(vma_addr, seginfo[memseg_idx].size, - PROT_READ|PROT_WRITE, flags, xen_fd, - memseg_idx * page_size); - if (vir_addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n", - DOM0_MM_DEV); - err = -EIO; - goto fail; - } - - memseg[memseg_idx].addr = vir_addr; - memseg[memseg_idx].phys_addr = page_size * - seginfo[memseg_idx].pfn ; - memseg[memseg_idx].len = seginfo[memseg_idx].size; - for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++) - memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i]; - - /* MFNs are continuous in 2M, so assume that page size is 2M */ - memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M; - - memseg[memseg_idx].nchannel = mcfg->nchannel; - memseg[memseg_idx].nrank = mcfg->nrank; - - /* NUMA is not suppoted in Xen Dom0, so only set socket 0*/ - memseg[memseg_idx].socket_id = 0; - } - - return 0; -fail: - if (xen_fd > 0) { - close(xen_fd); - xen_fd = -1; - } - return err; -} - -/* - * This creates the memory mappings in the secondary process to match that of - * the server process. It goes through each memory segment in the DPDK runtime - * configuration, mapping them in order to form a contiguous block in the - * virtual memory space - */ -int -rte_xen_dom0_memory_attach(void) -{ - const struct rte_mem_config *mcfg; - unsigned s = 0; /* s used to track the segment number */ - int xen_fd = -1; - int ret = -1; - void *vir_addr; - char name[DOM0_NAME_MAX] = {0}; - int page_size = getpagesize(); - - mcfg = rte_eal_get_configuration()->mem_config; - - /* Check FD and open once */ - if (xen_fd < 0) { - xen_fd = open(DOM0_MM_DEV, O_RDWR); - if (xen_fd < 0) { - RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV); - goto error; - } - } - - /* construct memory mangement name for Dom0 */ - snprintf(name, DOM0_NAME_MAX, "%s-%s", - internal_config.hugefile_prefix, DEFAUL_DOM0_NAME); - /* attach to memory segments of primary process */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name); - if (ret) { - RTE_LOG(ERR, EAL,"attach memory segments fail.\n"); - goto error; - } - - /* map all segments into memory to make sure we get the addrs */ - for (s = 0; s < RTE_MAX_MEMSEG; ++s) { - - /* - * the first memory segment with len==0 is the one that - * follows the last valid segment. - */ - if (mcfg->memseg[s].len == 0) - break; - - vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, - PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd, - s * page_size); - if (vir_addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " - "in %s to requested address [%p]\n", - (unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV, - mcfg->memseg[s].addr); - goto error; - } - } - return 0; - -error: - if (xen_fd >= 0) { - close(xen_fd); - xen_fd = -1; - } - return -1; -} diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h deleted file mode 100644 index d9707780..00000000 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h +++ /dev/null @@ -1,108 +0,0 @@ -/*- - * This file is provided under a dual BSD/LGPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GNU LESSER GENERAL PUBLIC LICENSE - * - * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * Contact Information: - * Intel Corporation - * - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef _RTE_DOM0_COMMON_H_ -#define _RTE_DOM0_COMMON_H_ - -#ifdef __KERNEL__ -#include <linux/if.h> -#endif - -#define DOM0_NAME_MAX 256 -#define DOM0_MM_DEV "/dev/dom0_mm" - -#define DOM0_CONTIG_NUM_ORDER 9 /**< order of 2M */ -#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */ -#define DOM0_MEMBLOCK_SIZE 0x200000 /**< size of memory block(2M). */ -#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ -#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ - -#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) -#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) -#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) -#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) - -/** - * A structure used to store memory information. - */ -struct memory_info { - char name[DOM0_NAME_MAX]; - uint64_t size; -}; - -/** - * A structure used to store memory segment information. - */ -struct memseg_info { - uint32_t idx; - uint64_t pfn; - uint64_t size; - uint64_t mfn[DOM0_NUM_MEMBLOCK]; -}; - -/** - * A structure used to store memory block information. - */ -struct memblock_info { - uint8_t exchange_flag; - uint8_t used; - uint64_t vir_addr; - uint64_t pfn; - uint64_t mfn; -}; -#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h index b800a53c..ce456d4b 100644 --- a/lib/librte_eal/linuxapp/igb_uio/compat.h +++ b/lib/librte_eal/linuxapp/igb_uio/compat.h @@ -16,12 +16,9 @@ #endif #ifndef PCI_MSIX_ENTRY_SIZE -#define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR 0 -#define PCI_MSIX_ENTRY_UPPER_ADDR 4 -#define PCI_MSIX_ENTRY_DATA 8 -#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 +#define PCI_MSIX_ENTRY_SIZE 16 +#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 #endif /* @@ -124,6 +121,14 @@ static bool pci_check_and_mask_intx(struct pci_dev *pdev) #endif /* < 3.3.0 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) -#define HAVE_PCI_ENABLE_MSIX +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) +#define HAVE_ALLOC_IRQ_VECTORS 1 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) +#define HAVE_MSI_LIST_IN_GENERIC_DEVICE 1 +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) +#define HAVE_PCI_MSI_MASK_IRQ 1 #endif diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index 07a19a31..a3a98c17 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -29,13 +29,11 @@ #include <linux/pci.h> #include <linux/uio_driver.h> #include <linux/io.h> +#include <linux/irq.h> #include <linux/msi.h> #include <linux/version.h> #include <linux/slab.h> -#ifdef CONFIG_XEN_DOM0 -#include <xen/xen.h> -#endif #include <rte_pci_dev_features.h> #include "compat.h" @@ -51,7 +49,6 @@ struct rte_uio_pci_dev { static char *intr_mode; static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; - /* sriov sysfs */ static ssize_t show_max_vfs(struct device *dev, struct device_attribute *attr, @@ -91,14 +88,16 @@ static struct attribute *dev_attrs[] = { static const struct attribute_group dev_attr_grp = { .attrs = dev_attrs, }; + +#ifndef HAVE_PCI_MSI_MASK_IRQ /* * It masks the msix on/off of generating MSI-X messages. */ static void -igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state) +igbuio_msix_mask_irq(struct msi_desc *desc, s32 state) { u32 mask_bits = desc->masked; - unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + unsigned int offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; if (state != 0) @@ -113,6 +112,52 @@ igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state) } } +/* + * It masks the msi on/off of generating MSI messages. + */ +static void +igbuio_msi_mask_irq(struct pci_dev *pdev, struct msi_desc *desc, int32_t state) +{ + u32 mask_bits = desc->masked; + u32 offset = desc->irq - pdev->irq; + u32 mask = 1 << offset; + + if (!desc->msi_attrib.maskbit) + return; + + if (state != 0) + mask_bits &= ~mask; + else + mask_bits |= mask; + + if (mask_bits != desc->masked) { + pci_write_config_dword(pdev, desc->mask_pos, mask_bits); + desc->masked = mask_bits; + } +} + +static void +igbuio_mask_irq(struct pci_dev *pdev, enum rte_intr_mode mode, s32 irq_state) +{ + struct msi_desc *desc; + struct list_head *msi_list; + +#ifdef HAVE_MSI_LIST_IN_GENERIC_DEVICE + msi_list = &pdev->dev.msi_list; +#else + msi_list = &pdev->msi_list; +#endif + + if (mode == RTE_INTR_MODE_MSIX) { + list_for_each_entry(desc, msi_list, list) + igbuio_msix_mask_irq(desc, irq_state); + } else if (mode == RTE_INTR_MODE_MSI) { + list_for_each_entry(desc, msi_list, list) + igbuio_msi_mask_irq(pdev, desc, irq_state); + } +} +#endif + /** * This is the irqcontrol callback to be registered to uio_info. * It can be used to disable/enable interrupt from user space processes. @@ -132,21 +177,26 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) struct rte_uio_pci_dev *udev = info->priv; struct pci_dev *pdev = udev->pdev; - pci_cfg_access_lock(pdev); - if (udev->mode == RTE_INTR_MODE_LEGACY) - pci_intx(pdev, !!irq_state); +#ifdef HAVE_PCI_MSI_MASK_IRQ + struct irq_data *irq = irq_get_irq_data(udev->info.irq); +#endif - else if (udev->mode == RTE_INTR_MODE_MSIX) { - struct msi_desc *desc; + pci_cfg_access_lock(pdev); -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)) - list_for_each_entry(desc, &pdev->msi_list, list) - igbuio_msix_mask_irq(desc, irq_state); + if (udev->mode == RTE_INTR_MODE_MSIX || udev->mode == RTE_INTR_MODE_MSI) { +#ifdef HAVE_PCI_MSI_MASK_IRQ + if (irq_state == 1) + pci_msi_unmask_irq(irq); + else + pci_msi_mask_irq(irq); #else - list_for_each_entry(desc, &pdev->dev.msi_list, list) - igbuio_msix_mask_irq(desc, irq_state); + igbuio_mask_irq(pdev, udev->mode, irq_state); #endif } + + if (udev->mode == RTE_INTR_MODE_LEGACY) + pci_intx(pdev, !!irq_state); + pci_cfg_access_unlock(pdev); return 0; @@ -157,19 +207,125 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) * If yes, disable it here and will be enable later. */ static irqreturn_t -igbuio_pci_irqhandler(int irq, struct uio_info *info) +igbuio_pci_irqhandler(int irq, void *dev_id) { - struct rte_uio_pci_dev *udev = info->priv; + struct rte_uio_pci_dev *udev = (struct rte_uio_pci_dev *)dev_id; + struct uio_info *info = &udev->info; /* Legacy mode need to mask in hardware */ if (udev->mode == RTE_INTR_MODE_LEGACY && !pci_check_and_mask_intx(udev->pdev)) return IRQ_NONE; + uio_event_notify(info); + /* Message signal mode, no share IRQ and automasked */ return IRQ_HANDLED; } +static int +igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev) +{ + int err = 0; +#ifndef HAVE_ALLOC_IRQ_VECTORS + struct msix_entry msix_entry; +#endif + + switch (igbuio_intr_mode_preferred) { + case RTE_INTR_MODE_MSIX: + /* Only 1 msi-x vector needed */ +#ifndef HAVE_ALLOC_IRQ_VECTORS + msix_entry.entry = 0; + if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { + dev_dbg(&udev->pdev->dev, "using MSI-X"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = msix_entry.vector; + udev->mode = RTE_INTR_MODE_MSIX; + break; + } +#else + if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) { + dev_dbg(&udev->pdev->dev, "using MSI-X"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = pci_irq_vector(udev->pdev, 0); + udev->mode = RTE_INTR_MODE_MSIX; + break; + } +#endif + + /* fall back to MSI */ + case RTE_INTR_MODE_MSI: +#ifndef HAVE_ALLOC_IRQ_VECTORS + if (pci_enable_msi(udev->pdev) == 0) { + dev_dbg(&udev->pdev->dev, "using MSI"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = udev->pdev->irq; + udev->mode = RTE_INTR_MODE_MSI; + break; + } +#else + if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) { + dev_dbg(&udev->pdev->dev, "using MSI"); + udev->info.irq_flags = IRQF_NO_THREAD; + udev->info.irq = pci_irq_vector(udev->pdev, 0); + udev->mode = RTE_INTR_MODE_MSI; + break; + } +#endif + /* fall back to INTX */ + case RTE_INTR_MODE_LEGACY: + if (pci_intx_mask_supported(udev->pdev)) { + dev_dbg(&udev->pdev->dev, "using INTX"); + udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD; + udev->info.irq = udev->pdev->irq; + udev->mode = RTE_INTR_MODE_LEGACY; + break; + } + dev_notice(&udev->pdev->dev, "PCI INTX mask not supported\n"); + /* fall back to no IRQ */ + case RTE_INTR_MODE_NONE: + udev->mode = RTE_INTR_MODE_NONE; + udev->info.irq = UIO_IRQ_NONE; + break; + + default: + dev_err(&udev->pdev->dev, "invalid IRQ mode %u", + igbuio_intr_mode_preferred); + udev->info.irq = UIO_IRQ_NONE; + err = -EINVAL; + } + + if (udev->info.irq != UIO_IRQ_NONE) + err = request_irq(udev->info.irq, igbuio_pci_irqhandler, + udev->info.irq_flags, udev->info.name, + udev); + dev_info(&udev->pdev->dev, "uio device registered with irq %lx\n", + udev->info.irq); + + return err; +} + +static void +igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev) +{ + if (udev->info.irq) { + free_irq(udev->info.irq, udev); + udev->info.irq = 0; + } + +#ifndef HAVE_ALLOC_IRQ_VECTORS + if (udev->mode == RTE_INTR_MODE_MSIX) + pci_disable_msix(udev->pdev); + if (udev->mode == RTE_INTR_MODE_MSI) + pci_disable_msi(udev->pdev); +#else + if (udev->mode == RTE_INTR_MODE_MSIX || + udev->mode == RTE_INTR_MODE_MSI) + pci_free_irq_vectors(udev->pdev); +#endif +} + + /** * This gets called while opening uio device file. */ @@ -178,12 +334,17 @@ igbuio_pci_open(struct uio_info *info, struct inode *inode) { struct rte_uio_pci_dev *udev = info->priv; struct pci_dev *dev = udev->pdev; - - pci_reset_function(dev); + int err; /* set bus master, which was cleared by the reset function */ pci_set_master(dev); + /* enable interrupts */ + err = igbuio_pci_enable_interrupts(udev); + if (err) { + dev_err(&dev->dev, "Enable interrupt fails\n"); + return err; + } return 0; } @@ -193,60 +354,15 @@ igbuio_pci_release(struct uio_info *info, struct inode *inode) struct rte_uio_pci_dev *udev = info->priv; struct pci_dev *dev = udev->pdev; + /* disable interrupts */ + igbuio_pci_disable_interrupts(udev); + /* stop the device from further DMA */ pci_clear_master(dev); - pci_reset_function(dev); - return 0; } -#ifdef CONFIG_XEN_DOM0 -static int -igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma) -{ - int idx; - - idx = (int)vma->vm_pgoff; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -#ifdef HAVE_PTE_MASK_PAGE_IOMAP - vma->vm_page_prot.pgprot |= _PAGE_IOMAP; -#endif - - return remap_pfn_range(vma, - vma->vm_start, - info->mem[idx].addr >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); -} - -/** - * This is uio device mmap method which will use igbuio mmap for Xen - * Dom0 environment. - */ -static int -igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma) -{ - int idx; - - if (vma->vm_pgoff >= MAX_UIO_MAPS) - return -EINVAL; - - if (info->mem[vma->vm_pgoff].size == 0) - return -EINVAL; - - idx = (int)vma->vm_pgoff; - switch (info->mem[idx].memtype) { - case UIO_MEM_PHYS: - return igbuio_dom0_mmap_phys(info, vma); - case UIO_MEM_LOGICAL: - case UIO_MEM_VIRTUAL: - default: - return -EINVAL; - } -} -#endif - /* Remap pci resources described by bar #pci_bar in uio resource n. */ static int igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info, @@ -356,9 +472,6 @@ static int igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct rte_uio_pci_dev *udev; -#ifdef HAVE_PCI_ENABLE_MSIX - struct msix_entry msix_entry; -#endif dma_addr_t map_dma_addr; void *map_addr; int err; @@ -401,61 +514,12 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) /* fill uio infos */ udev->info.name = "igb_uio"; udev->info.version = "0.1"; - udev->info.handler = igbuio_pci_irqhandler; udev->info.irqcontrol = igbuio_pci_irqcontrol; udev->info.open = igbuio_pci_open; udev->info.release = igbuio_pci_release; -#ifdef CONFIG_XEN_DOM0 - /* check if the driver run on Xen Dom0 */ - if (xen_initial_domain()) - udev->info.mmap = igbuio_dom0_pci_mmap; -#endif udev->info.priv = udev; udev->pdev = dev; - switch (igbuio_intr_mode_preferred) { - case RTE_INTR_MODE_MSIX: - /* Only 1 msi-x vector needed */ -#ifdef HAVE_PCI_ENABLE_MSIX - msix_entry.entry = 0; - if (pci_enable_msix(dev, &msix_entry, 1) == 0) { - dev_dbg(&dev->dev, "using MSI-X"); - udev->info.irq_flags = IRQF_NO_THREAD; - udev->info.irq = msix_entry.vector; - udev->mode = RTE_INTR_MODE_MSIX; - break; - } -#else - if (pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_MSIX) == 1) { - dev_dbg(&dev->dev, "using MSI-X"); - udev->info.irq = pci_irq_vector(dev, 0); - udev->mode = RTE_INTR_MODE_MSIX; - break; - } -#endif - /* fall back to INTX */ - case RTE_INTR_MODE_LEGACY: - if (pci_intx_mask_supported(dev)) { - dev_dbg(&dev->dev, "using INTX"); - udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD; - udev->info.irq = dev->irq; - udev->mode = RTE_INTR_MODE_LEGACY; - break; - } - dev_notice(&dev->dev, "PCI INTX mask not supported\n"); - /* fall back to no IRQ */ - case RTE_INTR_MODE_NONE: - udev->mode = RTE_INTR_MODE_NONE; - udev->info.irq = 0; - break; - - default: - dev_err(&dev->dev, "invalid IRQ mode %u", - igbuio_intr_mode_preferred); - err = -EINVAL; - goto fail_release_iomem; - } - err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); if (err != 0) goto fail_release_iomem; @@ -467,9 +531,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) pci_set_drvdata(dev, udev); - dev_info(&dev->dev, "uio device registered with irq %lx\n", - udev->info.irq); - /* * Doing a harmless dma mapping for attaching the device to * the iommu identity mapping if kernel boots with iommu=pt. @@ -497,8 +558,6 @@ fail_remove_group: sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); fail_release_iomem: igbuio_pci_release_iomem(&udev->info); - if (udev->mode == RTE_INTR_MODE_MSIX) - pci_disable_msix(udev->pdev); pci_disable_device(dev); fail_free: kfree(udev); @@ -514,8 +573,6 @@ igbuio_pci_remove(struct pci_dev *dev) sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); uio_unregister_device(&udev->info); igbuio_pci_release_iomem(&udev->info); - if (udev->mode == RTE_INTR_MODE_MSIX) - pci_disable_msix(dev); pci_disable_device(dev); pci_set_drvdata(dev, NULL); kfree(udev); @@ -532,6 +589,9 @@ igbuio_config_intr_mode(char *intr_str) if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) { igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; pr_info("Use MSIX interrupt\n"); + } else if (!strcmp(intr_str, RTE_INTR_MODE_MSI_NAME)) { + igbuio_intr_mode_preferred = RTE_INTR_MODE_MSI; + pr_info("Use MSI interrupt\n"); } else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) { igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY; pr_info("Use legacy interrupt\n"); @@ -575,6 +635,7 @@ module_param(intr_mode, charp, S_IRUGO); MODULE_PARM_DESC(intr_mode, "igb_uio interrupt mode (default=msix):\n" " " RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n" +" " RTE_INTR_MODE_MSI_NAME " Use MSI interrupt\n" " " RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n" "\n"); diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h index 6a1587b4..3f8c0bc8 100644 --- a/lib/librte_eal/linuxapp/kni/compat.h +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -8,6 +8,34 @@ #define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) #endif +/* SuSE version macro is the same as Linux kernel version */ +#ifndef SLE_VERSION +#define SLE_VERSION(a, b, c) KERNEL_VERSION(a, b, c) +#endif +#ifdef CONFIG_SUSE_KERNEL +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57)) +/* SLES12SP3 is at least 4.4.57+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0) +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 28)) +/* SLES12 is at least 3.12.28+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12, 0, 0) +#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 61)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0))) +/* SLES11 SP3 is at least 3.0.61+ based */ +#define SLE_VERSION_CODE SLE_VERSION(11, 3, 0) +#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)) +/* SLES11 SP1 is 2.6.32 based */ +#define SLE_VERSION_CODE SLE_VERSION(11, 1, 0) +#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 27)) +/* SLES11 GA is 2.6.27 based */ +#define SLE_VERSION_CODE SLE_VERSION(11, 0, 0) +#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */ +#endif /* CONFIG_SUSE_KERNEL */ +#ifndef SLE_VERSION_CODE +#define SLE_VERSION_CODE 0 +#endif /* SLE_VERSION_CODE */ + + #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ (!(defined(RHEL_RELEASE_CODE) && \ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) @@ -55,7 +83,8 @@ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) || \ (defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)) + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)) || \ + (SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(12, 3, 0)) #define HAVE_TRANS_START_HELPER #endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h index e0a03542..e38a7561 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h @@ -697,22 +697,22 @@ struct _kc_ethtool_pauseparam { #define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c) #endif #ifdef CONFIG_SUSE_KERNEL -#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) ) -/* SLES11 GA is 2.6.27 based */ -#define SLE_VERSION_CODE SLE_VERSION(11,0,0) -#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) ) -/* SLES11 SP1 is 2.6.32 based */ -#define SLE_VERSION_CODE SLE_VERSION(11,1,0) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57)) +/* SLES12SP3 is at least 4.4.57+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0) +#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) ) +/* SLES12 is at least 3.12.28+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12,0,0) #elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,61)) && \ (LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0))) /* SLES11 SP3 is at least 3.0.61+ based */ #define SLE_VERSION_CODE SLE_VERSION(11,3,0) -#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) ) -/* SLES12 is at least 3.12.28+ based */ -#define SLE_VERSION_CODE SLE_VERSION(12,0,0) -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57)) -/* SLES12SP3 is at least 4.4.57+ based */ -#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0) +#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) ) +/* SLES11 SP1 is 2.6.32 based */ +#define SLE_VERSION_CODE SLE_VERSION(11,1,0) +#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) ) +/* SLES11 GA is 2.6.27 based */ +#define SLE_VERSION_CODE SLE_VERSION(11,0,0) #endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */ #endif /* CONFIG_SUSE_KERNEL */ #ifndef SLE_VERSION_CODE diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_eal/linuxapp/xen_dom0/Makefile deleted file mode 100644 index be51a82a..00000000 --- a/lib/librte_eal/linuxapp/xen_dom0/Makefile +++ /dev/null @@ -1,53 +0,0 @@ -# BSD LICENSE -# -# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -include $(RTE_SDK)/mk/rte.vars.mk - -# -# module name and path -# -MODULE = rte_dom0_mm - -# -# CFLAGS -# -MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50 -MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h -MODULE_CFLAGS += -Wall -Werror - -# -# all source are stored in SRCS-y -# - -SRCS-y += dom0_mm_misc.c - -include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/xen_dom0/compat.h b/lib/librte_eal/linuxapp/xen_dom0/compat.h deleted file mode 100644 index e6eb97f2..00000000 --- a/lib/librte_eal/linuxapp/xen_dom0/compat.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Minimal wrappers to allow compiling xen_dom0 on older kernels. - */ - -#ifndef RHEL_RELEASE_VERSION -#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ - (!(defined(RHEL_RELEASE_CODE) && \ - RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) - -#define kstrtoul strict_strtoul - -#endif /* < 2.6.39 */ diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h deleted file mode 100644 index 9d5ffb22..00000000 --- a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h +++ /dev/null @@ -1,107 +0,0 @@ -/*- - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ -#ifndef _DOM0_MM_DEV_H_ -#define _DOM0_MM_DEV_H_ - -#include <linux/wait.h> -#include <linux/mutex.h> -#include <linux/sched.h> -#include <linux/spinlock.h> -#include <exec-env/rte_dom0_common.h> - -#define NUM_MEM_CTX 256 /**< Maximum number of memory context*/ -#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum times of allowing exchange fail .*/ -#define MAX_MEMBLOCK_SIZE (2 * DOM0_MEMBLOCK_SIZE) -#define MAX_NUM_ORDER (DOM0_CONTIG_NUM_ORDER + 1) -#define SIZE_PER_BLOCK 2 /**< Size of memory block (2MB).*/ - -/** - * A structure describing the private information for a dom0 device. - */ -struct dom0_mm_dev { - struct miscdevice miscdev; - uint8_t fail_times; - uint32_t used_memsize; - uint32_t num_mem_ctx; - uint32_t config_memsize; - uint32_t num_bigblock; - struct dom0_mm_data *mm_data[NUM_MEM_CTX]; - struct mutex data_lock; -}; - -struct dom0_mm_data{ - uint32_t refcnt; - uint32_t num_memseg; /**< Number of memory segment. */ - uint32_t mem_size; /**< Size of requesting memory. */ - - char name[DOM0_NAME_MAX]; - - /** Store global memory block IDs used by an instance */ - uint32_t block_num[DOM0_NUM_MEMBLOCK]; - - /** Store memory block information.*/ - struct memblock_info block_info[DOM0_NUM_MEMBLOCK]; - - /** Store memory segment information.*/ - struct memseg_info seg_info[DOM0_NUM_MEMSEG]; -}; - -#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args) -#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args) -#endif diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c deleted file mode 100644 index 79630bad..00000000 --- a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c +++ /dev/null @@ -1,780 +0,0 @@ -/*- - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. - * - * Contact Information: - * Intel Corporation - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include <linux/module.h> -#include <linux/miscdevice.h> -#include <linux/fs.h> -#include <linux/device.h> -#include <linux/errno.h> -#include <linux/vmalloc.h> -#include <linux/mm.h> -#include <linux/version.h> - -#include <xen/xen.h> -#include <xen/page.h> -#include <xen/xen-ops.h> -#include <xen/interface/memory.h> - -#include <exec-env/rte_dom0_common.h> - -#include "compat.h" -#include "dom0_mm_dev.h" - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel Corporation"); -MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0"); - -static struct dom0_mm_dev dom0_dev; -static struct kobject *dom0_kobj = NULL; - -static struct memblock_info *rsv_mm_info; - -/* Default configuration for reserved memory size(2048 MB). */ -static uint32_t rsv_memsize = 2048; - -static int dom0_open(struct inode *inode, struct file *file); -static int dom0_release(struct inode *inode, struct file *file); -static int dom0_ioctl(struct file *file, unsigned int ioctl_num, - unsigned long ioctl_param); -static int dom0_mmap(struct file *file, struct vm_area_struct *vma); -static int dom0_memory_free(uint32_t size); -static int dom0_memory_release(struct dom0_mm_data *mm_data); - -static const struct file_operations data_fops = { - .owner = THIS_MODULE, - .open = dom0_open, - .release = dom0_release, - .mmap = dom0_mmap, - .unlocked_ioctl = (void *)dom0_ioctl, -}; - -static ssize_t -show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf) -{ - return snprintf(buf, 10, "%u\n", dom0_dev.used_memsize); -} - -static ssize_t -show_memsize(struct device *dev, struct device_attribute *attr, char *buf) -{ - return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize); -} - -static ssize_t -store_memsize(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - int err = 0; - unsigned long mem_size; - - if (0 != kstrtoul(buf, 0, &mem_size)) - return -EINVAL; - - mutex_lock(&dom0_dev.data_lock); - if (0 == mem_size) { - err = -EINVAL; - goto fail; - } else if (mem_size > (rsv_memsize - dom0_dev.used_memsize)) { - XEN_ERR("configure memory size fail\n"); - err = -EINVAL; - goto fail; - } else - dom0_dev.config_memsize = mem_size; - -fail: - mutex_unlock(&dom0_dev.data_lock); - return err ? err : count; -} - -static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize); -static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL); - -static struct attribute *dev_attrs[] = { - &dev_attr_memsize.attr, - &dev_attr_memsize_rsvd.attr, - NULL, -}; - -/* the memory size unit is MB */ -static const struct attribute_group dev_attr_grp = { - .name = "memsize-mB", - .attrs = dev_attrs, -}; - - -static void -sort_viraddr(struct memblock_info *mb, int cnt) -{ - int i,j; - uint64_t tmp_pfn; - uint64_t tmp_viraddr; - - /*sort virtual address and pfn */ - for(i = 0; i < cnt; i ++) { - for(j = cnt - 1; j > i; j--) { - if(mb[j].pfn < mb[j - 1].pfn) { - tmp_pfn = mb[j - 1].pfn; - mb[j - 1].pfn = mb[j].pfn; - mb[j].pfn = tmp_pfn; - - tmp_viraddr = mb[j - 1].vir_addr; - mb[j - 1].vir_addr = mb[j].vir_addr; - mb[j].vir_addr = tmp_viraddr; - } - } - } -} - -static int -dom0_find_memdata(const char * mem_name) -{ - unsigned i; - int idx = -1; - for(i = 0; i< NUM_MEM_CTX; i++) { - if(dom0_dev.mm_data[i] == NULL) - continue; - if (!strncmp(dom0_dev.mm_data[i]->name, mem_name, - sizeof(char) * DOM0_NAME_MAX)) { - idx = i; - break; - } - } - - return idx; -} - -static int -dom0_find_mempos(void) -{ - unsigned i; - int idx = -1; - - for(i = 0; i< NUM_MEM_CTX; i++) { - if(dom0_dev.mm_data[i] == NULL){ - idx = i; - break; - } - } - - return idx; -} - -static int -dom0_memory_release(struct dom0_mm_data *mm_data) -{ - int idx; - uint32_t num_block, block_id; - - /* each memory block is 2M */ - num_block = mm_data->mem_size / SIZE_PER_BLOCK; - if (num_block == 0) - return -EINVAL; - - /* reset global memory data */ - idx = dom0_find_memdata(mm_data->name); - if (idx >= 0) { - dom0_dev.used_memsize -= mm_data->mem_size; - dom0_dev.mm_data[idx] = NULL; - dom0_dev.num_mem_ctx--; - } - - /* reset these memory blocks status as free */ - for (idx = 0; idx < num_block; idx++) { - block_id = mm_data->block_num[idx]; - rsv_mm_info[block_id].used = 0; - } - - memset(mm_data, 0, sizeof(struct dom0_mm_data)); - vfree(mm_data); - return 0; -} - -static int -dom0_memory_free(uint32_t rsv_size) -{ - uint64_t vstart, vaddr; - uint32_t i, num_block, size; - - if (!xen_pv_domain()) - return -1; - - /* each memory block is 2M */ - num_block = rsv_size / SIZE_PER_BLOCK; - if (num_block == 0) - return -EINVAL; - - /* free all memory blocks of size of 4M and destroy contiguous region */ - for (i = 0; i < dom0_dev.num_bigblock * 2; i += 2) { - vstart = rsv_mm_info[i].vir_addr; - if (vstart) { - #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) - if (rsv_mm_info[i].exchange_flag) - xen_destroy_contiguous_region(vstart, - DOM0_CONTIG_NUM_ORDER); - if (rsv_mm_info[i + 1].exchange_flag) - xen_destroy_contiguous_region(vstart + - DOM0_MEMBLOCK_SIZE, - DOM0_CONTIG_NUM_ORDER); - #else - if (rsv_mm_info[i].exchange_flag) - xen_destroy_contiguous_region(rsv_mm_info[i].pfn - * PAGE_SIZE, - DOM0_CONTIG_NUM_ORDER); - if (rsv_mm_info[i + 1].exchange_flag) - xen_destroy_contiguous_region(rsv_mm_info[i].pfn - * PAGE_SIZE + DOM0_MEMBLOCK_SIZE, - DOM0_CONTIG_NUM_ORDER); - #endif - - size = DOM0_MEMBLOCK_SIZE * 2; - vaddr = vstart; - while (size > 0) { - ClearPageReserved(virt_to_page(vaddr)); - vaddr += PAGE_SIZE; - size -= PAGE_SIZE; - } - free_pages(vstart, MAX_NUM_ORDER); - } - } - - /* free all memory blocks size of 2M and destroy contiguous region */ - for (; i < num_block; i++) { - vstart = rsv_mm_info[i].vir_addr; - if (vstart) { - if (rsv_mm_info[i].exchange_flag) - xen_destroy_contiguous_region(vstart, - DOM0_CONTIG_NUM_ORDER); - - size = DOM0_MEMBLOCK_SIZE; - vaddr = vstart; - while (size > 0) { - ClearPageReserved(virt_to_page(vaddr)); - vaddr += PAGE_SIZE; - size -= PAGE_SIZE; - } - free_pages(vstart, DOM0_CONTIG_NUM_ORDER); - } - } - - memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block); - vfree(rsv_mm_info); - rsv_mm_info = NULL; - - return 0; -} - -static void -find_free_memory(uint32_t count, struct dom0_mm_data *mm_data) -{ - uint32_t i = 0; - uint32_t j = 0; - - while ((i < count) && (j < rsv_memsize / SIZE_PER_BLOCK)) { - if (rsv_mm_info[j].used == 0) { - mm_data->block_info[i].pfn = rsv_mm_info[j].pfn; - mm_data->block_info[i].vir_addr = - rsv_mm_info[j].vir_addr; - mm_data->block_info[i].mfn = rsv_mm_info[j].mfn; - mm_data->block_info[i].exchange_flag = - rsv_mm_info[j].exchange_flag; - mm_data->block_num[i] = j; - rsv_mm_info[j].used = 1; - i++; - } - j++; - } -} - -/** - * Find all memory segments in which physical addresses are contiguous. - */ -static void -find_memseg(int count, struct dom0_mm_data * mm_data) -{ - int i = 0; - int j, k, idx = 0; - uint64_t zone_len, pfn, num_block; - - while(i < count) { - if (mm_data->block_info[i].exchange_flag == 0) { - i++; - continue; - } - k = 0; - pfn = mm_data->block_info[i].pfn; - mm_data->seg_info[idx].pfn = pfn; - mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn; - - for (j = i + 1; j < count; j++) { - - /* ignore exchange fail memory block */ - if (mm_data->block_info[j].exchange_flag == 0) - break; - - if (mm_data->block_info[j].pfn != - (mm_data->block_info[j - 1].pfn + - DOM0_MEMBLOCK_SIZE / PAGE_SIZE)) - break; - ++k; - mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn; - } - - num_block = j - i; - zone_len = num_block * DOM0_MEMBLOCK_SIZE; - mm_data->seg_info[idx].size = zone_len; - - XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len); - i = i+ num_block; - idx++; - if (idx == DOM0_NUM_MEMSEG) - break; - } - mm_data->num_memseg = idx; -} - -static int -dom0_memory_reserve(uint32_t rsv_size) -{ - uint64_t pfn, vstart, vaddr; - uint32_t i, num_block, size, allocated_size = 0; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) - dma_addr_t dma_handle; -#endif - - /* 2M as memory block */ - num_block = rsv_size / SIZE_PER_BLOCK; - - rsv_mm_info = vmalloc(sizeof(struct memblock_info) * num_block); - if (!rsv_mm_info) { - XEN_ERR("Unable to allocate device memory information\n"); - return -ENOMEM; - } - memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block); - - /* try alloc size of 4M once */ - for (i = 0; i < num_block; i += 2) { - vstart = (unsigned long) - __get_free_pages(GFP_ATOMIC, MAX_NUM_ORDER); - if (vstart == 0) - break; - - dom0_dev.num_bigblock = i / 2 + 1; - allocated_size = SIZE_PER_BLOCK * (i + 2); - - /* size of 4M */ - size = DOM0_MEMBLOCK_SIZE * 2; - - vaddr = vstart; - while (size > 0) { - SetPageReserved(virt_to_page(vaddr)); - vaddr += PAGE_SIZE; - size -= PAGE_SIZE; - } - - pfn = virt_to_pfn(vstart); - rsv_mm_info[i].pfn = pfn; - rsv_mm_info[i].vir_addr = vstart; - rsv_mm_info[i + 1].pfn = - pfn + DOM0_MEMBLOCK_SIZE / PAGE_SIZE; - rsv_mm_info[i + 1].vir_addr = - vstart + DOM0_MEMBLOCK_SIZE; - } - - /*if it failed to alloc 4M, and continue to alloc 2M once */ - for (; i < num_block; i++) { - vstart = (unsigned long) - __get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER); - if (vstart == 0) { - XEN_ERR("allocate memory fail.\n"); - dom0_memory_free(allocated_size); - return -ENOMEM; - } - - allocated_size += SIZE_PER_BLOCK; - - size = DOM0_MEMBLOCK_SIZE; - vaddr = vstart; - while (size > 0) { - SetPageReserved(virt_to_page(vaddr)); - vaddr += PAGE_SIZE; - size -= PAGE_SIZE; - } - pfn = virt_to_pfn(vstart); - rsv_mm_info[i].pfn = pfn; - rsv_mm_info[i].vir_addr = vstart; - } - - sort_viraddr(rsv_mm_info, num_block); - - for (i = 0; i< num_block; i++) { - - /* - * This API is used to exchage MFN for getting a block of - * contiguous physical addresses, its maximum size is 2M. - */ - #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) - if (xen_create_contiguous_region(rsv_mm_info[i].vir_addr, - DOM0_CONTIG_NUM_ORDER, 0) == 0) { - #else - if (xen_create_contiguous_region(rsv_mm_info[i].pfn * PAGE_SIZE, - DOM0_CONTIG_NUM_ORDER, 0, &dma_handle) == 0) { - #endif - rsv_mm_info[i].exchange_flag = 1; - rsv_mm_info[i].mfn = - pfn_to_mfn(rsv_mm_info[i].pfn); - rsv_mm_info[i].used = 0; - } else { - XEN_ERR("exchange memeory fail\n"); - rsv_mm_info[i].exchange_flag = 0; - dom0_dev.fail_times++; - if (dom0_dev.fail_times > MAX_EXCHANGE_FAIL_TIME) { - dom0_memory_free(rsv_size); - return -EFAULT; - } - } - } - - return 0; -} - -static int -dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data) -{ - uint32_t num_block; - int idx; - - /* check if there is a free name buffer */ - memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX); - mm_data->name[DOM0_NAME_MAX - 1] = '\0'; - idx = dom0_find_mempos(); - if (idx < 0) - return -1; - - num_block = meminfo->size / SIZE_PER_BLOCK; - /* find free memory and new memory segments*/ - find_free_memory(num_block, mm_data); - find_memseg(num_block, mm_data); - - /* update private memory data */ - mm_data->refcnt++; - mm_data->mem_size = meminfo->size; - - /* update global memory data */ - dom0_dev.mm_data[idx] = mm_data; - dom0_dev.num_mem_ctx++; - dom0_dev.used_memsize += mm_data->mem_size; - - return 0; -} - -static int -dom0_check_memory (struct memory_info *meminfo) -{ - int idx; - uint64_t mem_size; - - /* round memory size to the next even number. */ - if (meminfo->size % 2) - ++meminfo->size; - - mem_size = meminfo->size; - if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) { - XEN_ERR("Memory data space is full in Dom0 driver\n"); - return -1; - } - idx = dom0_find_memdata(meminfo->name); - if (idx >= 0) { - XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n", - meminfo->name); - return -1; - } - if ((dom0_dev.used_memsize + mem_size) > rsv_memsize) { - XEN_ERR("Total size can't be larger than reserved size.\n"); - return -1; - } - - return 0; -} - -static int __init -dom0_init(void) -{ - if (!xen_domain()) - return -ENODEV; - - if (rsv_memsize > DOM0_CONFIG_MEMSIZE) { - XEN_ERR("The reserved memory size cannot be greater than %d\n", - DOM0_CONFIG_MEMSIZE); - return -EINVAL; - } - - /* Setup the misc device */ - dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR; - dom0_dev.miscdev.name = "dom0_mm"; - dom0_dev.miscdev.fops = &data_fops; - - /* register misc char device */ - if (misc_register(&dom0_dev.miscdev) != 0) { - XEN_ERR("Misc device registration failed\n"); - return -EPERM; - } - - mutex_init(&dom0_dev.data_lock); - dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj); - - if (!dom0_kobj) { - XEN_ERR("dom0-mm object creation failed\n"); - misc_deregister(&dom0_dev.miscdev); - return -ENOMEM; - } - - if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) { - kobject_put(dom0_kobj); - misc_deregister(&dom0_dev.miscdev); - return -EPERM; - } - - if (dom0_memory_reserve(rsv_memsize) < 0) { - sysfs_remove_group(dom0_kobj, &dev_attr_grp); - kobject_put(dom0_kobj); - misc_deregister(&dom0_dev.miscdev); - return -ENOMEM; - } - - XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n"); - - return 0; -} - -static void __exit -dom0_exit(void) -{ - if (rsv_mm_info != NULL) - dom0_memory_free(rsv_memsize); - - sysfs_remove_group(dom0_kobj, &dev_attr_grp); - kobject_put(dom0_kobj); - misc_deregister(&dom0_dev.miscdev); - - XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n"); -} - -static int -dom0_open(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - - XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n"); - return 0; -} - -static int -dom0_release(struct inode *inode, struct file *file) -{ - int ret = 0; - struct dom0_mm_data *mm_data = file->private_data; - - if (mm_data == NULL) - return ret; - - mutex_lock(&dom0_dev.data_lock); - if (--mm_data->refcnt == 0) - ret = dom0_memory_release(mm_data); - mutex_unlock(&dom0_dev.data_lock); - - file->private_data = NULL; - XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n"); - return ret; -} - -static int -dom0_mmap(struct file *file, struct vm_area_struct *vm) -{ - int status = 0; - uint32_t idx = vm->vm_pgoff; - uint64_t pfn, size = vm->vm_end - vm->vm_start; - struct dom0_mm_data *mm_data = file->private_data; - - if(mm_data == NULL) - return -EINVAL; - - mutex_lock(&dom0_dev.data_lock); - if (idx >= mm_data->num_memseg) { - mutex_unlock(&dom0_dev.data_lock); - return -EINVAL; - } - - if (size > mm_data->seg_info[idx].size){ - mutex_unlock(&dom0_dev.data_lock); - return -EINVAL; - } - - XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size); - - pfn = mm_data->seg_info[idx].pfn; - mutex_unlock(&dom0_dev.data_lock); - - status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED); - - return status; -} -static int -dom0_ioctl(struct file *file, - unsigned int ioctl_num, - unsigned long ioctl_param) -{ - int idx, ret; - char name[DOM0_NAME_MAX] = {0}; - struct memory_info meminfo; - struct dom0_mm_data *mm_data = file->private_data; - - XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param); - - /** - * Switch according to the ioctl called - */ - switch _IOC_NR(ioctl_num) { - case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG): - ret = copy_from_user(&meminfo, (void *)ioctl_param, - sizeof(struct memory_info)); - if (ret) - return -EFAULT; - - if (mm_data != NULL) { - XEN_ERR("Cannot create memory segment for the same" - " file descriptor\n"); - return -EINVAL; - } - - /* Allocate private data */ - mm_data = vmalloc(sizeof(struct dom0_mm_data)); - if (!mm_data) { - XEN_ERR("Unable to allocate device private data\n"); - return -ENOMEM; - } - memset(mm_data, 0, sizeof(struct dom0_mm_data)); - - mutex_lock(&dom0_dev.data_lock); - /* check if we can allocate memory*/ - if (dom0_check_memory(&meminfo) < 0) { - mutex_unlock(&dom0_dev.data_lock); - vfree(mm_data); - return -EINVAL; - } - - /* allocate memory and created memory segments*/ - if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) { - XEN_ERR("create memory segment fail.\n"); - mutex_unlock(&dom0_dev.data_lock); - return -EIO; - } - - file->private_data = mm_data; - mutex_unlock(&dom0_dev.data_lock); - break; - - /* support multiple process in term of memory mapping*/ - case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG): - ret = copy_from_user(name, (void *)ioctl_param, - sizeof(char) * DOM0_NAME_MAX); - if (ret) - return -EFAULT; - - mutex_lock(&dom0_dev.data_lock); - idx = dom0_find_memdata(name); - if (idx < 0) { - mutex_unlock(&dom0_dev.data_lock); - return -EINVAL; - } - - mm_data = dom0_dev.mm_data[idx]; - mm_data->refcnt++; - file->private_data = mm_data; - mutex_unlock(&dom0_dev.data_lock); - break; - - case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG): - ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg, - sizeof(int)); - if (ret) - return -EFAULT; - break; - - case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO): - ret = copy_to_user((void *)ioctl_param, - &mm_data->seg_info[0], - sizeof(struct memseg_info) * - mm_data->num_memseg); - if (ret) - return -EFAULT; - break; - default: - XEN_PRINT("IOCTL default \n"); - break; - } - - return 0; -} - -module_init(dom0_init); -module_exit(dom0_exit); - -module_param(rsv_memsize, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(rsv_memsize, "Xen-dom0 reserved memory size(MB).\n"); diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 3a8f1540..f4f46c1b 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -44,8 +44,6 @@ DPDK_2.0 { rte_free; rte_get_hpet_cycles; rte_get_hpet_hz; - rte_get_log_level; - rte_get_log_type; rte_get_tsc_hz; rte_hexdump; rte_intr_callback_register; @@ -62,9 +60,7 @@ DPDK_2.0 { rte_malloc_set_limit; rte_malloc_socket; rte_malloc_validate; - rte_malloc_virt2phy; rte_mem_lock_page; - rte_mem_phy2mch; rte_mem_virt2phy; rte_memdump; rte_memory_get_nchannel; @@ -78,8 +74,6 @@ DPDK_2.0 { rte_openlog_stream; rte_realloc; rte_set_application_usage_hook; - rte_set_log_level; - rte_set_log_type; rte_socket_id; rte_strerror; rte_strsplit; @@ -87,8 +81,6 @@ DPDK_2.0 { rte_thread_get_affinity; rte_thread_set_affinity; rte_vlog; - rte_xen_dom0_memory_attach; - rte_xen_dom0_memory_init; rte_zmalloc; rte_zmalloc_socket; @@ -118,8 +110,6 @@ DPDK_2.2 { rte_keepalive_dispatch_pings; rte_keepalive_mark_alive; rte_keepalive_register_core; - rte_xen_dom0_supported; - rte_xen_mem_phy2mch; } DPDK_2.1; @@ -134,7 +124,6 @@ DPDK_16.04 { DPDK_16.07 { global: - pci_get_sysfs_path; rte_keepalive_mark_sleep; rte_keepalive_register_relay_callback; rte_rtm_supported; @@ -174,25 +163,6 @@ DPDK_17.05 { rte_log_set_global_level; rte_log_set_level; rte_log_set_level_regexp; - rte_pci_detach; - rte_pci_dump; - rte_pci_ioport_map; - rte_pci_ioport_read; - rte_pci_ioport_unmap; - rte_pci_ioport_write; - rte_pci_map_device; - rte_pci_probe; - rte_pci_probe_one; - rte_pci_read_config; - rte_pci_register; - rte_pci_scan; - rte_pci_unmap_device; - rte_pci_unregister; - rte_pci_write_config; - rte_vdev_init; - rte_vdev_register; - rte_vdev_uninit; - rte_vdev_unregister; vfio_get_container_fd; vfio_get_group_fd; vfio_get_group_no; @@ -209,6 +179,27 @@ DPDK_17.08 { } DPDK_17.05; +DPDK_17.11 { + global: + + rte_eal_create_uio_dev; + rte_bus_get_iommu_class; + rte_eal_has_pci; + rte_eal_iova_mode; + rte_eal_mbuf_default_mempool_ops; + rte_eal_using_phys_addrs; + rte_eal_vfio_intr_mode; + rte_lcore_has_role; + rte_malloc_virt2iova; + rte_mem_virt2iova; + rte_vfio_enable; + rte_vfio_is_enabled; + rte_vfio_noiommu_is_enabled; + rte_vfio_release_device; + rte_vfio_setup_device; + +} DPDK_17.08; + EXPERIMENTAL { global: @@ -217,28 +208,31 @@ EXPERIMENTAL { rte_eal_devargs_remove; rte_eal_hotplug_add; rte_eal_hotplug_remove; - rte_service_disable_on_lcore; + rte_service_component_register; + rte_service_component_unregister; + rte_service_component_runstate_set; rte_service_dump; - rte_service_enable_on_lcore; rte_service_get_by_id; rte_service_get_by_name; rte_service_get_count; - rte_service_get_enabled_on_lcore; - rte_service_is_running; + rte_service_get_name; rte_service_lcore_add; rte_service_lcore_count; + rte_service_lcore_count_services; rte_service_lcore_del; rte_service_lcore_list; rte_service_lcore_reset_all; rte_service_lcore_start; rte_service_lcore_stop; + rte_service_map_lcore_get; + rte_service_map_lcore_set; rte_service_probe_capability; - rte_service_register; rte_service_reset; + rte_service_run_iter_on_app_lcore; + rte_service_runstate_get; + rte_service_runstate_set; + rte_service_set_runstate_mapped_check; rte_service_set_stats_enable; - rte_service_start; rte_service_start_with_defaults; - rte_service_stop; - rte_service_unregister; -} DPDK_17.08; +} DPDK_17.11; |