From 055c52583a2794da8ba1e85a48cce3832372b12f Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Wed, 8 Nov 2017 14:15:11 +0000 Subject: New upstream version 17.11-rc3 Change-Id: I6a5baa40612fe0c20f30b5fa773a6cbbac63a685 Signed-off-by: Luca Boccassi --- lib/librte_eal/linuxapp/eal/Makefile | 20 +- lib/librte_eal/linuxapp/eal/eal.c | 117 ++-- lib/librte_eal/linuxapp/eal/eal_alarm.c | 1 - lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 1 - lib/librte_eal/linuxapp/eal/eal_interrupts.c | 21 +- lib/librte_eal/linuxapp/eal/eal_log.c | 1 - lib/librte_eal/linuxapp/eal/eal_memory.c | 99 +-- lib/librte_eal/linuxapp/eal/eal_pci.c | 722 --------------------- lib/librte_eal/linuxapp/eal/eal_pci_init.h | 97 --- lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 567 ---------------- lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 674 ------------------- lib/librte_eal/linuxapp/eal/eal_thread.c | 1 - lib/librte_eal/linuxapp/eal/eal_timer.c | 1 - lib/librte_eal/linuxapp/eal/eal_vfio.c | 75 ++- lib/librte_eal/linuxapp/eal/eal_vfio.h | 49 +- lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 7 +- lib/librte_eal/linuxapp/eal/eal_xen_memory.c | 381 ----------- .../eal/include/exec-env/rte_dom0_common.h | 108 --- .../linuxapp/eal/include/exec-env/rte_interrupts.h | 239 ------- lib/librte_eal/linuxapp/eal/rte_eal_version.map | 244 ------- 20 files changed, 189 insertions(+), 3236 deletions(-) delete mode 100644 lib/librte_eal/linuxapp/eal/eal_pci.c delete mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_init.h delete mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_uio.c delete mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c delete mode 100644 lib/librte_eal/linuxapp/eal/eal_xen_memory.c delete mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h delete mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h delete mode 100644 lib/librte_eal/linuxapp/eal/rte_eal_version.map (limited to 'lib/librte_eal/linuxapp/eal') diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 90bca4d6..5a7b8b2a 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -34,10 +34,10 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_eal.a ARCH_DIR ?= $(RTE_ARCH) -EXPORT_MAP := rte_eal_version.map +EXPORT_MAP := ../../rte_eal_version.map VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) -LIBABIVER := 5 +LIBABIVER := 6 VPATH += $(RTE_SDK)/lib/librte_eal/common @@ -58,16 +58,10 @@ endif SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c -ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y) -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_xen_memory.c -endif SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_mp_sync.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c @@ -80,9 +74,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_vdev.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c -SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_tailqs.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_errno.c @@ -104,6 +95,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c SRCS-$(CONFIG_RTE_ARCH_X86) += rte_spinlock.c +SRCS-y += rte_cycles.c CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST) @@ -116,13 +108,11 @@ CFLAGS_eal_thread.o := -D_GNU_SOURCE CFLAGS_eal_log.o := -D_GNU_SOURCE CFLAGS_eal_common_log.o := -D_GNU_SOURCE CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE -CFLAGS_eal_pci.o := -D_GNU_SOURCE -CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE -CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE CFLAGS_eal_common_options.o := -D_GNU_SOURCE CFLAGS_eal_common_thread.o := -D_GNU_SOURCE CFLAGS_eal_common_lcore.o := -D_GNU_SOURCE +CFLAGS_rte_cycles.o := -D_GNU_SOURCE # workaround for a gcc bug with noreturn attribute # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 @@ -130,7 +120,7 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) CFLAGS_eal_thread.o += -Wno-return-type endif -INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h +INC := rte_kni_common.h SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ $(addprefix include/exec-env/,$(INC)) diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 48f12f44..229eec9f 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include @@ -71,12 +70,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include "eal_private.h" #include "eal_thread.h" @@ -121,6 +120,13 @@ struct internal_config internal_config; /* used by rte_rdtsc() */ int rte_cycles_vmware_tsc_map; +/* Return mbuf pool ops name */ +const char * +rte_eal_mbuf_default_mempool_ops(void) +{ + return internal_config.mbuf_pool_ops_name; +} + /* Return a pointer to the configuration structure */ struct rte_config * rte_eal_get_configuration(void) @@ -128,6 +134,12 @@ rte_eal_get_configuration(void) return &rte_config; } +enum rte_iova_mode +rte_eal_iova_mode(void) +{ + return rte_eal_get_configuration()->iova_mode; +} + /* parse a sysfs (or other) file containing one integer value */ int eal_parse_sysfs_value(const char *filename, unsigned long *val) @@ -354,7 +366,6 @@ eal_usage(const char *prgname) " --"OPT_BASE_VIRTADDR" Base virtual address\n" " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" - " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n" "\n"); /* Allow the application to print its usage message too if hook is set */ if ( rte_application_usage_hook ) { @@ -555,25 +566,12 @@ eal_parse_args(int argc, char **argv) eal_usage(prgname); exit(EXIT_SUCCESS); - /* long options */ - case OPT_XEN_DOM0_NUM: -#ifdef RTE_LIBRTE_XEN_DOM0 - internal_config.xen_dom0_support = 1; -#else - RTE_LOG(ERR, EAL, "Can't support DPDK app " - "running on Dom0, please configure" - " RTE_LIBRTE_XEN_DOM0=y\n"); - ret = -1; - goto out; -#endif - break; - case OPT_HUGE_DIR_NUM: - internal_config.hugepage_dir = optarg; + internal_config.hugepage_dir = strdup(optarg); break; case OPT_FILE_PREFIX_NUM: - internal_config.hugefile_prefix = optarg; + internal_config.hugefile_prefix = strdup(optarg); break; case OPT_SOCKET_MEM_NUM: @@ -610,6 +608,10 @@ eal_parse_args(int argc, char **argv) internal_config.create_uio_dev = 1; break; + case OPT_MBUF_POOL_OPS_NAME_NUM: + internal_config.mbuf_pool_ops_name = optarg; + break; + default: if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { RTE_LOG(ERR, EAL, "Option %c is not supported " @@ -641,15 +643,6 @@ eal_parse_args(int argc, char **argv) goto out; } - /* --xen-dom0 doesn't make sense with --socket-mem */ - if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) { - RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified " - "together with --"OPT_XEN_DOM0"\n"); - eal_usage(prgname); - ret = -1; - goto out; - } - if (optind >= 0) argv[optind-1] = prgname; ret = optind-1; @@ -716,10 +709,9 @@ static int rte_eal_vfio_setup(void) { int vfio_enabled = 0; - if (!internal_config.no_pci) { - pci_vfio_enable(); - vfio_enabled |= pci_vfio_is_enabled(); - } + if (rte_vfio_enable("vfio")) + return -1; + vfio_enabled = rte_vfio_is_enabled("vfio"); if (vfio_enabled) { @@ -792,9 +784,40 @@ rte_eal_init(int argc, char **argv) return -1; } + if (eal_plugins_init() < 0) { + rte_eal_init_alert("Cannot init plugins\n"); + rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } + + if (eal_option_device_parse()) { + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } + + if (rte_bus_scan()) { + rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } + + /* autodetect the iova mapping mode (default is iova_pa) */ + rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class(); + + /* Workaround for KNI which requires physical address to work */ + if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA && + rte_eal_check_module("rte_kni") == 1) { + rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA; + RTE_LOG(WARNING, EAL, + "Some devices want IOVA as VA but PA will be used because.. " + "KNI module inserted\n"); + } + if (internal_config.no_hugetlbfs == 0 && internal_config.process_type != RTE_PROC_SECONDARY && - internal_config.xen_dom0_support == 0 && eal_hugepage_info_init() < 0) { rte_eal_init_alert("Cannot get hugepage information."); rte_errno = EACCES; @@ -873,9 +896,6 @@ rte_eal_init(int argc, char **argv) eal_check_mem_on_local_socket(); - if (eal_plugins_init() < 0) - rte_eal_init_alert("Cannot init plugins\n"); - eal_thread_init_master(rte_config.master_lcore); ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); @@ -889,17 +909,6 @@ rte_eal_init(int argc, char **argv) return -1; } - if (eal_option_device_parse()) { - rte_errno = ENODEV; - return -1; - } - - if (rte_bus_scan()) { - rte_eal_init_alert("Cannot scan the buses for devices\n"); - rte_errno = ENODEV; - return -1; - } - RTE_LCORE_FOREACH_SLAVE(i) { /* @@ -983,6 +992,22 @@ int rte_eal_has_hugepages(void) return ! internal_config.no_hugetlbfs; } +int rte_eal_has_pci(void) +{ + return !internal_config.no_pci; +} + +int rte_eal_create_uio_dev(void) +{ + return internal_config.create_uio_dev; +} + +enum rte_intr_mode +rte_eal_vfio_intr_mode(void) +{ + return internal_config.vfio_intr_mode; +} + int rte_eal_check_module(const char *module_name) { diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c index fbae4613..8e4a775b 100644 --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c @@ -40,7 +40,6 @@ #include #include -#include #include #include #include diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 7a21e8f6..86e174fc 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -46,7 +46,6 @@ #include #include -#include #include #include #include diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 3e9ac41e..1c20693d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include @@ -60,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -914,7 +912,7 @@ static void eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) { union rte_intr_read_buffer buf; - int bytes_read = 1; + int bytes_read = 0; int nbytes; switch (intr_handle->type) { @@ -930,11 +928,9 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) break; #endif case RTE_INTR_HANDLE_VDEV: - /* for vdev, fd points to: - * a. eventfd which does not need to read out; - * b. datapath fd which needs PMD to read out. - */ - return; + bytes_read = intr_handle->efd_counter_size; + /* For vdev, number of bytes to read is set by driver */ + break; case RTE_INTR_HANDLE_EXT: return; default: @@ -947,6 +943,8 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) * read out to clear the ready-to-be-read flag * for epoll_wait. */ + if (bytes_read == 0) + return; do { nbytes = read(fd, &buf, bytes_read); if (nbytes < 0) { @@ -1206,7 +1204,12 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) intr_handle->nb_efd = n; intr_handle->max_intr = NB_OTHER_INTR + n; } else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) { - /* do nothing, and let vdev driver to initialize this struct */ + /* only check, initialization would be done in vdev driver.*/ + if (intr_handle->efd_counter_size > + sizeof(union rte_intr_read_buffer)) { + RTE_LOG(ERR, EAL, "the efd_counter_size is oversized"); + return -EINVAL; + } } else { intr_handle->efds[0] = intr_handle->fd; intr_handle->nb_efd = RTE_MIN(nb_efd, 1U); diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c index e3a50aa3..c088bd9b 100644 --- a/lib/librte_eal/linuxapp/eal/eal_log.c +++ b/lib/librte_eal/linuxapp/eal/eal_log.c @@ -39,7 +39,6 @@ #include #include -#include #include #include #include diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 52791282..a54b822a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -59,7 +59,6 @@ #include #include -#include #include #include #include @@ -75,13 +74,6 @@ #define PFN_MASK_SIZE 8 -#ifdef RTE_LIBRTE_XEN_DOM0 -int rte_xen_dom0_supported(void) -{ - return internal_config.xen_dom0_support; -} -#endif - /** * @file * Huge page mapping under linux @@ -106,10 +98,6 @@ test_phys_addrs_available(void) uint64_t tmp; phys_addr_t physaddr; - /* For dom0, phys addresses can always be available */ - if (rte_xen_dom0_supported()) - return; - if (!rte_eal_has_hugepages()) { RTE_LOG(ERR, EAL, "Started without hugepages support, physical addresses not available\n"); @@ -119,10 +107,11 @@ test_phys_addrs_available(void) physaddr = rte_mem_virt2phy(&tmp); if (physaddr == RTE_BAD_PHYS_ADDR) { - RTE_LOG(ERR, EAL, - "Cannot obtain physical addresses: %s. " - "Only vfio will function.\n", - strerror(errno)); + if (rte_eal_iova_mode() == RTE_IOVA_PA) + RTE_LOG(ERR, EAL, + "Cannot obtain physical addresses: %s. " + "Only vfio will function.\n", + strerror(errno)); phys_addrs_available = false; } } @@ -139,32 +128,9 @@ rte_mem_virt2phy(const void *virtaddr) int page_size; off_t offset; - /* when using dom0, /proc/self/pagemap always returns 0, check in - * dpdk memory by browsing the memsegs */ - if (rte_xen_dom0_supported()) { - struct rte_mem_config *mcfg; - struct rte_memseg *memseg; - unsigned i; - - mcfg = rte_eal_get_configuration()->mem_config; - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - memseg = &mcfg->memseg[i]; - if (memseg->addr == NULL) - break; - if (virtaddr > memseg->addr && - virtaddr < RTE_PTR_ADD(memseg->addr, - memseg->len)) { - return memseg->phys_addr + - RTE_PTR_DIFF(virtaddr, memseg->addr); - } - } - - return RTE_BAD_PHYS_ADDR; - } - /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ if (!phys_addrs_available) - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; /* standard page size */ page_size = getpagesize(); @@ -173,7 +139,7 @@ rte_mem_virt2phy(const void *virtaddr) if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", __func__, strerror(errno)); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } virt_pfn = (unsigned long)virtaddr / page_size; @@ -182,7 +148,7 @@ rte_mem_virt2phy(const void *virtaddr) RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", __func__, strerror(errno)); close(fd); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } retval = read(fd, &page, PFN_MASK_SIZE); @@ -190,12 +156,12 @@ rte_mem_virt2phy(const void *virtaddr) if (retval < 0) { RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", __func__, strerror(errno)); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } else if (retval != PFN_MASK_SIZE) { RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap " "but expected %d:\n", __func__, retval, PFN_MASK_SIZE); - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; } /* @@ -203,7 +169,7 @@ rte_mem_virt2phy(const void *virtaddr) * pagemap.txt in linux Documentation) */ if ((page & 0x7fffffffffffffULL) == 0) - return RTE_BAD_PHYS_ADDR; + return RTE_BAD_IOVA; physaddr = ((page & 0x7fffffffffffffULL) * page_size) + ((unsigned long)virtaddr % page_size); @@ -211,6 +177,14 @@ rte_mem_virt2phy(const void *virtaddr) return physaddr; } +rte_iova_t +rte_mem_virt2iova(const void *virtaddr) +{ + if (rte_eal_iova_mode() == RTE_IOVA_VA) + return (uintptr_t)virtaddr; + return rte_mem_virt2phy(virtaddr); +} + /* * For each hugepage in hugepg_tbl, fill the physaddr value. We find * it by browsing the /proc/self/pagemap special file. @@ -716,6 +690,8 @@ create_shared_memory(const char *filename, const size_t mem_size) } retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); close(fd); + if (retval == MAP_FAILED) + return NULL; return retval; } @@ -1059,7 +1035,10 @@ rte_eal_hugepage_init(void) strerror(errno)); return -1; } - mcfg->memseg[0].phys_addr = RTE_BAD_PHYS_ADDR; + if (rte_eal_iova_mode() == RTE_IOVA_VA) + mcfg->memseg[0].iova = (uintptr_t)addr; + else + mcfg->memseg[0].iova = RTE_BAD_IOVA; mcfg->memseg[0].addr = addr; mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; mcfg->memseg[0].len = internal_config.memory; @@ -1067,17 +1046,6 @@ rte_eal_hugepage_init(void) return 0; } -/* check if app runs on Xen Dom0 */ - if (internal_config.xen_dom0_support) { -#ifdef RTE_LIBRTE_XEN_DOM0 - /* use dom0_mm kernel driver to init memory */ - if (rte_xen_dom0_memory_init() < 0) - return -1; - else - return 0; -#endif - } - /* calculate total number of hugepages available. at this point we haven't * yet started sorting them so they all are on socket 0 */ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { @@ -1319,7 +1287,7 @@ rte_eal_hugepage_init(void) if (j == RTE_MAX_MEMSEG) break; - mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].iova = hugepage[i].physaddr; mcfg->memseg[j].addr = hugepage[i].final_va; mcfg->memseg[j].len = hugepage[i].size; mcfg->memseg[j].socket_id = hugepage[i].socket_id; @@ -1330,7 +1298,7 @@ rte_eal_hugepage_init(void) #ifdef RTE_ARCH_PPC_64 /* Use the phy and virt address of the last page as segment * address for IBM Power architecture */ - mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].iova = hugepage[i].physaddr; mcfg->memseg[j].addr = hugepage[i].final_va; #endif mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz; @@ -1400,17 +1368,6 @@ rte_eal_hugepage_attach(void) test_phys_addrs_available(); - if (internal_config.xen_dom0_support) { -#ifdef RTE_LIBRTE_XEN_DOM0 - if (rte_xen_dom0_memory_attach() < 0) { - RTE_LOG(ERR, EAL, "Failed to attach memory segments of primary " - "process\n"); - return -1; - } - return 0; -#endif - } - fd_zero = open("/dev/zero", O_RDONLY); if (fd_zero < 0) { RTE_LOG(ERR, EAL, "Could not open /dev/zero\n"); @@ -1542,7 +1499,7 @@ error: return -1; } -bool +int rte_eal_using_phys_addrs(void) { return phys_addrs_available; diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c deleted file mode 100644 index 8951ce74..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ /dev/null @@ -1,722 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "eal_filesystem.h" -#include "eal_private.h" -#include "eal_pci_init.h" - -/** - * @file - * PCI probing under linux - * - * This code is used to simulate a PCI probe by parsing information in sysfs. - * When a registered device matches a driver, it is then initialized with - * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). - */ - -extern struct rte_pci_bus rte_pci_bus; - -static int -pci_get_kernel_driver_by_path(const char *filename, char *dri_name) -{ - int count; - char path[PATH_MAX]; - char *name; - - if (!filename || !dri_name) - return -1; - - count = readlink(filename, path, PATH_MAX); - if (count >= PATH_MAX) - return -1; - - /* For device does not have a driver */ - if (count < 0) - return 1; - - path[count] = '\0'; - - name = strrchr(path, '/'); - if (name) { - strncpy(dri_name, name + 1, strlen(name + 1) + 1); - return 0; - } - - return -1; -} - -/* Map pci device */ -int -rte_pci_map_device(struct rte_pci_device *dev) -{ - int ret = -1; - - /* try mapping the NIC resources using VFIO if it exists */ - switch (dev->kdrv) { - case RTE_KDRV_VFIO: -#ifdef VFIO_PRESENT - if (pci_vfio_is_enabled()) - ret = pci_vfio_map_resource(dev); -#endif - break; - case RTE_KDRV_IGB_UIO: - case RTE_KDRV_UIO_GENERIC: - if (rte_eal_using_phys_addrs()) { - /* map resources for devices that use uio */ - ret = pci_uio_map_resource(dev); - } - break; - default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - ret = 1; - break; - } - - return ret; -} - -/* Unmap pci device */ -void -rte_pci_unmap_device(struct rte_pci_device *dev) -{ - /* try unmapping the NIC resources using VFIO if it exists */ - switch (dev->kdrv) { - case RTE_KDRV_VFIO: -#ifdef VFIO_PRESENT - if (pci_vfio_is_enabled()) - pci_vfio_unmap_resource(dev); -#endif - break; - case RTE_KDRV_IGB_UIO: - case RTE_KDRV_UIO_GENERIC: - /* unmap resources for devices that use uio */ - pci_uio_unmap_resource(dev); - break; - default: - RTE_LOG(DEBUG, EAL, - " Not managed by a supported kernel driver, skipped\n"); - break; - } -} - -void * -pci_find_max_end_va(void) -{ - const struct rte_memseg *seg = rte_eal_get_physmem_layout(); - const struct rte_memseg *last = seg; - unsigned i = 0; - - for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { - if (seg->addr == NULL) - break; - - if (seg->addr > last->addr) - last = seg; - - } - return RTE_PTR_ADD(last->addr, last->len); -} - -/* parse one line of the "resource" sysfs file (note that the 'line' - * string is modified) - */ -int -pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, - uint64_t *end_addr, uint64_t *flags) -{ - union pci_resource_info { - struct { - char *phys_addr; - char *end_addr; - char *flags; - }; - char *ptrs[PCI_RESOURCE_FMT_NVAL]; - } res_info; - - if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); - return -1; - } - errno = 0; - *phys_addr = strtoull(res_info.phys_addr, NULL, 16); - *end_addr = strtoull(res_info.end_addr, NULL, 16); - *flags = strtoull(res_info.flags, NULL, 16); - if (errno != 0) { - RTE_LOG(ERR, EAL, - "%s(): bad resource format\n", __func__); - return -1; - } - - return 0; -} - -/* parse the "resource" sysfs file */ -static int -pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) -{ - FILE *f; - char buf[BUFSIZ]; - int i; - uint64_t phys_addr, end_addr, flags; - - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n"); - return -1; - } - - for (i = 0; imem_resource[i].phys_addr = phys_addr; - dev->mem_resource[i].len = end_addr - phys_addr + 1; - /* not mapped for now */ - dev->mem_resource[i].addr = NULL; - } - } - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} - -/* Scan one pci sysfs entry, and fill the devices list from it. */ -static int -pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) -{ - char filename[PATH_MAX]; - unsigned long tmp; - struct rte_pci_device *dev; - char driver[PATH_MAX]; - int ret; - - dev = malloc(sizeof(*dev)); - if (dev == NULL) - return -1; - - memset(dev, 0, sizeof(*dev)); - dev->addr = *addr; - - /* get vendor id */ - snprintf(filename, sizeof(filename), "%s/vendor", dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.vendor_id = (uint16_t)tmp; - - /* get device id */ - snprintf(filename, sizeof(filename), "%s/device", dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.device_id = (uint16_t)tmp; - - /* get subsystem_vendor id */ - snprintf(filename, sizeof(filename), "%s/subsystem_vendor", - dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.subsystem_vendor_id = (uint16_t)tmp; - - /* get subsystem_device id */ - snprintf(filename, sizeof(filename), "%s/subsystem_device", - dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - dev->id.subsystem_device_id = (uint16_t)tmp; - - /* get class_id */ - snprintf(filename, sizeof(filename), "%s/class", - dirname); - if (eal_parse_sysfs_value(filename, &tmp) < 0) { - free(dev); - return -1; - } - /* the least 24 bits are valid: class, subclass, program interface */ - dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID; - - /* get max_vfs */ - dev->max_vfs = 0; - snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); - if (!access(filename, F_OK) && - eal_parse_sysfs_value(filename, &tmp) == 0) - dev->max_vfs = (uint16_t)tmp; - else { - /* for non igb_uio driver, need kernel version >= 3.8 */ - snprintf(filename, sizeof(filename), - "%s/sriov_numvfs", dirname); - if (!access(filename, F_OK) && - eal_parse_sysfs_value(filename, &tmp) == 0) - dev->max_vfs = (uint16_t)tmp; - } - - /* get numa node, default to 0 if not present */ - snprintf(filename, sizeof(filename), "%s/numa_node", - dirname); - - if (access(filename, F_OK) != -1) { - if (eal_parse_sysfs_value(filename, &tmp) == 0) - dev->device.numa_node = tmp; - else - dev->device.numa_node = -1; - } else { - dev->device.numa_node = 0; - } - - pci_name_set(dev); - - /* parse resources */ - snprintf(filename, sizeof(filename), "%s/resource", dirname); - if (pci_parse_sysfs_resource(filename, dev) < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__); - free(dev); - return -1; - } - - /* parse driver */ - snprintf(filename, sizeof(filename), "%s/driver", dirname); - ret = pci_get_kernel_driver_by_path(filename, driver); - if (ret < 0) { - RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); - free(dev); - return -1; - } - - if (!ret) { - if (!strcmp(driver, "vfio-pci")) - dev->kdrv = RTE_KDRV_VFIO; - else if (!strcmp(driver, "igb_uio")) - dev->kdrv = RTE_KDRV_IGB_UIO; - else if (!strcmp(driver, "uio_pci_generic")) - dev->kdrv = RTE_KDRV_UIO_GENERIC; - else - dev->kdrv = RTE_KDRV_UNKNOWN; - } else - dev->kdrv = RTE_KDRV_NONE; - - /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { - rte_pci_add_device(dev); - } else { - struct rte_pci_device *dev2; - int ret; - - TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { - ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); - if (ret > 0) - continue; - - if (ret < 0) { - rte_pci_insert_device(dev2, dev); - } else { /* already registered */ - dev2->kdrv = dev->kdrv; - dev2->max_vfs = dev->max_vfs; - pci_name_set(dev2); - memmove(dev2->mem_resource, dev->mem_resource, - sizeof(dev->mem_resource)); - free(dev); - } - return 0; - } - - rte_pci_add_device(dev); - } - - return 0; -} - -int -pci_update_device(const struct rte_pci_addr *addr) -{ - char filename[PATH_MAX]; - - snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT, - pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, - addr->function); - - return pci_scan_one(filename, addr); -} - -/* - * split up a pci address into its constituent parts. - */ -static int -parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) -{ - /* first split on ':' */ - union splitaddr { - struct { - char *domain; - char *bus; - char *devid; - char *function; - }; - char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */ - } splitaddr; - - char *buf_copy = strndup(buf, bufsize); - if (buf_copy == NULL) - return -1; - - if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':') - != PCI_FMT_NVAL - 1) - goto error; - /* final split is on '.' between devid and function */ - splitaddr.function = strchr(splitaddr.devid,'.'); - if (splitaddr.function == NULL) - goto error; - *splitaddr.function++ = '\0'; - - /* now convert to int values */ - errno = 0; - addr->domain = strtoul(splitaddr.domain, NULL, 16); - addr->bus = strtoul(splitaddr.bus, NULL, 16); - addr->devid = strtoul(splitaddr.devid, NULL, 16); - addr->function = strtoul(splitaddr.function, NULL, 10); - if (errno != 0) - goto error; - - free(buf_copy); /* free the copy made with strdup */ - return 0; -error: - free(buf_copy); - return -1; -} - -/* - * Scan the content of the PCI bus, and the devices in the devices - * list - */ -int -rte_pci_scan(void) -{ - struct dirent *e; - DIR *dir; - char dirname[PATH_MAX]; - struct rte_pci_addr addr; - - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - dir = opendir(pci_get_sysfs_path()); - if (dir == NULL) { - RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", - __func__, strerror(errno)); - return -1; - } - - while ((e = readdir(dir)) != NULL) { - if (e->d_name[0] == '.') - continue; - - if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0) - continue; - - snprintf(dirname, sizeof(dirname), "%s/%s", - pci_get_sysfs_path(), e->d_name); - - if (pci_scan_one(dirname, &addr) < 0) - goto error; - } - closedir(dir); - return 0; - -error: - closedir(dir); - return -1; -} - -/* Read PCI config space. */ -int rte_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &device->intr_handle; - - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: - return pci_uio_read_config(intr_handle, buf, len, offset); - -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: - return pci_vfio_read_config(intr_handle, buf, len, offset); -#endif - default: - RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); - return -1; - } -} - -/* Write PCI config space. */ -int rte_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &device->intr_handle; - - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: - return pci_uio_write_config(intr_handle, buf, len, offset); - -#ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: - return pci_vfio_write_config(intr_handle, buf, len, offset); -#endif - default: - RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); - return -1; - } -} - -#if defined(RTE_ARCH_X86) -static int -pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, - struct rte_pci_ioport *p) -{ - uint16_t start, end; - FILE *fp; - char *line = NULL; - char pci_id[16]; - int found = 0; - size_t linesz; - - snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT, - dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - - fp = fopen("/proc/ioports", "r"); - if (fp == NULL) { - RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__); - return -1; - } - - while (getdelim(&line, &linesz, '\n', fp) > 0) { - char *ptr = line; - char *left; - int n; - - n = strcspn(ptr, ":"); - ptr[n] = 0; - left = &ptr[n + 1]; - - while (*left && isspace(*left)) - left++; - - if (!strncmp(left, pci_id, strlen(pci_id))) { - found = 1; - - while (*ptr && isspace(*ptr)) - ptr++; - - sscanf(ptr, "%04hx-%04hx", &start, &end); - - break; - } - } - - free(line); - fclose(fp); - - if (!found) - return -1; - - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - p->base = start; - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start); - - return 0; -} -#endif - -int -rte_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - int ret = -1; - - switch (dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - if (pci_vfio_is_enabled()) - ret = pci_vfio_ioport_map(dev, bar, p); - break; -#endif - case RTE_KDRV_IGB_UIO: - ret = pci_uio_ioport_map(dev, bar, p); - break; - case RTE_KDRV_UIO_GENERIC: -#if defined(RTE_ARCH_X86) - ret = pci_ioport_map(dev, bar, p); -#else - ret = pci_uio_ioport_map(dev, bar, p); -#endif - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - ret = pci_ioport_map(dev, bar, p); -#endif - break; - default: - break; - } - - if (!ret) - p->dev = dev; - - return ret; -} - -void -rte_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ - switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - pci_vfio_ioport_read(p, data, len, offset); - break; -#endif - case RTE_KDRV_IGB_UIO: - pci_uio_ioport_read(p, data, len, offset); - break; - case RTE_KDRV_UIO_GENERIC: - pci_uio_ioport_read(p, data, len, offset); - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - pci_uio_ioport_read(p, data, len, offset); -#endif - break; - default: - break; - } -} - -void -rte_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ - switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - pci_vfio_ioport_write(p, data, len, offset); - break; -#endif - case RTE_KDRV_IGB_UIO: - pci_uio_ioport_write(p, data, len, offset); - break; - case RTE_KDRV_UIO_GENERIC: - pci_uio_ioport_write(p, data, len, offset); - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - pci_uio_ioport_write(p, data, len, offset); -#endif - break; - default: - break; - } -} - -int -rte_pci_ioport_unmap(struct rte_pci_ioport *p) -{ - int ret = -1; - - switch (p->dev->kdrv) { -#ifdef VFIO_PRESENT - case RTE_KDRV_VFIO: - if (pci_vfio_is_enabled()) - ret = pci_vfio_ioport_unmap(p); - break; -#endif - case RTE_KDRV_IGB_UIO: - ret = pci_uio_ioport_unmap(p); - break; - case RTE_KDRV_UIO_GENERIC: -#if defined(RTE_ARCH_X86) - ret = 0; -#else - ret = pci_uio_ioport_unmap(p); -#endif - break; - case RTE_KDRV_NONE: -#if defined(RTE_ARCH_X86) - ret = 0; -#endif - break; - default: - break; - } - - return ret; -} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h deleted file mode 100644 index ae2980d6..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h +++ /dev/null @@ -1,97 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef EAL_PCI_INIT_H_ -#define EAL_PCI_INIT_H_ - -#include "eal_vfio.h" - -/** IO resource type: */ -#define IORESOURCE_IO 0x00000100 -#define IORESOURCE_MEM 0x00000200 - -/* - * Helper function to map PCI resources right after hugepages in virtual memory - */ -extern void *pci_map_addr; -void *pci_find_max_end_va(void); - -/* parse one line of the "resource" sysfs file (note that the 'line' - * string is modified) - */ -int pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, - uint64_t *end_addr, uint64_t *flags); - -int pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res); -void pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res); -int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx); - -int pci_uio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs); -int pci_uio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs); - -int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p); -void pci_uio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset); -void pci_uio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset); -int pci_uio_ioport_unmap(struct rte_pci_ioport *p); - -#ifdef VFIO_PRESENT - -/* access config space */ -int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs); -int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs); - -int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p); -void pci_vfio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset); -void pci_vfio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset); -int pci_vfio_ioport_unmap(struct rte_pci_ioport *p); - -/* map/unmap VFIO resource prototype */ -int pci_vfio_map_resource(struct rte_pci_device *dev); -int pci_vfio_unmap_resource(struct rte_pci_device *dev); - -#endif - -#endif /* EAL_PCI_INIT_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c deleted file mode 100644 index fa10329f..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ /dev/null @@ -1,567 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(RTE_ARCH_X86) -#include -#endif - -#include -#include -#include -#include -#include - -#include "eal_filesystem.h" -#include "eal_pci_init.h" - -void *pci_map_addr = NULL; - -#define OFF_MAX ((uint64_t)(off_t)-1) - -int -pci_uio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offset) -{ - return pread(intr_handle->uio_cfg_fd, buf, len, offset); -} - -int -pci_uio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offset) -{ - return pwrite(intr_handle->uio_cfg_fd, buf, len, offset); -} - -static int -pci_uio_set_bus_master(int dev_fd) -{ - uint16_t reg; - int ret; - - ret = pread(dev_fd, ®, sizeof(reg), PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, - "Cannot read command from PCI config space!\n"); - return -1; - } - - /* return if bus mastering is already on */ - if (reg & PCI_COMMAND_MASTER) - return 0; - - reg |= PCI_COMMAND_MASTER; - - ret = pwrite(dev_fd, ®, sizeof(reg), PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, - "Cannot write command to PCI config space!\n"); - return -1; - } - - return 0; -} - -static int -pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) -{ - FILE *f; - char filename[PATH_MAX]; - int ret; - unsigned major, minor; - dev_t dev; - - /* get the name of the sysfs file that contains the major and minor - * of the uio device and read its content */ - snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path); - - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n", - __func__); - return -1; - } - - ret = fscanf(f, "%u:%u", &major, &minor); - if (ret != 2) { - RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n", - __func__); - fclose(f); - return -1; - } - fclose(f); - - /* create the char device "mknod /dev/uioX c major minor" */ - snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); - dev = makedev(major, minor); - ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); - if (ret != 0) { - RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", - __func__, strerror(errno)); - return -1; - } - - return ret; -} - -/* - * Return the uioX char device used for a pci device. On success, return - * the UIO number and fill dstbuf string with the path of the device in - * sysfs. On error, return a negative value. In this case dstbuf is - * invalid. - */ -static int -pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, - unsigned int buflen, int create) -{ - struct rte_pci_addr *loc = &dev->addr; - unsigned int uio_num; - struct dirent *e; - DIR *dir; - char dirname[PATH_MAX]; - - /* depending on kernel version, uio can be located in uio/uioX - * or uio:uioX */ - - snprintf(dirname, sizeof(dirname), - "%s/" PCI_PRI_FMT "/uio", pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - - dir = opendir(dirname); - if (dir == NULL) { - /* retry with the parent directory */ - snprintf(dirname, sizeof(dirname), - "%s/" PCI_PRI_FMT, pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - dir = opendir(dirname); - - if (dir == NULL) { - RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname); - return -1; - } - } - - /* take the first file starting with "uio" */ - while ((e = readdir(dir)) != NULL) { - /* format could be uio%d ...*/ - int shortprefix_len = sizeof("uio") - 1; - /* ... or uio:uio%d */ - int longprefix_len = sizeof("uio:uio") - 1; - char *endptr; - - if (strncmp(e->d_name, "uio", 3) != 0) - continue; - - /* first try uio%d */ - errno = 0; - uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10); - if (errno == 0 && endptr != (e->d_name + shortprefix_len)) { - snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); - break; - } - - /* then try uio:uio%d */ - errno = 0; - uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10); - if (errno == 0 && endptr != (e->d_name + longprefix_len)) { - snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num); - break; - } - } - closedir(dir); - - /* No uio resource found */ - if (e == NULL) - return -1; - - /* create uio device if we've been asked to */ - if (internal_config.create_uio_dev && create && - pci_mknod_uio_dev(dstbuf, uio_num) < 0) - RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num); - - return uio_num; -} - -void -pci_uio_free_resource(struct rte_pci_device *dev, - struct mapped_pci_resource *uio_res) -{ - rte_free(uio_res); - - if (dev->intr_handle.uio_cfg_fd >= 0) { - close(dev->intr_handle.uio_cfg_fd); - dev->intr_handle.uio_cfg_fd = -1; - } - if (dev->intr_handle.fd >= 0) { - close(dev->intr_handle.fd); - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - } -} - -int -pci_uio_alloc_resource(struct rte_pci_device *dev, - struct mapped_pci_resource **uio_res) -{ - char dirname[PATH_MAX]; - char cfgname[PATH_MAX]; - char devname[PATH_MAX]; /* contains the /dev/uioX */ - int uio_num; - struct rte_pci_addr *loc; - - loc = &dev->addr; - - /* find uio resource */ - uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1); - if (uio_num < 0) { - RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " - "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); - return 1; - } - snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); - - /* save fd if in primary process */ - dev->intr_handle.fd = open(devname, O_RDWR); - if (dev->intr_handle.fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - - snprintf(cfgname, sizeof(cfgname), - "/sys/class/uio/uio%u/device/config", uio_num); - dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR); - if (dev->intr_handle.uio_cfg_fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - cfgname, strerror(errno)); - goto error; - } - - if (dev->kdrv == RTE_KDRV_IGB_UIO) - dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - else { - dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; - - /* set bus master that is not done by uio_pci_generic */ - if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) { - RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); - goto error; - } - } - - /* allocate the mapping details for secondary processes*/ - *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); - if (*uio_res == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot store uio mmap details\n", __func__); - goto error; - } - - snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); - memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); - - return 0; - -error: - pci_uio_free_resource(dev, *uio_res); - return -1; -} - -int -pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, - struct mapped_pci_resource *uio_res, int map_idx) -{ - int fd; - char devname[PATH_MAX]; - void *mapaddr; - struct rte_pci_addr *loc; - struct pci_map *maps; - - loc = &dev->addr; - maps = uio_res->maps; - - /* update devname for mmap */ - snprintf(devname, sizeof(devname), - "%s/" PCI_PRI_FMT "/resource%d", - pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, - loc->function, res_idx); - - /* allocate memory to keep path */ - maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); - if (maps[map_idx].path == NULL) { - RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", - strerror(errno)); - return -1; - } - - /* - * open resource file, to mmap it - */ - fd = open(devname, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - devname, strerror(errno)); - goto error; - } - - /* try mapping somewhere close to the end of hugepages */ - if (pci_map_addr == NULL) - pci_map_addr = pci_find_max_end_va(); - - mapaddr = pci_map_resource(pci_map_addr, fd, 0, - (size_t)dev->mem_resource[res_idx].len, 0); - close(fd); - if (mapaddr == MAP_FAILED) - goto error; - - pci_map_addr = RTE_PTR_ADD(mapaddr, - (size_t)dev->mem_resource[res_idx].len); - - maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; - maps[map_idx].size = dev->mem_resource[res_idx].len; - maps[map_idx].addr = mapaddr; - maps[map_idx].offset = 0; - strcpy(maps[map_idx].path, devname); - dev->mem_resource[res_idx].addr = mapaddr; - - return 0; - -error: - rte_free(maps[map_idx].path); - return -1; -} - -#if defined(RTE_ARCH_X86) -int -pci_uio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - char dirname[PATH_MAX]; - char filename[PATH_MAX]; - int uio_num; - unsigned long start; - - uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0); - if (uio_num < 0) - return -1; - - /* get portio start */ - snprintf(filename, sizeof(filename), - "%s/portio/port%d/start", dirname, bar); - if (eal_parse_sysfs_value(filename, &start) < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n", - __func__); - return -1; - } - /* ensure we don't get anything funny here, read/write will cast to - * uin16_t */ - if (start > UINT16_MAX) - return -1; - - /* FIXME only for primary process ? */ - if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) { - - snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); - dev->intr_handle.fd = open(filename, O_RDWR); - if (dev->intr_handle.fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", - filename, strerror(errno)); - return -1; - } - dev->intr_handle.type = RTE_INTR_HANDLE_UIO; - } - - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start); - - p->base = start; - p->len = 0; - return 0; -} -#else -int -pci_uio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - FILE *f; - char buf[BUFSIZ]; - char filename[PATH_MAX]; - uint64_t phys_addr, end_addr, flags; - int fd, i; - void *addr; - - /* open and read addresses of the corresponding resource in sysfs */ - snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource", - pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - f = fopen(filename, "r"); - if (f == NULL) { - RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n", - strerror(errno)); - return -1; - } - for (i = 0; i < bar + 1; i++) { - if (fgets(buf, sizeof(buf), f) == NULL) { - RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n"); - goto error; - } - } - if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, - &end_addr, &flags) < 0) - goto error; - if ((flags & IORESOURCE_IO) == 0) { - RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar); - goto error; - } - snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d", - pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function, bar); - - /* mmap the pci resource */ - fd = open(filename, O_RDWR); - if (fd < 0) { - RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, - strerror(errno)); - goto error; - } - addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - close(fd); - if (addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Cannot mmap IO port resource: %s\n", - strerror(errno)); - goto error; - } - - /* strangely, the base address is mmap addr + phys_addr */ - p->base = (uintptr_t)addr + phys_addr; - p->len = end_addr + 1; - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%"PRIx64"\n", p->base); - fclose(f); - - return 0; - -error: - fclose(f); - return -1; -} -#endif - -void -pci_uio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ - uint8_t *d; - int size; - uintptr_t reg = p->base + offset; - - for (d = data; len > 0; d += size, reg += size, len -= size) { - if (len >= 4) { - size = 4; -#if defined(RTE_ARCH_X86) - *(uint32_t *)d = inl(reg); -#else - *(uint32_t *)d = *(volatile uint32_t *)reg; -#endif - } else if (len >= 2) { - size = 2; -#if defined(RTE_ARCH_X86) - *(uint16_t *)d = inw(reg); -#else - *(uint16_t *)d = *(volatile uint16_t *)reg; -#endif - } else { - size = 1; -#if defined(RTE_ARCH_X86) - *d = inb(reg); -#else - *d = *(volatile uint8_t *)reg; -#endif - } - } -} - -void -pci_uio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ - const uint8_t *s; - int size; - uintptr_t reg = p->base + offset; - - for (s = data; len > 0; s += size, reg += size, len -= size) { - if (len >= 4) { - size = 4; -#if defined(RTE_ARCH_X86) - outl_p(*(const uint32_t *)s, reg); -#else - *(volatile uint32_t *)reg = *(const uint32_t *)s; -#endif - } else if (len >= 2) { - size = 2; -#if defined(RTE_ARCH_X86) - outw_p(*(const uint16_t *)s, reg); -#else - *(volatile uint16_t *)reg = *(const uint16_t *)s; -#endif - } else { - size = 1; -#if defined(RTE_ARCH_X86) - outb_p(*s, reg); -#else - *(volatile uint8_t *)reg = *s; -#endif - } - } -} - -int -pci_uio_ioport_unmap(struct rte_pci_ioport *p) -{ -#if defined(RTE_ARCH_X86) - RTE_SET_USED(p); - /* FIXME close intr fd ? */ - return 0; -#else - return munmap((void *)(uintptr_t)p->base, p->len); -#endif -} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c deleted file mode 100644 index aa9d96ed..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ /dev/null @@ -1,674 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "eal_filesystem.h" -#include "eal_pci_init.h" -#include "eal_vfio.h" -#include "eal_private.h" - -/** - * @file - * PCI probing under linux (VFIO version) - * - * This code tries to determine if the PCI device is bound to VFIO driver, - * and initialize it (map BARs, set up interrupts) if that's the case. - * - * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". - */ - -#ifdef VFIO_PRESENT - -#define PAGE_SIZE (sysconf(_SC_PAGESIZE)) -#define PAGE_MASK (~(PAGE_SIZE - 1)) - -static struct rte_tailq_elem rte_vfio_tailq = { - .name = "VFIO_RESOURCE_LIST", -}; -EAL_REGISTER_TAILQ(rte_vfio_tailq) - -int -pci_vfio_read_config(const struct rte_intr_handle *intr_handle, - void *buf, size_t len, off_t offs) -{ - return pread64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); -} - -int -pci_vfio_write_config(const struct rte_intr_handle *intr_handle, - const void *buf, size_t len, off_t offs) -{ - return pwrite64(intr_handle->vfio_dev_fd, buf, len, - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); -} - -/* get PCI BAR number where MSI-X interrupts are */ -static int -pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, - uint32_t *msix_table_size) -{ - int ret; - uint32_t reg; - uint16_t flags; - uint8_t cap_id, cap_offset; - - /* read PCI capability pointer from config space */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_CAPABILITY_LIST); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " - "config space!\n"); - return -1; - } - - /* we need first byte */ - cap_offset = reg & 0xFF; - - while (cap_offset) { - - /* read PCI capability ID */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " - "config space!\n"); - return -1; - } - - /* we need first byte */ - cap_id = reg & 0xFF; - - /* if we haven't reached MSI-X, check next capability */ - if (cap_id != PCI_CAP_ID_MSIX) { - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " - "config space!\n"); - return -1; - } - - /* we need second byte */ - cap_offset = (reg & 0xFF00) >> 8; - - continue; - } - /* else, read table offset */ - else { - /* table offset resides in the next 4 bytes */ - ret = pread64(fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 4); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " - "space!\n"); - return -1; - } - - ret = pread64(fd, &flags, sizeof(flags), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - cap_offset + 2); - if (ret != sizeof(flags)) { - RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " - "space!\n"); - return -1; - } - - *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR; - *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; - *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); - - return 0; - } - } - return 0; -} - -/* set PCI bus mastering */ -static int -pci_vfio_set_bus_master(int dev_fd, bool op) -{ - uint16_t reg; - int ret; - - ret = pread64(dev_fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); - return -1; - } - - if (op) - /* set the master bit */ - reg |= PCI_COMMAND_MASTER; - else - reg &= ~(PCI_COMMAND_MASTER); - - ret = pwrite64(dev_fd, ®, sizeof(reg), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + - PCI_COMMAND); - - if (ret != sizeof(reg)) { - RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); - return -1; - } - - return 0; -} - -/* set up interrupt support (but not enable interrupts) */ -static int -pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) -{ - int i, ret, intr_idx; - - /* default to invalid index */ - intr_idx = VFIO_PCI_NUM_IRQS; - - /* get interrupt type from internal config (MSI-X by default, can be - * overridden from the command line - */ - switch (internal_config.vfio_intr_mode) { - case RTE_INTR_MODE_MSIX: - intr_idx = VFIO_PCI_MSIX_IRQ_INDEX; - break; - case RTE_INTR_MODE_MSI: - intr_idx = VFIO_PCI_MSI_IRQ_INDEX; - break; - case RTE_INTR_MODE_LEGACY: - intr_idx = VFIO_PCI_INTX_IRQ_INDEX; - break; - /* don't do anything if we want to automatically determine interrupt type */ - case RTE_INTR_MODE_NONE: - break; - default: - RTE_LOG(ERR, EAL, " unknown default interrupt type!\n"); - return -1; - } - - /* start from MSI-X interrupt type */ - for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { - struct vfio_irq_info irq = { .argsz = sizeof(irq) }; - int fd = -1; - - /* skip interrupt modes we don't want */ - if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE && - i != intr_idx) - continue; - - irq.index = i; - - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); - if (ret < 0) { - RTE_LOG(ERR, EAL, " cannot get IRQ info, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } - - /* if this vector cannot be used with eventfd, fail if we explicitly - * specified interrupt type, otherwise continue */ - if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { - if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) { - RTE_LOG(ERR, EAL, - " interrupt vector does not support eventfd!\n"); - return -1; - } else - continue; - } - - /* set up an eventfd for interrupts */ - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - RTE_LOG(ERR, EAL, " cannot set up eventfd, " - "error %i (%s)\n", errno, strerror(errno)); - return -1; - } - - dev->intr_handle.fd = fd; - dev->intr_handle.vfio_dev_fd = vfio_dev_fd; - - switch (i) { - case VFIO_PCI_MSIX_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; - break; - case VFIO_PCI_MSI_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; - break; - case VFIO_PCI_INTX_IRQ_INDEX: - internal_config.vfio_intr_mode = RTE_INTR_MODE_LEGACY; - dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; - break; - default: - RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); - return -1; - } - - return 0; - } - - /* if we're here, we haven't found a suitable interrupt vector */ - return -1; -} - -/* - * map the PCI resources of a PCI device in virtual memory (VFIO version). - * primary and secondary processes follow almost exactly the same path - */ -int -pci_vfio_map_resource(struct rte_pci_device *dev) -{ - struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; - char pci_addr[PATH_MAX] = {0}; - int vfio_dev_fd; - struct rte_pci_addr *loc = &dev->addr; - int i, ret, msix_bar; - struct mapped_pci_resource *vfio_res = NULL; - struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); - - struct pci_map *maps; - uint32_t msix_table_offset = 0; - uint32_t msix_table_size = 0; - uint32_t ioport_bar; - - dev->intr_handle.fd = -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; - - /* store PCI address string */ - snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, - loc->domain, loc->bus, loc->devid, loc->function); - - if ((ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr, - &vfio_dev_fd, &device_info))) - return ret; - - /* get MSI-X BAR, if any (we have to know where it is because we can't - * easily mmap it when using VFIO) */ - msix_bar = -1; - ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar, - &msix_table_offset, &msix_table_size); - if (ret < 0) { - RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); - close(vfio_dev_fd); - return -1; - } - - /* if we're in a primary process, allocate vfio_res and get region info */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); - if (vfio_res == NULL) { - RTE_LOG(ERR, EAL, - "%s(): cannot store uio mmap details\n", __func__); - close(vfio_dev_fd); - return -1; - } - memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr)); - - /* get number of registers (up to BAR5) */ - vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions, - VFIO_PCI_BAR5_REGION_INDEX + 1); - } else { - /* if we're in a secondary process, just find our tailq entry */ - TAILQ_FOREACH(vfio_res, vfio_res_list, next) { - if (rte_eal_compare_pci_addr(&vfio_res->pci_addr, - &dev->addr)) - continue; - break; - } - /* if we haven't found our tailq entry, something's wrong */ - if (vfio_res == NULL) { - RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", - pci_addr); - close(vfio_dev_fd); - return -1; - } - } - - /* map BARs */ - maps = vfio_res->maps; - - for (i = 0; i < (int) vfio_res->nb_maps; i++) { - struct vfio_region_info reg = { .argsz = sizeof(reg) }; - void *bar_addr; - struct memreg { - unsigned long offset, size; - } memreg[2] = {}; - - reg.index = i; - - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®); - - if (ret) { - RTE_LOG(ERR, EAL, " %s cannot get device region info " - "error %i (%s)\n", pci_addr, errno, strerror(errno)); - close(vfio_dev_fd); - if (internal_config.process_type == RTE_PROC_PRIMARY) - rte_free(vfio_res); - return -1; - } - - /* chk for io port region */ - ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), - VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) - + PCI_BASE_ADDRESS_0 + i*4); - - if (ret != sizeof(ioport_bar)) { - RTE_LOG(ERR, EAL, - "Cannot read command (%x) from config space!\n", - PCI_BASE_ADDRESS_0 + i*4); - return -1; - } - - if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) { - RTE_LOG(INFO, EAL, - "Ignore mapping IO port bar(%d) addr: %x\n", - i, ioport_bar); - continue; - } - - /* skip non-mmapable BARs */ - if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) - continue; - - if (i == msix_bar) { - /* - * VFIO will not let us map the MSI-X table, - * but we can map around it. - */ - uint32_t table_start = msix_table_offset; - uint32_t table_end = table_start + msix_table_size; - table_end = (table_end + ~PAGE_MASK) & PAGE_MASK; - table_start &= PAGE_MASK; - - if (table_start == 0 && table_end >= reg.size) { - /* Cannot map this BAR */ - RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i); - continue; - } else { - memreg[0].offset = reg.offset; - memreg[0].size = table_start; - memreg[1].offset = reg.offset + table_end; - memreg[1].size = reg.size - table_end; - - RTE_LOG(DEBUG, EAL, - "Trying to map BAR %d that contains the MSI-X " - "table. Trying offsets: " - "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i, - memreg[0].offset, memreg[0].size, - memreg[1].offset, memreg[1].size); - } - } else { - memreg[0].offset = reg.offset; - memreg[0].size = reg.size; - } - - /* try to figure out an address */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - /* try mapping somewhere close to the end of hugepages */ - if (pci_map_addr == NULL) - pci_map_addr = pci_find_max_end_va(); - - bar_addr = pci_map_addr; - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); - } else { - bar_addr = maps[i].addr; - } - - /* reserve the address using an inaccessible mapping */ - bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE | - MAP_ANONYMOUS, -1, 0); - if (bar_addr != MAP_FAILED) { - void *map_addr = NULL; - if (memreg[0].size) { - /* actual map of first part */ - map_addr = pci_map_resource(bar_addr, vfio_dev_fd, - memreg[0].offset, - memreg[0].size, - MAP_FIXED); - } - - /* if there's a second part, try to map it */ - if (map_addr != MAP_FAILED - && memreg[1].offset && memreg[1].size) { - void *second_addr = RTE_PTR_ADD(bar_addr, - memreg[1].offset - - (uintptr_t)reg.offset); - map_addr = pci_map_resource(second_addr, - vfio_dev_fd, memreg[1].offset, - memreg[1].size, - MAP_FIXED); - } - - if (map_addr == MAP_FAILED || !map_addr) { - munmap(bar_addr, reg.size); - bar_addr = MAP_FAILED; - } - } - - if (bar_addr == MAP_FAILED || - (internal_config.process_type == RTE_PROC_SECONDARY && - bar_addr != maps[i].addr)) { - RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, - strerror(errno)); - close(vfio_dev_fd); - if (internal_config.process_type == RTE_PROC_PRIMARY) - rte_free(vfio_res); - return -1; - } - - maps[i].addr = bar_addr; - maps[i].offset = reg.offset; - maps[i].size = reg.size; - maps[i].path = NULL; /* vfio doesn't have per-resource paths */ - dev->mem_resource[i].addr = bar_addr; - } - - /* if secondary process, do not set up interrupts */ - if (internal_config.process_type == RTE_PROC_PRIMARY) { - if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) { - RTE_LOG(ERR, EAL, " %s error setting up interrupts!\n", pci_addr); - close(vfio_dev_fd); - rte_free(vfio_res); - return -1; - } - - /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { - RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr); - close(vfio_dev_fd); - rte_free(vfio_res); - return -1; - } - - /* Reset the device */ - ioctl(vfio_dev_fd, VFIO_DEVICE_RESET); - } - - if (internal_config.process_type == RTE_PROC_PRIMARY) - TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); - - return 0; -} - -int -pci_vfio_unmap_resource(struct rte_pci_device *dev) -{ - char pci_addr[PATH_MAX] = {0}; - struct rte_pci_addr *loc = &dev->addr; - int i, ret; - struct mapped_pci_resource *vfio_res = NULL; - struct mapped_pci_res_list *vfio_res_list; - - struct pci_map *maps; - - /* store PCI address string */ - snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, - loc->domain, loc->bus, loc->devid, loc->function); - - - if (close(dev->intr_handle.fd) < 0) { - RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n", - pci_addr); - return -1; - } - - if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) { - RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n", - pci_addr); - return -1; - } - - ret = vfio_release_device(pci_get_sysfs_path(), pci_addr, - dev->intr_handle.vfio_dev_fd); - if (ret < 0) { - RTE_LOG(ERR, EAL, - "%s(): cannot release device\n", __func__); - return ret; - } - - vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); - /* Get vfio_res */ - TAILQ_FOREACH(vfio_res, vfio_res_list, next) { - if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) - continue; - break; - } - /* if we haven't found our tailq entry, something's wrong */ - if (vfio_res == NULL) { - RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", - pci_addr); - return -1; - } - - /* unmap BARs */ - maps = vfio_res->maps; - - RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n", - pci_addr); - for (i = 0; i < (int) vfio_res->nb_maps; i++) { - - /* - * We do not need to be aware of MSI-X table BAR mappings as - * when mapping. Just using current maps array is enough - */ - if (maps[i].addr) { - RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n", - pci_addr, maps[i].addr); - pci_unmap_resource(maps[i].addr, maps[i].size); - } - } - - TAILQ_REMOVE(vfio_res_list, vfio_res, next); - - return 0; -} - -int -pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) -{ - if (bar < VFIO_PCI_BAR0_REGION_INDEX || - bar > VFIO_PCI_BAR5_REGION_INDEX) { - RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar); - return -1; - } - - p->dev = dev; - p->base = VFIO_GET_REGION_ADDR(bar); - return 0; -} - -void -pci_vfio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; - - if (pread64(intr_handle->vfio_dev_fd, data, - len, p->base + offset) <= 0) - RTE_LOG(ERR, EAL, - "Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n", - VFIO_GET_REGION_IDX(p->base), (int)offset); -} - -void -pci_vfio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) -{ - const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; - - if (pwrite64(intr_handle->vfio_dev_fd, data, - len, p->base + offset) <= 0) - RTE_LOG(ERR, EAL, - "Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n", - VFIO_GET_REGION_IDX(p->base), (int)offset); -} - -int -pci_vfio_ioport_unmap(struct rte_pci_ioport *p) -{ - RTE_SET_USED(p); - return -1; -} - -int -pci_vfio_enable(void) -{ - return vfio_enable("vfio_pci"); -} - -int -pci_vfio_is_enabled(void) -{ - return vfio_is_enabled("vfio_pci"); -} -#endif diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c index 6481eeea..e9a579e4 100644 --- a/lib/librte_eal/linuxapp/eal/eal_thread.c +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c index afa32f5c..24349dab 100644 --- a/lib/librte_eal/linuxapp/eal/eal_timer.c +++ b/lib/librte_eal/linuxapp/eal/eal_timer.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 946df7e3..58f0123e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "eal_filesystem.h" #include "eal_vfio.h" @@ -68,8 +69,8 @@ vfio_get_group_fd(int iommu_group_no) { int i; int vfio_group_fd; - int group_idx = -1; char filename[PATH_MAX]; + struct vfio_group *cur_grp; /* check if we already have the group descriptor open */ for (i = 0; i < VFIO_MAX_GROUPS; i++) @@ -85,12 +86,12 @@ vfio_get_group_fd(int iommu_group_no) /* Now lets get an index for the new group */ for (i = 0; i < VFIO_MAX_GROUPS; i++) if (vfio_cfg.vfio_groups[i].group_no == -1) { - group_idx = i; + cur_grp = &vfio_cfg.vfio_groups[i]; break; } /* This should not happen */ - if (group_idx == -1) { + if (i == VFIO_MAX_GROUPS) { RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); return -1; } @@ -123,8 +124,8 @@ vfio_get_group_fd(int iommu_group_no) /* noiommu group found */ } - vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd; + cur_grp->group_no = iommu_group_no; + cur_grp->fd = vfio_group_fd; vfio_cfg.vfio_active_groups++; return vfio_group_fd; } @@ -157,9 +158,12 @@ vfio_get_group_fd(int iommu_group_no) return 0; case SOCKET_OK: vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); - /* if we got the fd, return it */ + /* if we got the fd, store it and return it */ if (vfio_group_fd > 0) { close(socket_fd); + cur_grp->group_no = iommu_group_no; + cur_grp->fd = vfio_group_fd; + vfio_cfg.vfio_active_groups++; return vfio_group_fd; } /* fall-through on error */ @@ -280,7 +284,7 @@ clear_group(int vfio_group_fd) } int -vfio_setup_device(const char *sysfs_base, const char *dev_addr, +rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, int *vfio_dev_fd, struct vfio_device_info *device_info) { struct vfio_group_status group_status = { @@ -412,7 +416,7 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr, } int -vfio_release_device(const char *sysfs_base, const char *dev_addr, +rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int vfio_dev_fd) { struct vfio_group_status group_status = { @@ -474,7 +478,7 @@ vfio_release_device(const char *sysfs_base, const char *dev_addr, } int -vfio_enable(const char *modname) +rte_vfio_enable(const char *modname) { /* initialize group list */ int i; @@ -489,7 +493,7 @@ vfio_enable(const char *modname) /* inform the user that we are probing for VFIO */ RTE_LOG(INFO, EAL, "Probing VFIO support...\n"); - /* check if vfio-pci module is loaded */ + /* check if vfio module is loaded */ vfio_available = rte_eal_check_module(modname); /* return error directly */ @@ -519,7 +523,7 @@ vfio_enable(const char *modname) } int -vfio_is_enabled(const char *modname) +rte_vfio_is_enabled(const char *modname) { const int mod_available = rte_eal_check_module(modname); return vfio_cfg.vfio_enabled && mod_available; @@ -706,7 +710,10 @@ vfio_type1_dma_map(int vfio_container_fd) dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = ms[i].addr_64; dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + if (rte_eal_iova_mode() == RTE_IOVA_VA) + dma_map.iova = dma_map.vaddr; + else + dma_map.iova = ms[i].iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); @@ -759,10 +766,19 @@ vfio_spapr_dma_map(int vfio_container_fd) return -1; } - /* calculate window size based on number of hugepages configured */ - create.window_size = rte_eal_get_physmem_size(); + /* create DMA window from 0 to max(phys_addr + len) */ + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + if (ms[i].addr == NULL) + break; + + create.window_size = RTE_MAX(create.window_size, + ms[i].iova + ms[i].len); + } + + /* sPAPR requires window size to be a power of 2 */ + create.window_size = rte_align64pow2(create.window_size); create.page_shift = __builtin_ctzll(ms->hugepage_sz); - create.levels = 2; + create.levels = 1; ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create); if (ret) { @@ -771,6 +787,11 @@ vfio_spapr_dma_map(int vfio_container_fd) return -1; } + if (create.start_addr != 0) { + RTE_LOG(ERR, EAL, " DMA window start address != 0\n"); + return -1; + } + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ for (i = 0; i < RTE_MAX_MEMSEG; i++) { struct vfio_iommu_type1_dma_map dma_map; @@ -792,7 +813,10 @@ vfio_spapr_dma_map(int vfio_container_fd) dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = ms[i].addr_64; dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + if (rte_eal_iova_mode() == RTE_IOVA_VA) + dma_map.iova = dma_map.vaddr; + else + dma_map.iova = ms[i].iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; @@ -816,4 +840,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) return 0; } +int +rte_vfio_noiommu_is_enabled(void) +{ + int fd, ret, cnt __rte_unused; + char c; + + ret = -1; + fd = open(VFIO_NOIOMMU_MODE, O_RDONLY); + if (fd < 0) + return -1; + + cnt = read(fd, &c, 1); + if (c == 'Y') + ret = 1; + + close(fd); + return ret; +} + #endif diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 5ff63e5d..ba7892b7 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -37,20 +37,18 @@ /* * determine if VFIO is present on the system */ -#ifdef RTE_EAL_VFIO +#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO) #include #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) -#include - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) -#define RTE_PCI_MSIX_TABLE_BIR 0x7 -#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 -#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff +#define VFIO_PRESENT #else -#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR -#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET -#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE -#endif +#pragma message("VFIO configured but not supported by this kernel, disabling.") +#endif /* kernel version >= 3.6.0 */ +#endif /* RTE_EAL_VFIO */ + +#ifdef VFIO_PRESENT + +#include #define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU @@ -144,13 +142,6 @@ struct vfio_config { struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; }; -#define VFIO_DIR "/dev/vfio" -#define VFIO_CONTAINER_PATH "/dev/vfio/vfio" -#define VFIO_GROUP_FMT "/dev/vfio/%u" -#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" -#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) -#define VFIO_GET_REGION_IDX(x) (x >> 40) - /* DMA mapping function prototype. * Takes VFIO container fd as a parameter. * Returns 0 on success, -1 on error. @@ -190,24 +181,6 @@ vfio_get_group_fd(int iommu_group_no); int clear_group(int vfio_group_fd); -/** - * Setup vfio_cfg for the device identified by its address. It discovers - * the configured I/O MMU groups or sets a new one for the device. If a new - * groups is assigned, the DMA mapping is performed. - * Returns 0 on success, a negative value on failure and a positive value in - * case the given device cannot be managed this way. - */ -int vfio_setup_device(const char *sysfs_base, const char *dev_addr, - int *vfio_dev_fd, struct vfio_device_info *device_info); - -int vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); - -int vfio_enable(const char *modname); -int vfio_is_enabled(const char *modname); - -int pci_vfio_enable(void); -int pci_vfio_is_enabled(void); - int vfio_mp_sync_setup(void); #define SOCKET_REQ_CONTAINER 0x100 @@ -217,8 +190,6 @@ int vfio_mp_sync_setup(void); #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF -#define VFIO_PRESENT -#endif /* kernel version */ -#endif /* RTE_EAL_VFIO */ +#endif /* VFIO_PRESENT */ #endif /* EAL_VFIO_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c index 7e8095cb..b53ed7eb 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c @@ -49,12 +49,12 @@ #endif #include -#include #include #include +#include #include "eal_filesystem.h" -#include "eal_pci_init.h" +#include "eal_vfio.h" #include "eal_thread.h" /** @@ -301,7 +301,8 @@ vfio_mp_sync_thread(void __rte_unused * arg) vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); else vfio_mp_sync_send_fd(conn_sock, fd); - close(fd); + if (fd >= 0) + close(fd); break; case SOCKET_REQ_GROUP: /* wait for group number */ diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c deleted file mode 100644 index 19db1cb5..00000000 --- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c +++ /dev/null @@ -1,381 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "eal_private.h" -#include "eal_internal_cfg.h" -#include "eal_filesystem.h" -#include - -#define PAGE_SIZE RTE_PGSIZE_4K -#define DEFAUL_DOM0_NAME "dom0-mem" - -static int xen_fd = -1; -static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB"; - -/* - * Try to mmap *size bytes in /dev/zero. If it is successful, return the - * pointer to the mmap'd area and keep *size unmodified. Else, retry - * with a smaller zone: decrease *size by mem_size until it reaches - * 0. In this case, return NULL. Note: this function returns an address - * which is a multiple of mem_size size. - */ -static void * -xen_get_virtual_area(size_t *size, size_t mem_size) -{ - void *addr; - int fd; - long aligned_addr; - - RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zu bytes\n", *size); - - fd = open("/dev/zero", O_RDONLY); - if (fd < 0){ - RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n"); - return NULL; - } - do { - addr = mmap(NULL, (*size) + mem_size, PROT_READ, - MAP_PRIVATE, fd, 0); - if (addr == MAP_FAILED) - *size -= mem_size; - } while (addr == MAP_FAILED && *size > 0); - - if (addr == MAP_FAILED) { - close(fd); - RTE_LOG(ERR, EAL, "Cannot get a virtual area\n"); - return NULL; - } - - munmap(addr, (*size) + mem_size); - close(fd); - - /* align addr to a mem_size boundary */ - aligned_addr = (uintptr_t)addr; - aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size); - addr = (void *)(aligned_addr); - - RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", - addr, *size); - - return addr; -} - -/** - * Get memory size configuration from /sys/devices/virtual/misc/dom0_mm - * /memsize-mB/memsize file, and the size unit is mB. - */ -static int -get_xen_memory_size(void) -{ - char path[PATH_MAX]; - unsigned long mem_size = 0; - static const char *file_name; - - file_name = "memsize"; - snprintf(path, sizeof(path), "%s/%s", - sys_dir_path, file_name); - - if (eal_parse_sysfs_value(path, &mem_size) < 0) - return -1; - - if (mem_size == 0) - rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not" - " configured.\n",sys_dir_path, file_name); - if (mem_size % 2) - rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be" - " even number.\n",sys_dir_path, file_name); - - if (mem_size > DOM0_CONFIG_MEMSIZE) - rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger" - " than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE); - - return mem_size; -} - -/** - * Based on physical address to caculate MFN in Xen Dom0. - */ -phys_addr_t -rte_xen_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr) -{ - int mfn_id, i; - uint64_t mfn, mfn_offset; - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct rte_memseg *memseg = mcfg->memseg; - - /* find the memory segment owning the physical address */ - if (memseg_id == -1) { - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - if ((phy_addr >= memseg[i].phys_addr) && - (phy_addr < memseg[i].phys_addr + - memseg[i].len)) { - memseg_id = i; - break; - } - } - if (memseg_id == -1) - return RTE_BAD_PHYS_ADDR; - } - - mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M; - - /*the MFN is contiguous in 2M */ - mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) % - RTE_PGSIZE_2M / PAGE_SIZE; - mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id]; - - /** return mechine address */ - return mfn * PAGE_SIZE + phy_addr % PAGE_SIZE; -} - -int -rte_xen_dom0_memory_init(void) -{ - void *vir_addr, *vma_addr = NULL; - int err, ret = 0; - uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0; - size_t vma_len = 0; - struct memory_info meminfo; - struct memseg_info seginfo[RTE_MAX_MEMSEG]; - int flags, page_size = getpagesize(); - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct rte_memseg *memseg = mcfg->memseg; - uint64_t total_mem = internal_config.memory; - - memset(seginfo, 0, sizeof(seginfo)); - memset(&meminfo, 0, sizeof(struct memory_info)); - - mem_size = get_xen_memory_size(); - requested = (unsigned) (total_mem / 0x100000); - if (requested > mem_size) - /* if we didn't satisfy total memory requirements */ - rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB," - " available: %uMB\n", requested, mem_size); - else if (total_mem != 0) - mem_size = requested; - - /* Check FD and open once */ - if (xen_fd < 0) { - xen_fd = open(DOM0_MM_DEV, O_RDWR); - if (xen_fd < 0) { - RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV); - return -1; - } - } - - meminfo.size = mem_size; - - /* construct memory mangement name for Dom0 */ - snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s", - internal_config.hugefile_prefix, DEFAUL_DOM0_NAME); - - /* Notify kernel driver to allocate memory */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo); - if (ret < 0) { - RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n"); - err = -EIO; - goto fail; - } - - /* Get number of memory segment from driver */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg); - if (ret < 0) { - RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n"); - err = -EIO; - goto fail; - } - - if(num_memseg > RTE_MAX_MEMSEG){ - RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater" - " than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG); - err = -EIO; - goto fail; - } - - /* get all memory segements information */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo); - if (ret < 0) { - RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n"); - err = -EIO; - goto fail; - } - - /* map all memory segments to contiguous user space */ - for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++) - { - vma_len = seginfo[memseg_idx].size; - - /** - * get the biggest virtual memory area up to vma_len. If it fails, - * vma_addr is NULL, so let the kernel provide the address. - */ - vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M); - if (vma_addr == NULL) { - flags = MAP_SHARED; - vma_len = RTE_PGSIZE_2M; - } else - flags = MAP_SHARED | MAP_FIXED; - - seginfo[memseg_idx].size = vma_len; - vir_addr = mmap(vma_addr, seginfo[memseg_idx].size, - PROT_READ|PROT_WRITE, flags, xen_fd, - memseg_idx * page_size); - if (vir_addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n", - DOM0_MM_DEV); - err = -EIO; - goto fail; - } - - memseg[memseg_idx].addr = vir_addr; - memseg[memseg_idx].phys_addr = page_size * - seginfo[memseg_idx].pfn ; - memseg[memseg_idx].len = seginfo[memseg_idx].size; - for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++) - memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i]; - - /* MFNs are continuous in 2M, so assume that page size is 2M */ - memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M; - - memseg[memseg_idx].nchannel = mcfg->nchannel; - memseg[memseg_idx].nrank = mcfg->nrank; - - /* NUMA is not suppoted in Xen Dom0, so only set socket 0*/ - memseg[memseg_idx].socket_id = 0; - } - - return 0; -fail: - if (xen_fd > 0) { - close(xen_fd); - xen_fd = -1; - } - return err; -} - -/* - * This creates the memory mappings in the secondary process to match that of - * the server process. It goes through each memory segment in the DPDK runtime - * configuration, mapping them in order to form a contiguous block in the - * virtual memory space - */ -int -rte_xen_dom0_memory_attach(void) -{ - const struct rte_mem_config *mcfg; - unsigned s = 0; /* s used to track the segment number */ - int xen_fd = -1; - int ret = -1; - void *vir_addr; - char name[DOM0_NAME_MAX] = {0}; - int page_size = getpagesize(); - - mcfg = rte_eal_get_configuration()->mem_config; - - /* Check FD and open once */ - if (xen_fd < 0) { - xen_fd = open(DOM0_MM_DEV, O_RDWR); - if (xen_fd < 0) { - RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV); - goto error; - } - } - - /* construct memory mangement name for Dom0 */ - snprintf(name, DOM0_NAME_MAX, "%s-%s", - internal_config.hugefile_prefix, DEFAUL_DOM0_NAME); - /* attach to memory segments of primary process */ - ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name); - if (ret) { - RTE_LOG(ERR, EAL,"attach memory segments fail.\n"); - goto error; - } - - /* map all segments into memory to make sure we get the addrs */ - for (s = 0; s < RTE_MAX_MEMSEG; ++s) { - - /* - * the first memory segment with len==0 is the one that - * follows the last valid segment. - */ - if (mcfg->memseg[s].len == 0) - break; - - vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, - PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd, - s * page_size); - if (vir_addr == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " - "in %s to requested address [%p]\n", - (unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV, - mcfg->memseg[s].addr); - goto error; - } - } - return 0; - -error: - if (xen_fd >= 0) { - close(xen_fd); - xen_fd = -1; - } - return -1; -} diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h deleted file mode 100644 index d9707780..00000000 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h +++ /dev/null @@ -1,108 +0,0 @@ -/*- - * This file is provided under a dual BSD/LGPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GNU LESSER GENERAL PUBLIC LICENSE - * - * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * Contact Information: - * Intel Corporation - * - * - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#ifndef _RTE_DOM0_COMMON_H_ -#define _RTE_DOM0_COMMON_H_ - -#ifdef __KERNEL__ -#include -#endif - -#define DOM0_NAME_MAX 256 -#define DOM0_MM_DEV "/dev/dom0_mm" - -#define DOM0_CONTIG_NUM_ORDER 9 /**< order of 2M */ -#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */ -#define DOM0_MEMBLOCK_SIZE 0x200000 /**< size of memory block(2M). */ -#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ -#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ - -#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) -#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) -#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) -#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) - -/** - * A structure used to store memory information. - */ -struct memory_info { - char name[DOM0_NAME_MAX]; - uint64_t size; -}; - -/** - * A structure used to store memory segment information. - */ -struct memseg_info { - uint32_t idx; - uint64_t pfn; - uint64_t size; - uint64_t mfn[DOM0_NUM_MEMBLOCK]; -}; - -/** - * A structure used to store memory block information. - */ -struct memblock_info { - uint8_t exchange_flag; - uint8_t used; - uint64_t vir_addr; - uint64_t pfn; - uint64_t mfn; -}; -#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h deleted file mode 100644 index 6daffebf..00000000 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ /dev/null @@ -1,239 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RTE_INTERRUPTS_H_ -#error "don't include this file directly, please include generic " -#endif - -#ifndef _RTE_LINUXAPP_INTERRUPTS_H_ -#define _RTE_LINUXAPP_INTERRUPTS_H_ - -#define RTE_MAX_RXTX_INTR_VEC_ID 32 -#define RTE_INTR_VEC_ZERO_OFFSET 0 -#define RTE_INTR_VEC_RXTX_OFFSET 1 - -enum rte_intr_handle_type { - RTE_INTR_HANDLE_UNKNOWN = 0, - RTE_INTR_HANDLE_UIO, /**< uio device handle */ - RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */ - RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ - RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */ - RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */ - RTE_INTR_HANDLE_ALARM, /**< alarm handle */ - RTE_INTR_HANDLE_EXT, /**< external handler */ - RTE_INTR_HANDLE_VDEV, /**< virtual device */ - RTE_INTR_HANDLE_MAX -}; - -#define RTE_INTR_EVENT_ADD 1UL -#define RTE_INTR_EVENT_DEL 2UL - -typedef void (*rte_intr_event_cb_t)(int fd, void *arg); - -struct rte_epoll_data { - uint32_t event; /**< event type */ - void *data; /**< User data */ - rte_intr_event_cb_t cb_fun; /**< IN: callback fun */ - void *cb_arg; /**< IN: callback arg */ -}; - -enum { - RTE_EPOLL_INVALID = 0, - RTE_EPOLL_VALID, - RTE_EPOLL_EXEC, -}; - -/** interrupt epoll event obj, taken by epoll_event.ptr */ -struct rte_epoll_event { - volatile uint32_t status; /**< OUT: event status */ - int fd; /**< OUT: event fd */ - int epfd; /**< OUT: epoll instance the ev associated with */ - struct rte_epoll_data epdata; -}; - -/** Handle for interrupts. */ -struct rte_intr_handle { - RTE_STD_C11 - union { - int vfio_dev_fd; /**< VFIO device file descriptor */ - int uio_cfg_fd; /**< UIO config file descriptor - for uio_pci_generic */ - }; - int fd; /**< interrupt event file descriptor */ - enum rte_intr_handle_type type; /**< handle type */ - uint32_t max_intr; /**< max interrupt requested */ - uint32_t nb_efd; /**< number of available efd(event fd) */ - int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */ - struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID]; - /**< intr vector epoll event */ - int *intr_vec; /**< intr vector number array */ -}; - -#define RTE_EPOLL_PER_THREAD -1 /**< to hint using per thread epfd */ - -/** - * It waits for events on the epoll instance. - * - * @param epfd - * Epoll instance fd on which the caller wait for events. - * @param events - * Memory area contains the events that will be available for the caller. - * @param maxevents - * Up to maxevents are returned, must greater than zero. - * @param timeout - * Specifying a timeout of -1 causes a block indefinitely. - * Specifying a timeout equal to zero cause to return immediately. - * @return - * - On success, returns the number of available event. - * - On failure, a negative value. - */ -int -rte_epoll_wait(int epfd, struct rte_epoll_event *events, - int maxevents, int timeout); - -/** - * It performs control operations on epoll instance referred by the epfd. - * It requests that the operation op be performed for the target fd. - * - * @param epfd - * Epoll instance fd on which the caller perform control operations. - * @param op - * The operation be performed for the target fd. - * @param fd - * The target fd on which the control ops perform. - * @param event - * Describes the object linked to the fd. - * Note: The caller must take care the object deletion after CTL_DEL. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int -rte_epoll_ctl(int epfd, int op, int fd, - struct rte_epoll_event *event); - -/** - * The function returns the per thread epoll instance. - * - * @return - * epfd the epoll instance referred to. - */ -int -rte_intr_tls_epfd(void); - -/** - * @param intr_handle - * Pointer to the interrupt handle. - * @param epfd - * Epoll instance fd which the intr vector associated to. - * @param op - * The operation be performed for the vector. - * Operation type of {ADD, DEL}. - * @param vec - * RX intr vector number added to the epoll instance wait list. - * @param data - * User raw data. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int -rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, - int epfd, int op, unsigned int vec, void *data); - -/** - * It deletes registered eventfds. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -void -rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle); - -/** - * It enables the packet I/O interrupt event if it's necessary. - * It creates event fd for each interrupt vector when MSIX is used, - * otherwise it multiplexes a single event fd. - * - * @param intr_handle - * Pointer to the interrupt handle. - * @param nb_efd - * Number of interrupt vector trying to enable. - * The value 0 is not allowed. - * @return - * - On success, zero. - * - On failure, a negative value. - */ -int -rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd); - -/** - * It disables the packet I/O interrupt event. - * It deletes registered eventfds and closes the open fds. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -void -rte_intr_efd_disable(struct rte_intr_handle *intr_handle); - -/** - * The packet I/O interrupt on datapath is enabled or not. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -int -rte_intr_dp_is_en(struct rte_intr_handle *intr_handle); - -/** - * The interrupt handle instance allows other causes or not. - * Other causes stand for any none packet I/O interrupts. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -int -rte_intr_allow_others(struct rte_intr_handle *intr_handle); - -/** - * The multiple interrupt vector capability of interrupt handle instance. - * It returns zero if no multiple interrupt vector support. - * - * @param intr_handle - * Pointer to the interrupt handle. - */ -int -rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); - -#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map deleted file mode 100644 index 3a8f1540..00000000 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ /dev/null @@ -1,244 +0,0 @@ -DPDK_2.0 { - global: - - __rte_panic; - devargs_list; - eal_parse_sysfs_value; - eal_timer_source; - lcore_config; - per_lcore__lcore_id; - per_lcore__rte_errno; - rte_calloc; - rte_calloc_socket; - rte_cpu_check_supported; - rte_cpu_get_flag_enabled; - rte_cycles_vmware_tsc_map; - rte_delay_us; - rte_dump_physmem_layout; - rte_dump_registers; - rte_dump_stack; - rte_dump_tailq; - rte_eal_alarm_cancel; - rte_eal_alarm_set; - rte_eal_devargs_add; - rte_eal_devargs_dump; - rte_eal_devargs_type_count; - rte_eal_get_configuration; - rte_eal_get_lcore_state; - rte_eal_get_physmem_layout; - rte_eal_get_physmem_size; - rte_eal_has_hugepages; - rte_eal_hpet_init; - rte_eal_init; - rte_eal_iopl_init; - rte_eal_lcore_role; - rte_eal_mp_remote_launch; - rte_eal_mp_wait_lcore; - rte_eal_parse_devargs_str; - rte_eal_process_type; - rte_eal_remote_launch; - rte_eal_tailq_lookup; - rte_eal_tailq_register; - rte_eal_wait_lcore; - rte_exit; - rte_free; - rte_get_hpet_cycles; - rte_get_hpet_hz; - rte_get_log_level; - rte_get_log_type; - rte_get_tsc_hz; - rte_hexdump; - rte_intr_callback_register; - rte_intr_callback_unregister; - rte_intr_disable; - rte_intr_enable; - rte_log; - rte_log_cur_msg_loglevel; - rte_log_cur_msg_logtype; - rte_logs; - rte_malloc; - rte_malloc_dump_stats; - rte_malloc_get_socket_stats; - rte_malloc_set_limit; - rte_malloc_socket; - rte_malloc_validate; - rte_malloc_virt2phy; - rte_mem_lock_page; - rte_mem_phy2mch; - rte_mem_virt2phy; - rte_memdump; - rte_memory_get_nchannel; - rte_memory_get_nrank; - rte_memzone_dump; - rte_memzone_lookup; - rte_memzone_reserve; - rte_memzone_reserve_aligned; - rte_memzone_reserve_bounded; - rte_memzone_walk; - rte_openlog_stream; - rte_realloc; - rte_set_application_usage_hook; - rte_set_log_level; - rte_set_log_type; - rte_socket_id; - rte_strerror; - rte_strsplit; - rte_sys_gettid; - rte_thread_get_affinity; - rte_thread_set_affinity; - rte_vlog; - rte_xen_dom0_memory_attach; - rte_xen_dom0_memory_init; - rte_zmalloc; - rte_zmalloc_socket; - - local: *; -}; - -DPDK_2.1 { - global: - - rte_epoll_ctl; - rte_epoll_wait; - rte_intr_allow_others; - rte_intr_dp_is_en; - rte_intr_efd_disable; - rte_intr_efd_enable; - rte_intr_rx_ctl; - rte_intr_tls_epfd; - rte_memzone_free; - -} DPDK_2.0; - -DPDK_2.2 { - global: - - rte_intr_cap_multiple; - rte_keepalive_create; - rte_keepalive_dispatch_pings; - rte_keepalive_mark_alive; - rte_keepalive_register_core; - rte_xen_dom0_supported; - rte_xen_mem_phy2mch; - -} DPDK_2.1; - -DPDK_16.04 { - global: - - rte_cpu_get_flag_name; - rte_eal_primary_proc_alive; - -} DPDK_2.2; - -DPDK_16.07 { - global: - - pci_get_sysfs_path; - rte_keepalive_mark_sleep; - rte_keepalive_register_relay_callback; - rte_rtm_supported; - rte_thread_setname; - -} DPDK_16.04; - -DPDK_16.11 { - global: - - rte_delay_us_block; - rte_delay_us_callback_register; - rte_eal_dev_attach; - rte_eal_dev_detach; - -} DPDK_16.07; - -DPDK_17.02 { - global: - - rte_bus_dump; - rte_bus_probe; - rte_bus_register; - rte_bus_scan; - rte_bus_unregister; - -} DPDK_16.11; - -DPDK_17.05 { - global: - - rte_cpu_is_supported; - rte_intr_free_epoll_fd; - rte_log_dump; - rte_log_get_global_level; - rte_log_register; - rte_log_set_global_level; - rte_log_set_level; - rte_log_set_level_regexp; - rte_pci_detach; - rte_pci_dump; - rte_pci_ioport_map; - rte_pci_ioport_read; - rte_pci_ioport_unmap; - rte_pci_ioport_write; - rte_pci_map_device; - rte_pci_probe; - rte_pci_probe_one; - rte_pci_read_config; - rte_pci_register; - rte_pci_scan; - rte_pci_unmap_device; - rte_pci_unregister; - rte_pci_write_config; - rte_vdev_init; - rte_vdev_register; - rte_vdev_uninit; - rte_vdev_unregister; - vfio_get_container_fd; - vfio_get_group_fd; - vfio_get_group_no; - -} DPDK_17.02; - -DPDK_17.08 { - global: - - rte_bus_find; - rte_bus_find_by_device; - rte_bus_find_by_name; - rte_log_get_level; - -} DPDK_17.05; - -EXPERIMENTAL { - global: - - rte_eal_devargs_insert; - rte_eal_devargs_parse; - rte_eal_devargs_remove; - rte_eal_hotplug_add; - rte_eal_hotplug_remove; - rte_service_disable_on_lcore; - rte_service_dump; - rte_service_enable_on_lcore; - rte_service_get_by_id; - rte_service_get_by_name; - rte_service_get_count; - rte_service_get_enabled_on_lcore; - rte_service_is_running; - rte_service_lcore_add; - rte_service_lcore_count; - rte_service_lcore_del; - rte_service_lcore_list; - rte_service_lcore_reset_all; - rte_service_lcore_start; - rte_service_lcore_stop; - rte_service_probe_capability; - rte_service_register; - rte_service_reset; - rte_service_set_stats_enable; - rte_service_start; - rte_service_start_with_defaults; - rte_service_stop; - rte_service_unregister; - -} DPDK_17.08; -- cgit 1.2.3-korg