aboutsummaryrefslogtreecommitdiffstats
path: root/lib/librte_eal
diff options
context:
space:
mode:
author: Luca Boccassi <luca.boccassi@gmail.com> 2017-11-08 14:15:11 +0000
committer: Luca Boccassi <luca.boccassi@gmail.com> 2017-11-08 14:45:54 +0000
commit: 055c52583a2794da8ba1e85a48cce3832372b12f (patch)
tree: 8ceb1cb78fbb46a0f341f8ee24feb3c6b5540013 /lib/librte_eal
parent: f239aed5e674965691846e8ce3f187dd47523689 (diff)
New upstream version 17.11-rc3
Change-Id: I6a5baa40612fe0c20f30b5fa773a6cbbac63a685
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib/librte_eal')
-rw-r--r-- lib/librte_eal/bsdapp/eal/Makefile | 11
-rw-r--r-- lib/librte_eal/bsdapp/eal/eal.c | 112
-rw-r--r-- lib/librte_eal/bsdapp/eal/eal_interrupts.c | 35
-rw-r--r-- lib/librte_eal/bsdapp/eal/eal_memory.c | 21
-rw-r--r-- lib/librte_eal/bsdapp/eal/eal_pci.c | 670
-rw-r--r-- lib/librte_eal/bsdapp/eal/eal_thread.c | 1
-rw-r--r-- lib/librte_eal/bsdapp/eal/eal_timer.c | 1
-rw-r--r-- lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h | 107
-rw-r--r-- lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h | 137
-rw-r--r-- lib/librte_eal/bsdapp/eal/rte_eal_version.map | 239
-rw-r--r-- lib/librte_eal/common/Makefile | 10
-rw-r--r-- lib/librte_eal/common/arch/arm/rte_cpuflags.c | 2
-rw-r--r-- lib/librte_eal/common/arch/arm/rte_cycles.c | 45
-rw-r--r-- lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c | 2
-rw-r--r-- lib/librte_eal/common/arch/ppc_64/rte_cycles.c | 52
-rw-r--r-- lib/librte_eal/common/arch/x86/rte_cpuflags.c | 40
-rw-r--r-- lib/librte_eal/common/arch/x86/rte_cycles.c | 152
-rw-r--r-- lib/librte_eal/common/arch/x86/rte_memcpy.c | 58
-rw-r--r-- lib/librte_eal/common/arch/x86/rte_spinlock.c | 3
-rw-r--r-- lib/librte_eal/common/eal_common_bus.c | 49
-rw-r--r-- lib/librte_eal/common/eal_common_dev.c | 22
-rw-r--r-- lib/librte_eal/common/eal_common_errno.c | 22
-rw-r--r-- lib/librte_eal/common/eal_common_launch.c | 1
-rw-r--r-- lib/librte_eal/common/eal_common_log.c | 43
-rw-r--r-- lib/librte_eal/common/eal_common_memory.c | 5
-rw-r--r-- lib/librte_eal/common/eal_common_memzone.c | 6
-rw-r--r-- lib/librte_eal/common/eal_common_options.c | 11
-rw-r--r-- lib/librte_eal/common/eal_common_pci.c | 580
-rw-r--r-- lib/librte_eal/common/eal_common_pci_uio.c | 233
-rw-r--r-- lib/librte_eal/common/eal_common_tailqs.c | 1
-rw-r--r-- lib/librte_eal/common/eal_common_thread.c | 14
-rw-r--r-- lib/librte_eal/common/eal_common_timer.c | 8
-rw-r--r-- lib/librte_eal/common/eal_common_vdev.c | 342
-rw-r--r-- lib/librte_eal/common/eal_internal_cfg.h | 3
-rw-r--r-- lib/librte_eal/common/eal_options.h | 4
-rw-r--r-- lib/librte_eal/common/eal_private.h | 155
-rw-r--r-- lib/librte_eal/common/include/arch/arm/rte_vect.h | 2
-rw-r--r-- lib/librte_eal/common/include/arch/x86/rte_atomic_32.h | 2
-rw-r--r-- lib/librte_eal/common/include/rte_bitmap.h | 561
-rw-r--r-- lib/librte_eal/common/include/rte_bus.h | 42
-rw-r--r-- lib/librte_eal/common/include/rte_common.h | 23
-rw-r--r-- lib/librte_eal/common/include/rte_debug.h | 2
-rw-r--r-- lib/librte_eal/common/include/rte_dev.h | 31
-rw-r--r-- lib/librte_eal/common/include/rte_eal.h | 52
-rw-r--r-- lib/librte_eal/common/include/rte_eal_interrupts.h (renamed from lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h) | 25
-rw-r--r-- lib/librte_eal/common/include/rte_interrupts.h | 2
-rw-r--r-- lib/librte_eal/common/include/rte_lcore.h | 14
-rw-r--r-- lib/librte_eal/common/include/rte_log.h | 30
-rw-r--r-- lib/librte_eal/common/include/rte_malloc.h | 17
-rw-r--r-- lib/librte_eal/common/include/rte_memory.h | 103
-rw-r--r-- lib/librte_eal/common/include/rte_memzone.h | 6
-rw-r--r-- lib/librte_eal/common/include/rte_pci.h | 598
-rw-r--r-- lib/librte_eal/common/include/rte_service.h | 197
-rw-r--r-- lib/librte_eal/common/include/rte_service_component.h | 36
-rw-r--r-- lib/librte_eal/common/include/rte_vdev.h | 131
-rw-r--r-- lib/librte_eal/common/include/rte_version.h | 6
-rw-r--r-- lib/librte_eal/common/include/rte_vfio.h | 153
-rw-r--r-- lib/librte_eal/common/malloc_elem.c | 8
-rw-r--r-- lib/librte_eal/common/malloc_elem.h | 4
-rw-r--r-- lib/librte_eal/common/rte_malloc.c | 21
-rw-r--r-- lib/librte_eal/common/rte_service.c | 362
-rw-r--r-- lib/librte_eal/linuxapp/Makefile | 2
-rw-r--r-- lib/librte_eal/linuxapp/eal/Makefile | 20
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal.c | 117
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_alarm.c | 1
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 1
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_interrupts.c | 21
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_log.c | 1
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_memory.c | 99
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_pci.c | 722
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_pci_init.h | 97
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 567
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 674
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_thread.c | 1
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_timer.c | 1
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_vfio.c | 75
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_vfio.h | 49
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 7
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_xen_memory.c | 381
-rw-r--r-- lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h | 108
-rw-r--r-- lib/librte_eal/linuxapp/igb_uio/compat.h | 21
-rw-r--r-- lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 315
-rw-r--r-- lib/librte_eal/linuxapp/kni/compat.h | 31
-rw-r--r-- lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h | 24
-rw-r--r-- lib/librte_eal/linuxapp/xen_dom0/Makefile | 53
-rw-r--r-- lib/librte_eal/linuxapp/xen_dom0/compat.h | 15
-rw-r--r-- lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h | 107
-rw-r--r-- lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c | 780
-rw-r--r-- lib/librte_eal/rte_eal_version.map (renamed from lib/librte_eal/linuxapp/eal/rte_eal_version.map) | 72
89 files changed, 2353 insertions, 7634 deletions
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index 005019ed..afa117de 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -46,16 +46,15 @@ LDLIBS += -lexecinfo
LDLIBS += -lpthread
LDLIBS += -lgcc_s
-EXPORT_MAP := rte_eal_version.map
+EXPORT_MAP := ../../rte_eal_version.map
-LIBABIVER := 5
+LIBABIVER := 6
# specific to bsdapp exec-env
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c
@@ -68,9 +67,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_vdev.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci_uio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c
@@ -92,6 +88,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c
# from arch dir
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c
SRCS-$(CONFIG_RTE_ARCH_X86) += rte_spinlock.c
+SRCS-y += rte_cycles.c
CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
@@ -107,7 +104,7 @@ CFLAGS_eal_thread.o += -Wno-return-type
CFLAGS_eal_hpet.o += -Wno-return-type
endif
-INC := rte_interrupts.h
+INC := # no bsdapp specific headers
SYMLINK-$(CONFIG_RTE_EXEC_ENV_BSDAPP)-include/exec-env := \
$(addprefix include/exec-env/,$(INC))
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 5fa59884..369a682a 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -51,7 +51,6 @@
#include <rte_common.h>
#include <rte_debug.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
@@ -66,7 +65,6 @@
#include <rte_cpuflags.h>
#include <rte_interrupts.h>
#include <rte_bus.h>
-#include <rte_pci.h>
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_version.h>
@@ -112,6 +110,13 @@ struct internal_config internal_config;
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* Return mbuf pool ops name */
+const char *
+rte_eal_mbuf_default_mempool_ops(void)
+{
+ return internal_config.mbuf_pool_ops_name;
+}
+
/* Return a pointer to the configuration structure */
struct rte_config *
rte_eal_get_configuration(void)
@@ -119,6 +124,12 @@ rte_eal_get_configuration(void)
return &rte_config;
}
+enum rte_iova_mode
+rte_eal_iova_mode(void)
+{
+ return rte_eal_get_configuration()->iova_mode;
+}
+
/* parse a sysfs (or other) file containing one integer value */
int
eal_parse_sysfs_value(const char *filename, unsigned long *val)
@@ -385,6 +396,9 @@ eal_parse_args(int argc, char **argv)
continue;
switch (opt) {
+ case OPT_MBUF_POOL_OPS_NAME_NUM:
+ internal_config.mbuf_pool_ops_name = optarg;
+ break;
case 'h':
eal_usage(prgname);
exit(EXIT_SUCCESS);
@@ -535,6 +549,29 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ if (eal_plugins_init() < 0) {
+ rte_eal_init_alert("Cannot init plugins\n");
+ rte_errno = EINVAL;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (eal_option_device_parse()) {
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (rte_bus_scan()) {
+ rte_eal_init_alert("Cannot scan the buses for devices\n");
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ /* autodetect the iova mapping mode (default is iova_pa) */
+ rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class();
+
if (internal_config.no_hugetlbfs == 0 &&
internal_config.process_type != RTE_PROC_SECONDARY &&
eal_hugepage_info_init() < 0) {
@@ -603,9 +640,6 @@ rte_eal_init(int argc, char **argv)
eal_check_mem_on_local_socket();
- if (eal_plugins_init() < 0)
- rte_eal_init_alert("Cannot init plugins\n");
-
eal_thread_init_master(rte_config.master_lcore);
ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
@@ -614,17 +648,6 @@ rte_eal_init(int argc, char **argv)
rte_config.master_lcore, thread_id, cpuset,
ret == 0 ? "" : "...");
- if (eal_option_device_parse()) {
- rte_errno = ENODEV;
- return -1;
- }
-
- if (rte_bus_scan()) {
- rte_eal_init_alert("Cannot scan the buses for devices\n");
- rte_errno = ENODEV;
- return -1;
- }
-
RTE_LCORE_FOREACH_SLAVE(i) {
/*
@@ -698,3 +721,60 @@ rte_eal_process_type(void)
{
return rte_config.process_type;
}
+
+int rte_eal_has_pci(void)
+{
+ return !internal_config.no_pci;
+}
+
+int rte_eal_create_uio_dev(void)
+{
+ return internal_config.create_uio_dev;
+}
+
+enum rte_intr_mode
+rte_eal_vfio_intr_mode(void)
+{
+ return RTE_INTR_MODE_NONE;
+}
+
+/* dummy forward declaration. */
+struct vfio_device_info;
+
+/* dummy prototypes. */
+int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+ int *vfio_dev_fd, struct vfio_device_info *device_info);
+int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd);
+int rte_vfio_enable(const char *modname);
+int rte_vfio_is_enabled(const char *modname);
+int rte_vfio_noiommu_is_enabled(void);
+
+int rte_vfio_setup_device(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int *vfio_dev_fd,
+ __rte_unused struct vfio_device_info *device_info)
+{
+ return -1;
+}
+
+int rte_vfio_release_device(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int fd)
+{
+ return -1;
+}
+
+int rte_vfio_enable(__rte_unused const char *modname)
+{
+ return -1;
+}
+
+int rte_vfio_is_enabled(__rte_unused const char *modname)
+{
+ return 0;
+}
+
+int rte_vfio_noiommu_is_enabled(void)
+{
+ return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
index ea2afff4..deba8770 100644
--- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
@@ -125,3 +125,38 @@ rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
RTE_SET_USED(intr_handle);
return 0;
}
+
+int
+rte_epoll_wait(int epfd, struct rte_epoll_event *events,
+ int maxevents, int timeout)
+{
+ RTE_SET_USED(epfd);
+ RTE_SET_USED(events);
+ RTE_SET_USED(maxevents);
+ RTE_SET_USED(timeout);
+
+ return -ENOTSUP;
+}
+
+int
+rte_epoll_ctl(int epfd, int op, int fd, struct rte_epoll_event *event)
+{
+ RTE_SET_USED(epfd);
+ RTE_SET_USED(op);
+ RTE_SET_USED(fd);
+ RTE_SET_USED(event);
+
+ return -ENOTSUP;
+}
+
+int
+rte_intr_tls_epfd(void)
+{
+ return -ENOTSUP;
+}
+
+void
+rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
+{
+ RTE_SET_USED(intr_handle);
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c
index 3614da8d..6ba05857 100644
--- a/lib/librte_eal/bsdapp/eal/eal_memory.c
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -54,9 +54,14 @@ phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
/* XXX not implemented. This function is only used by
- * rte_mempool_virt2phy() when hugepages are disabled. */
+ * rte_mempool_virt2iova() when hugepages are disabled. */
(void)virtaddr;
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
+}
+rte_iova_t
+rte_mem_virt2iova(const void *virtaddr)
+{
+ return rte_mem_virt2phy(virtaddr);
}
int
@@ -73,7 +78,7 @@ rte_eal_hugepage_init(void)
/* for debug purposes, hugetlbfs can be disabled */
if (internal_config.no_hugetlbfs) {
addr = malloc(internal_config.memory);
- mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr;
+ mcfg->memseg[0].iova = (rte_iova_t)(uintptr_t)addr;
mcfg->memseg[0].addr = addr;
mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
mcfg->memseg[0].len = internal_config.memory;
@@ -88,7 +93,7 @@ rte_eal_hugepage_init(void)
hpi = &internal_config.hugepage_info[i];
for (j = 0; j < hpi->num_pages[0]; j++) {
struct rte_memseg *seg;
- uint64_t physaddr;
+ rte_iova_t physaddr;
int error;
size_t sysctl_size = sizeof(physaddr);
char physaddr_str[64];
@@ -114,7 +119,7 @@ rte_eal_hugepage_init(void)
seg = &mcfg->memseg[seg_idx++];
seg->addr = addr;
- seg->phys_addr = physaddr;
+ seg->iova = physaddr;
seg->hugepage_sz = hpi->hugepage_sz;
seg->len = hpi->hugepage_sz;
seg->nchannel = mcfg->nchannel;
@@ -192,3 +197,9 @@ error:
close(fd_hugepage);
return -1;
}
+
+int
+rte_eal_using_phys_addrs(void)
+{
+ return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
deleted file mode 100644
index 04eacdcc..00000000
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ /dev/null
@@ -1,670 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <ctype.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <unistd.h>
-#include <inttypes.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <dirent.h>
-#include <limits.h>
-#include <sys/queue.h>
-#include <sys/mman.h>
-#include <sys/ioctl.h>
-#include <sys/pciio.h>
-#include <dev/pci/pcireg.h>
-
-#if defined(RTE_ARCH_X86)
-#include <machine/cpufunc.h>
-#endif
-
-#include <rte_interrupts.h>
-#include <rte_log.h>
-#include <rte_pci.h>
-#include <rte_common.h>
-#include <rte_launch.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_eal.h>
-#include <rte_eal_memconfig.h>
-#include <rte_per_lcore.h>
-#include <rte_lcore.h>
-#include <rte_malloc.h>
-#include <rte_string_fns.h>
-#include <rte_debug.h>
-#include <rte_devargs.h>
-
-#include "eal_filesystem.h"
-#include "eal_private.h"
-
-/**
- * @file
- * PCI probing under linux
- *
- * This code is used to simulate a PCI probe by parsing information in
- * sysfs. Moreover, when a registered driver matches a device, the
- * kernel driver currently using it is unloaded and replaced by
- * igb_uio module, which is a very minimal userland driver for Intel
- * network card, only providing access to PCI BAR to applications, and
- * enabling bus master.
- */
-
-extern struct rte_pci_bus rte_pci_bus;
-
-/* Map pci device */
-int
-rte_pci_map_device(struct rte_pci_device *dev)
-{
- int ret = -1;
-
- /* try mapping the NIC resources */
- switch (dev->kdrv) {
- case RTE_KDRV_NIC_UIO:
- /* map resources for devices that use uio */
- ret = pci_uio_map_resource(dev);
- break;
- default:
- RTE_LOG(DEBUG, EAL,
- " Not managed by a supported kernel driver, skipped\n");
- ret = 1;
- break;
- }
-
- return ret;
-}
-
-/* Unmap pci device */
-void
-rte_pci_unmap_device(struct rte_pci_device *dev)
-{
- /* try unmapping the NIC resources */
- switch (dev->kdrv) {
- case RTE_KDRV_NIC_UIO:
- /* unmap resources for devices that use uio */
- pci_uio_unmap_resource(dev);
- break;
- default:
- RTE_LOG(DEBUG, EAL,
- " Not managed by a supported kernel driver, skipped\n");
- break;
- }
-}
-
-void
-pci_uio_free_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource *uio_res)
-{
- rte_free(uio_res);
-
- if (dev->intr_handle.fd) {
- close(dev->intr_handle.fd);
- dev->intr_handle.fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
- }
-}
-
-int
-pci_uio_alloc_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource **uio_res)
-{
- char devname[PATH_MAX]; /* contains the /dev/uioX */
- struct rte_pci_addr *loc;
-
- loc = &dev->addr;
-
- snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u",
- dev->addr.bus, dev->addr.devid, dev->addr.function);
-
- if (access(devname, O_RDWR) < 0) {
- RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, "
- "skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
- return 1;
- }
-
- /* save fd if in primary process */
- dev->intr_handle.fd = open(devname, O_RDWR);
- if (dev->intr_handle.fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- devname, strerror(errno));
- goto error;
- }
- dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
-
- /* allocate the mapping details for secondary processes*/
- *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
- if (*uio_res == NULL) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot store uio mmap details\n", __func__);
- goto error;
- }
-
- snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname);
- memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));
-
- return 0;
-
-error:
- pci_uio_free_resource(dev, *uio_res);
- return -1;
-}
-
-int
-pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
- struct mapped_pci_resource *uio_res, int map_idx)
-{
- int fd;
- char *devname;
- void *mapaddr;
- uint64_t offset;
- uint64_t pagesz;
- struct pci_map *maps;
-
- maps = uio_res->maps;
- devname = uio_res->path;
- pagesz = sysconf(_SC_PAGESIZE);
-
- /* allocate memory to keep path */
- maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
- if (maps[map_idx].path == NULL) {
- RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
- strerror(errno));
- return -1;
- }
-
- /*
- * open resource file, to mmap it
- */
- fd = open(devname, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- devname, strerror(errno));
- goto error;
- }
-
- /* if matching map is found, then use it */
- offset = res_idx * pagesz;
- mapaddr = pci_map_resource(NULL, fd, (off_t)offset,
- (size_t)dev->mem_resource[res_idx].len, 0);
- close(fd);
- if (mapaddr == MAP_FAILED)
- goto error;
-
- maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
- maps[map_idx].size = dev->mem_resource[res_idx].len;
- maps[map_idx].addr = mapaddr;
- maps[map_idx].offset = offset;
- strcpy(maps[map_idx].path, devname);
- dev->mem_resource[res_idx].addr = mapaddr;
-
- return 0;
-
-error:
- rte_free(maps[map_idx].path);
- return -1;
-}
-
-static int
-pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
-{
- struct rte_pci_device *dev;
- struct pci_bar_io bar;
- unsigned i, max;
-
- dev = malloc(sizeof(*dev));
- if (dev == NULL) {
- return -1;
- }
-
- memset(dev, 0, sizeof(*dev));
- dev->addr.domain = conf->pc_sel.pc_domain;
- dev->addr.bus = conf->pc_sel.pc_bus;
- dev->addr.devid = conf->pc_sel.pc_dev;
- dev->addr.function = conf->pc_sel.pc_func;
-
- /* get vendor id */
- dev->id.vendor_id = conf->pc_vendor;
-
- /* get device id */
- dev->id.device_id = conf->pc_device;
-
- /* get subsystem_vendor id */
- dev->id.subsystem_vendor_id = conf->pc_subvendor;
-
- /* get subsystem_device id */
- dev->id.subsystem_device_id = conf->pc_subdevice;
-
- /* get class id */
- dev->id.class_id = (conf->pc_class << 16) |
- (conf->pc_subclass << 8) |
- (conf->pc_progif);
-
- /* TODO: get max_vfs */
- dev->max_vfs = 0;
-
- /* FreeBSD has no NUMA support (yet) */
- dev->device.numa_node = 0;
-
- pci_name_set(dev);
-
- /* FreeBSD has only one pass through driver */
- dev->kdrv = RTE_KDRV_NIC_UIO;
-
- /* parse resources */
- switch (conf->pc_hdr & PCIM_HDRTYPE) {
- case PCIM_HDRTYPE_NORMAL:
- max = PCIR_MAX_BAR_0;
- break;
- case PCIM_HDRTYPE_BRIDGE:
- max = PCIR_MAX_BAR_1;
- break;
- case PCIM_HDRTYPE_CARDBUS:
- max = PCIR_MAX_BAR_2;
- break;
- default:
- goto skipdev;
- }
-
- for (i = 0; i <= max; i++) {
- bar.pbi_sel = conf->pc_sel;
- bar.pbi_reg = PCIR_BAR(i);
- if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0)
- continue;
-
- dev->mem_resource[i].len = bar.pbi_length;
- if (PCI_BAR_IO(bar.pbi_base)) {
- dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf));
- continue;
- }
- dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf);
- }
-
- /* device is valid, add in list (sorted) */
- if (TAILQ_EMPTY(&rte_pci_bus.device_list)) {
- rte_pci_add_device(dev);
- }
- else {
- struct rte_pci_device *dev2 = NULL;
- int ret;
-
- TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) {
- ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr);
- if (ret > 0)
- continue;
- else if (ret < 0) {
- rte_pci_insert_device(dev2, dev);
- } else { /* already registered */
- dev2->kdrv = dev->kdrv;
- dev2->max_vfs = dev->max_vfs;
- pci_name_set(dev2);
- memmove(dev2->mem_resource,
- dev->mem_resource,
- sizeof(dev->mem_resource));
- free(dev);
- }
- return 0;
- }
- rte_pci_add_device(dev);
- }
-
- return 0;
-
-skipdev:
- free(dev);
- return 0;
-}
-
-/*
- * Scan the content of the PCI bus, and add the devices in the devices
- * list. Call pci_scan_one() for each pci entry found.
- */
-int
-rte_pci_scan(void)
-{
- int fd;
- unsigned dev_count = 0;
- struct pci_conf matches[16];
- struct pci_conf_io conf_io = {
- .pat_buf_len = 0,
- .num_patterns = 0,
- .patterns = NULL,
- .match_buf_len = sizeof(matches),
- .matches = &matches[0],
- };
-
- /* for debug purposes, PCI can be disabled */
- if (internal_config.no_pci)
- return 0;
-
- fd = open("/dev/pci", O_RDONLY);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
- goto error;
- }
-
- do {
- unsigned i;
- if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) {
- RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n",
- __func__, strerror(errno));
- goto error;
- }
-
- for (i = 0; i < conf_io.num_matches; i++)
- if (pci_scan_one(fd, &matches[i]) < 0)
- goto error;
-
- dev_count += conf_io.num_matches;
- } while(conf_io.status == PCI_GETCONF_MORE_DEVS);
-
- close(fd);
-
- RTE_LOG(DEBUG, EAL, "PCI scan found %u devices\n", dev_count);
- return 0;
-
-error:
- if (fd >= 0)
- close(fd);
- return -1;
-}
-
-int
-pci_update_device(const struct rte_pci_addr *addr)
-{
- int fd;
- struct pci_conf matches[2];
- struct pci_match_conf match = {
- .pc_sel = {
- .pc_domain = addr->domain,
- .pc_bus = addr->bus,
- .pc_dev = addr->devid,
- .pc_func = addr->function,
- },
- };
- struct pci_conf_io conf_io = {
- .pat_buf_len = 0,
- .num_patterns = 1,
- .patterns = &match,
- .match_buf_len = sizeof(matches),
- .matches = &matches[0],
- };
-
- fd = open("/dev/pci", O_RDONLY);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
- goto error;
- }
-
- if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) {
- RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n",
- __func__, strerror(errno));
- goto error;
- }
-
- if (conf_io.num_matches != 1)
- goto error;
-
- if (pci_scan_one(fd, &matches[0]) < 0)
- goto error;
-
- close(fd);
-
- return 0;
-
-error:
- if (fd >= 0)
- close(fd);
- return -1;
-}
-
-/* Read PCI config space. */
-int rte_pci_read_config(const struct rte_pci_device *dev,
- void *buf, size_t len, off_t offset)
-{
- int fd = -1;
- int size;
- struct pci_io pi = {
- .pi_sel = {
- .pc_domain = dev->addr.domain,
- .pc_bus = dev->addr.bus,
- .pc_dev = dev->addr.devid,
- .pc_func = dev->addr.function,
- },
- .pi_reg = offset,
- };
-
- fd = open("/dev/pci", O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
- goto error;
- }
-
- while (len > 0) {
- size = (len >= 4) ? 4 : ((len >= 2) ? 2 : 1);
- pi.pi_width = size;
-
- if (ioctl(fd, PCIOCREAD, &pi) < 0)
- goto error;
- memcpy(buf, &pi.pi_data, size);
-
- buf = (char *)buf + size;
- pi.pi_reg += size;
- len -= size;
- }
- close(fd);
-
- return 0;
-
- error:
- if (fd >= 0)
- close(fd);
- return -1;
-}
-
-/* Write PCI config space. */
-int rte_pci_write_config(const struct rte_pci_device *dev,
- const void *buf, size_t len, off_t offset)
-{
- int fd = -1;
-
- struct pci_io pi = {
- .pi_sel = {
- .pc_domain = dev->addr.domain,
- .pc_bus = dev->addr.bus,
- .pc_dev = dev->addr.devid,
- .pc_func = dev->addr.function,
- },
- .pi_reg = offset,
- .pi_data = *(const uint32_t *)buf,
- .pi_width = len,
- };
-
- if (len == 3 || len > sizeof(pi.pi_data)) {
- RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__);
- goto error;
- }
-
- memcpy(&pi.pi_data, buf, len);
-
- fd = open("/dev/pci", O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
- goto error;
- }
-
- if (ioctl(fd, PCIOCWRITE, &pi) < 0)
- goto error;
-
- close(fd);
- return 0;
-
- error:
- if (fd >= 0)
- close(fd);
- return -1;
-}
-
-int
-rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p)
-{
- int ret;
-
- switch (dev->kdrv) {
-#if defined(RTE_ARCH_X86)
- case RTE_KDRV_NIC_UIO:
- if ((uintptr_t) dev->mem_resource[bar].addr <= UINT16_MAX) {
- p->base = (uintptr_t)dev->mem_resource[bar].addr;
- ret = 0;
- } else
- ret = -1;
- break;
-#endif
- default:
- ret = -1;
- break;
- }
-
- if (!ret)
- p->dev = dev;
-
- return ret;
-}
-
-static void
-pci_uio_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset)
-{
-#if defined(RTE_ARCH_X86)
- uint8_t *d;
- int size;
- unsigned short reg = p->base + offset;
-
- for (d = data; len > 0; d += size, reg += size, len -= size) {
- if (len >= 4) {
- size = 4;
- *(uint32_t *)d = inl(reg);
- } else if (len >= 2) {
- size = 2;
- *(uint16_t *)d = inw(reg);
- } else {
- size = 1;
- *d = inb(reg);
- }
- }
-#else
- RTE_SET_USED(p);
- RTE_SET_USED(data);
- RTE_SET_USED(len);
- RTE_SET_USED(offset);
-#endif
-}
-
-void
-rte_pci_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset)
-{
- switch (p->dev->kdrv) {
- case RTE_KDRV_NIC_UIO:
- pci_uio_ioport_read(p, data, len, offset);
- break;
- default:
- break;
- }
-}
-
-static void
-pci_uio_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset)
-{
-#if defined(RTE_ARCH_X86)
- const uint8_t *s;
- int size;
- unsigned short reg = p->base + offset;
-
- for (s = data; len > 0; s += size, reg += size, len -= size) {
- if (len >= 4) {
- size = 4;
- outl(reg, *(const uint32_t *)s);
- } else if (len >= 2) {
- size = 2;
- outw(reg, *(const uint16_t *)s);
- } else {
- size = 1;
- outb(reg, *s);
- }
- }
-#else
- RTE_SET_USED(p);
- RTE_SET_USED(data);
- RTE_SET_USED(len);
- RTE_SET_USED(offset);
-#endif
-}
-
-void
-rte_pci_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset)
-{
- switch (p->dev->kdrv) {
- case RTE_KDRV_NIC_UIO:
- pci_uio_ioport_write(p, data, len, offset);
- break;
- default:
- break;
- }
-}
-
-int
-rte_pci_ioport_unmap(struct rte_pci_ioport *p)
-{
- int ret;
-
- switch (p->dev->kdrv) {
-#if defined(RTE_ARCH_X86)
- case RTE_KDRV_NIC_UIO:
- ret = 0;
- break;
-#endif
- default:
- ret = -1;
- break;
- }
-
- return ret;
-}
diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c
index 783d68c5..2a2136a2 100644
--- a/lib/librte_eal/bsdapp/eal/eal_thread.c
+++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
@@ -46,7 +46,6 @@
#include <rte_launch.h>
#include <rte_log.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_per_lcore.h>
#include <rte_eal.h>
#include <rte_lcore.h>
diff --git a/lib/librte_eal/bsdapp/eal/eal_timer.c b/lib/librte_eal/bsdapp/eal/eal_timer.c
index f12d9bd2..14421943 100644
--- a/lib/librte_eal/bsdapp/eal/eal_timer.c
+++ b/lib/librte_eal/bsdapp/eal/eal_timer.c
@@ -42,7 +42,6 @@
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_debug.h>
diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h
deleted file mode 100644
index 99a33432..00000000
--- a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*-
- * This file is provided under a dual BSD/LGPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GNU LESSER GENERAL PUBLIC LICENSE
- *
- * Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Contact Information:
- * Intel Corporation
- *
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _RTE_DOM0_COMMON_H_
-#define _RTE_DOM0_COMMON_H_
-
-#ifdef __KERNEL__
-#include <linux/if.h>
-#endif
-
-#define DOM0_NAME_MAX 256
-#define DOM0_MM_DEV "/dev/dom0_mm"
-
-#define DOM0_CONTIG_NUM_ORDER 9 /**< 2M order */
-#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */
-#define DOM0_MEMBLOCK_SIZE 0x200000 /**< Maximum nb. of memory block(2M). */
-#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */
-#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */
-
-#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info)
-#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *)
-#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int)
-#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *)
-
-/**
- * A structure used to store memory information.
- */
-struct memory_info {
- char name[DOM0_NAME_MAX];
- uint64_t size;
-};
-
-/**
- * A structure used to store memory segment information.
- */
-struct memseg_info {
- uint32_t idx;
- uint64_t pfn;
- uint64_t size;
- uint64_t mfn[DOM0_NUM_MEMBLOCK];
-};
-
-/**
- * A structure used to store memory block information.
- */
-struct memblock_info {
- uint8_t exchange_flag;
- uint64_t vir_addr;
- uint64_t pfn;
- uint64_t mfn;
-};
-#endif /* _RTE_DOM0_COMMON_H_ */
diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
deleted file mode 100644
index c1995ee1..00000000
--- a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RTE_INTERRUPTS_H_
-#error "don't include this file directly, please include generic <rte_interrupts.h>"
-#endif
-
-#ifndef _RTE_BSDAPP_INTERRUPTS_H_
-#define _RTE_BSDAPP_INTERRUPTS_H_
-
-#define RTE_INTR_VEC_ZERO_OFFSET 0
-#define RTE_INTR_VEC_RXTX_OFFSET 1
-
-#define RTE_MAX_RXTX_INTR_VEC_ID 32
-
-enum rte_intr_handle_type {
- RTE_INTR_HANDLE_UNKNOWN = 0,
- RTE_INTR_HANDLE_UIO, /**< uio device handle */
- RTE_INTR_HANDLE_ALARM, /**< alarm handle */
- RTE_INTR_HANDLE_MAX
-};
-
-/** Handle for interrupts. */
-struct rte_intr_handle {
- int fd; /**< file descriptor */
- int uio_cfg_fd; /**< UIO config file descriptor */
- enum rte_intr_handle_type type; /**< handle type */
- int max_intr; /**< max interrupt requested */
- uint32_t nb_efd; /**< number of available efds */
- int *intr_vec; /**< intr vector number array */
-};
-
-/**
- * @param intr_handle
- * Pointer to the interrupt handle.
- * @param epfd
- * Epoll instance fd which the intr vector associated to.
- * @param op
- * The operation be performed for the vector.
- * Operation type of {ADD, DEL}.
- * @param vec
- * RX intr vector number added to the epoll instance wait list.
- * @param data
- * User raw data.
- * @return
- * - On success, zero.
- * - On failure, a negative value.
- */
-int
-rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
- int epfd, int op, unsigned int vec, void *data);
-
-/**
- * It enables the fastpath event fds if it's necessary.
- * It creates event fds when multi-vectors allowed,
- * otherwise it multiplexes the single event fds.
- *
- * @param intr_handle
- * Pointer to the interrupt handle.
- * @param nb_efd
- * Number of interrupt vector trying to enable.
- * The value 0 is not allowed.
- * @return
- * - On success, zero.
- * - On failure, a negative value.
- */
-int
-rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd);
-
-/**
- * It disable the fastpath event fds.
- * It deletes registered eventfds and closes the open fds.
- *
- * @param intr_handle
- * Pointer to the interrupt handle.
- */
-void
-rte_intr_efd_disable(struct rte_intr_handle *intr_handle);
-
-/**
- * The fastpath interrupt is enabled or not.
- *
- * @param intr_handle
- * Pointer to the interrupt handle.
- */
-int rte_intr_dp_is_en(struct rte_intr_handle *intr_handle);
-
-/**
- * The interrupt handle instance allows other cause or not.
- * Other cause stands for none fastpath interrupt.
- *
- * @param intr_handle
- * Pointer to the interrupt handle.
- */
-int rte_intr_allow_others(struct rte_intr_handle *intr_handle);
-
-/**
- * The multiple interrupt vector capability of interrupt handle instance.
- * It returns zero if no multiple interrupt vector support.
- *
- * @param intr_handle
- * Pointer to the interrupt handle.
- */
-int
-rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
-
-#endif /* _RTE_BSDAPP_INTERRUPTS_H_ */
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
deleted file mode 100644
index aac6fd77..00000000
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ /dev/null
@@ -1,239 +0,0 @@
-DPDK_2.0 {
- global:
-
- __rte_panic;
- devargs_list;
- eal_parse_sysfs_value;
- eal_timer_source;
- lcore_config;
- per_lcore__lcore_id;
- per_lcore__rte_errno;
- rte_calloc;
- rte_calloc_socket;
- rte_cpu_check_supported;
- rte_cpu_get_flag_enabled;
- rte_cycles_vmware_tsc_map;
- rte_delay_us;
- rte_dump_physmem_layout;
- rte_dump_registers;
- rte_dump_stack;
- rte_dump_tailq;
- rte_eal_alarm_cancel;
- rte_eal_alarm_set;
- rte_eal_devargs_add;
- rte_eal_devargs_dump;
- rte_eal_devargs_type_count;
- rte_eal_get_configuration;
- rte_eal_get_lcore_state;
- rte_eal_get_physmem_layout;
- rte_eal_get_physmem_size;
- rte_eal_has_hugepages;
- rte_eal_hpet_init;
- rte_eal_init;
- rte_eal_iopl_init;
- rte_eal_lcore_role;
- rte_eal_mp_remote_launch;
- rte_eal_mp_wait_lcore;
- rte_eal_parse_devargs_str;
- rte_eal_process_type;
- rte_eal_remote_launch;
- rte_eal_tailq_lookup;
- rte_eal_tailq_register;
- rte_eal_wait_lcore;
- rte_exit;
- rte_free;
- rte_get_hpet_cycles;
- rte_get_hpet_hz;
- rte_get_log_level;
- rte_get_log_type;
- rte_get_tsc_hz;
- rte_hexdump;
- rte_intr_callback_register;
- rte_intr_callback_unregister;
- rte_intr_disable;
- rte_intr_enable;
- rte_log;
- rte_log_cur_msg_loglevel;
- rte_log_cur_msg_logtype;
- rte_logs;
- rte_malloc;
- rte_malloc_dump_stats;
- rte_malloc_get_socket_stats;
- rte_malloc_set_limit;
- rte_malloc_socket;
- rte_malloc_validate;
- rte_malloc_virt2phy;
- rte_mem_lock_page;
- rte_mem_phy2mch;
- rte_mem_virt2phy;
- rte_memdump;
- rte_memory_get_nchannel;
- rte_memory_get_nrank;
- rte_memzone_dump;
- rte_memzone_lookup;
- rte_memzone_reserve;
- rte_memzone_reserve_aligned;
- rte_memzone_reserve_bounded;
- rte_memzone_walk;
- rte_openlog_stream;
- rte_realloc;
- rte_set_application_usage_hook;
- rte_set_log_level;
- rte_set_log_type;
- rte_socket_id;
- rte_strerror;
- rte_strsplit;
- rte_sys_gettid;
- rte_thread_get_affinity;
- rte_thread_set_affinity;
- rte_vlog;
- rte_xen_dom0_memory_attach;
- rte_xen_dom0_memory_init;
- rte_zmalloc;
- rte_zmalloc_socket;
-
- local: *;
-};
-
-DPDK_2.1 {
- global:
-
- rte_intr_allow_others;
- rte_intr_dp_is_en;
- rte_intr_efd_disable;
- rte_intr_efd_enable;
- rte_intr_rx_ctl;
- rte_memzone_free;
-
-} DPDK_2.0;
-
-DPDK_2.2 {
- global:
-
- rte_intr_cap_multiple;
- rte_keepalive_create;
- rte_keepalive_dispatch_pings;
- rte_keepalive_mark_alive;
- rte_keepalive_register_core;
- rte_xen_dom0_supported;
-
-} DPDK_2.1;
-
-DPDK_16.04 {
- global:
-
- rte_cpu_get_flag_name;
- rte_eal_primary_proc_alive;
-
-} DPDK_2.2;
-
-DPDK_16.07 {
- global:
-
- pci_get_sysfs_path;
- rte_keepalive_mark_sleep;
- rte_keepalive_register_relay_callback;
- rte_rtm_supported;
- rte_thread_setname;
-
-} DPDK_16.04;
-
-DPDK_16.11 {
- global:
-
- rte_delay_us_block;
- rte_delay_us_callback_register;
- rte_eal_dev_attach;
- rte_eal_dev_detach;
-
-} DPDK_16.07;
-
-DPDK_17.02 {
- global:
-
- rte_bus_dump;
- rte_bus_probe;
- rte_bus_register;
- rte_bus_scan;
- rte_bus_unregister;
-
-} DPDK_16.11;
-
-DPDK_17.05 {
- global:
-
- rte_cpu_is_supported;
- rte_log_dump;
- rte_log_register;
- rte_log_get_global_level;
- rte_log_set_global_level;
- rte_log_set_level;
- rte_log_set_level_regexp;
- rte_pci_detach;
- rte_pci_dump;
- rte_pci_ioport_map;
- rte_pci_ioport_read;
- rte_pci_ioport_unmap;
- rte_pci_ioport_write;
- rte_pci_map_device;
- rte_pci_probe;
- rte_pci_probe_one;
- rte_pci_read_config;
- rte_pci_register;
- rte_pci_scan;
- rte_pci_unmap_device;
- rte_pci_unregister;
- rte_pci_write_config;
- rte_vdev_init;
- rte_vdev_register;
- rte_vdev_uninit;
- rte_vdev_unregister;
- vfio_get_container_fd;
- vfio_get_group_fd;
- vfio_get_group_no;
-
-} DPDK_17.02;
-
-DPDK_17.08 {
- global:
-
- rte_bus_find;
- rte_bus_find_by_device;
- rte_bus_find_by_name;
- rte_log_get_level;
-
-} DPDK_17.05;
-
-EXPERIMENTAL {
- global:
-
- rte_eal_devargs_insert;
- rte_eal_devargs_parse;
- rte_eal_devargs_remove;
- rte_eal_hotplug_add;
- rte_eal_hotplug_remove;
- rte_service_disable_on_lcore;
- rte_service_dump;
- rte_service_enable_on_lcore;
- rte_service_get_by_id;
- rte_service_get_by_name;
- rte_service_get_count;
- rte_service_get_enabled_on_lcore;
- rte_service_is_running;
- rte_service_lcore_add;
- rte_service_lcore_count;
- rte_service_lcore_del;
- rte_service_lcore_list;
- rte_service_lcore_reset_all;
- rte_service_lcore_start;
- rte_service_lcore_stop;
- rte_service_probe_capability;
- rte_service_register;
- rte_service_reset;
- rte_service_set_stats_enable;
- rte_service_start;
- rte_service_start_with_defaults;
- rte_service_stop;
- rte_service_unregister;
-
-} DPDK_17.08;
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index e8fd67a2..9effd0d4 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -32,16 +32,18 @@
include $(RTE_SDK)/mk/rte.vars.mk
INC := rte_branch_prediction.h rte_common.h
-INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_debug.h rte_eal.h rte_eal_interrupts.h
+INC += rte_errno.h rte_launch.h rte_lcore.h
+INC += rte_log.h rte_memory.h rte_memzone.h
INC += rte_per_lcore.h rte_random.h
INC += rte_tailq.h rte_interrupts.h rte_alarm.h
INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
-INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h
+INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
INC += rte_malloc.h rte_keepalive.h rte_time.h
INC += rte_service.h rte_service_component.h
+INC += rte_bitmap.h rte_vfio.h
GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
@@ -49,7 +51,7 @@ GENERIC_INC += rte_vect.h rte_pause.h rte_io.h
# defined in mk/arch/$(RTE_ARCH)/rte.vars.mk
ARCH_DIR ?= $(RTE_ARCH)
-ARCH_INC := $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h))
+ARCH_INC := $(sort $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h)))
SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC))
SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \
diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
index 5636e9c1..88f1cbe3 100644
--- a/lib/librte_eal/common/arch/arm/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
@@ -137,7 +137,7 @@ rte_cpu_get_features(hwcap_registers_t out)
_Elfx_auxv_t auxv;
auxv_fd = open("/proc/self/auxv", O_RDONLY);
- assert(auxv_fd);
+ assert(auxv_fd != -1);
while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) {
if (auxv.a_type == AT_HWCAP) {
out[REG_HWCAP] = auxv.a_un.a_val;
diff --git a/lib/librte_eal/common/arch/arm/rte_cycles.c b/lib/librte_eal/common/arch/arm/rte_cycles.c
new file mode 100644
index 00000000..3e31e5be
--- /dev/null
+++ b/lib/librte_eal/common/arch/arm/rte_cycles.c
@@ -0,0 +1,45 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium, Inc. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "eal_private.h"
+
+uint64_t
+get_tsc_freq_arch(void)
+{ /* returns the counter frequency read from hardware, or 0 when none is read */
+#if defined RTE_ARCH_ARM64 && !defined RTE_ARM_EAL_RDTSC_USE_PMU
+ uint64_t freq;
+ asm volatile("mrs %0, cntfrq_el0" : "=r" (freq)); /* read the CNTFRQ_EL0 system register */
+ return freq;
+#else
+ return 0; /* not ARM64, or RTE_ARM_EAL_RDTSC_USE_PMU is defined: nothing is read */
+#endif
+}
diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
index fcf96e04..970a61c5 100644
--- a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
@@ -108,7 +108,7 @@ rte_cpu_get_features(hwcap_registers_t out)
Elf64_auxv_t auxv;
auxv_fd = open("/proc/self/auxv", O_RDONLY);
- assert(auxv_fd);
+ assert(auxv_fd != -1);
while (read(auxv_fd, &auxv,
sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) {
if (auxv.a_type == AT_HWCAP)
diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cycles.c b/lib/librte_eal/common/arch/ppc_64/rte_cycles.c
new file mode 100644
index 00000000..69a9f747
--- /dev/null
+++ b/lib/librte_eal/common/arch/ppc_64/rte_cycles.c
@@ -0,0 +1,52 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2014.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+static const char sys_cpu_dir[] = "/sys/devices/system/cpu";
+
+uint64_t
+get_tsc_freq_arch(void)
+{
+	unsigned long cpu_hz = 0; /* 0 = unknown; never return stack garbage */
+	char path[PATH_MAX];
+
+	snprintf(path, sizeof(path), "%s/cpu%d/cpufreq/cpuinfo_cur_freq",
+			sys_cpu_dir, rte_get_master_lcore());
+	if (eal_parse_sysfs_value(path, &cpu_hz) < 0) /* NOTE(review): assumes cpu_hz is left untouched on failure -- confirm */
+		RTE_LOG(WARNING, EAL, "Unable to parse %s\n", path);
+
+	return cpu_hz*1000; /* value read from sysfs is scaled by 1000 */
+}
diff --git a/lib/librte_eal/common/arch/x86/rte_cpuflags.c b/lib/librte_eal/common/arch/x86/rte_cpuflags.c
index 01382571..7d4a0fef 100644
--- a/lib/librte_eal/common/arch/x86/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/x86/rte_cpuflags.c
@@ -36,6 +36,7 @@
#include <stdio.h>
#include <errno.h>
#include <stdint.h>
+#include <cpuid.h>
enum cpu_register_t {
RTE_REG_EAX = 0,
@@ -156,38 +157,12 @@ const struct feature_entry rte_cpu_feature_table[] = {
FEAT_DEF(INVTSC, 0x80000007, 0, RTE_REG_EDX, 8)
};
-/*
- * Execute CPUID instruction and get contents of a specific register
- *
- * This function, when compiled with GCC, will generate architecture-neutral
- * code, as per GCC manual.
- */
-static void
-rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out)
-{
-#if defined(__i386__) && defined(__PIC__)
- /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */
- asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0"
- : "=r" (out[RTE_REG_EBX]),
- "=a" (out[RTE_REG_EAX]),
- "=c" (out[RTE_REG_ECX]),
- "=d" (out[RTE_REG_EDX])
- : "a" (leaf), "c" (subleaf));
-#else
- asm volatile("cpuid"
- : "=a" (out[RTE_REG_EAX]),
- "=b" (out[RTE_REG_EBX]),
- "=c" (out[RTE_REG_ECX]),
- "=d" (out[RTE_REG_EDX])
- : "a" (leaf), "c" (subleaf));
-#endif
-}
-
int
rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
{
const struct feature_entry *feat;
cpuid_registers_t regs;
+ unsigned int maxleaf;
if (feature >= RTE_CPUFLAG_NUMFLAGS)
/* Flag does not match anything in the feature tables */
@@ -199,13 +174,14 @@ rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
/* This entry in the table wasn't filled out! */
return -EFAULT;
- rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs);
- if (((regs[RTE_REG_EAX] ^ feat->leaf) & 0xffff0000) ||
- regs[RTE_REG_EAX] < feat->leaf)
+ maxleaf = __get_cpuid_max(feat->leaf & 0x80000000, NULL);
+
+ if (maxleaf < feat->leaf)
return 0;
- /* get the cpuid leaf containing the desired feature */
- rte_cpu_get_features(feat->leaf, feat->subleaf, regs);
+ __cpuid_count(feat->leaf, feat->subleaf,
+ regs[RTE_REG_EAX], regs[RTE_REG_EBX],
+ regs[RTE_REG_ECX], regs[RTE_REG_EDX]);
/* check if the feature is enabled */
return (regs[feat->reg] >> feat->bit) & 1;
diff --git a/lib/librte_eal/common/arch/x86/rte_cycles.c b/lib/librte_eal/common/arch/x86/rte_cycles.c
new file mode 100644
index 00000000..417850ee
--- /dev/null
+++ b/lib/librte_eal/common/arch/x86/rte_cycles.c
@@ -0,0 +1,152 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <cpuid.h>
+
+#include <rte_common.h>
+
+#include "eal_private.h"
+
+static unsigned int
+rte_cpu_get_model(uint32_t fam_mod_step)
+{
+	/* family sits in bits 11:8 and the base model in bits 7:4 */
+	const uint32_t fam = (fam_mod_step >> 8) & 0xf;
+	uint32_t mdl = (fam_mod_step >> 4) & 0xf;
+
+	/*
+	 * Families 6 and 15 add the extended model (bits 19:16) as high nibble.
+	 */
+	if (fam == 6 || fam == 15)
+		mdl |= ((fam_mod_step >> 16) & 0xf) << 4;
+
+	return mdl;
+}
+
+static int32_t
+rdmsr(int msr, uint64_t *val)
+{
+#ifdef RTE_EXEC_ENV_LINUXAPP
+	/* MSR device of CPU 0; the register number doubles as file offset */
+	const int msr_fd = open("/dev/cpu/0/msr", O_RDONLY);
+	int nread;
+
+	if (msr_fd < 0)
+		return msr_fd;
+
+	/* pread's result is handed back as-is: byte count, or -1 on error */
+	nread = pread(msr_fd, val, sizeof(*val), msr);
+	close(msr_fd);
+
+	return nread;
+#else
+	RTE_SET_USED(msr);
+	RTE_SET_USED(val);
+
+	return -1; /* no MSR access path in this environment */
+#endif
+}
+
+static uint32_t
+check_model_wsm_nhm(uint8_t model)
+{
+	/* model numbers treated as Westmere / Nehalem */
+	static const uint8_t known[] = {
+		0x25, 0x2C, 0x2F,	/* Westmere */
+		0x1E, 0x1F, 0x1A, 0x2E,	/* Nehalem */
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
+		if (known[idx] == model)
+			return 1;
+	}
+
+	/* not one of the listed models */
+	return 0;
+}
+
+static uint32_t
+check_model_gdm_dnv(uint8_t model)
+{
+	/* 0x5C is Goldmont, 0x5F is Denverton */
+	if (model == 0x5C)
+		return 1;
+
+	if (model == 0x5F)
+		return 1;
+
+	/* any other model */
+	return 0;
+}
+
+uint64_t
+get_tsc_freq_arch(void)
+{
+	uint64_t tsc_hz = 0;
+	uint32_t a, b, c, d, maxleaf;
+	uint8_t mult, model;
+	int32_t ret;
+
+	/*
+	 * Preferred source: CPUID leaf 0x15 (Time Stamp Counter and Nominal
+	 * Core Crystal Clock Information). Returns 0 when nothing is known.
+	 */
+	maxleaf = __get_cpuid_max(0, NULL);
+
+	if (maxleaf >= 0x15) {
+		__cpuid(0x15, a, b, c, d);
+
+		/* TSC Hz = ECX (crystal Hz) * EBX / EAX; 64-bit, no zero divisor */
+		if (a && b && c)
+			return (uint64_t)c * b / a;
+	}
+
+	__cpuid(0x1, a, b, c, d);
+	model = rte_cpu_get_model(a);
+
+	if (check_model_wsm_nhm(model))
+		mult = 133; /* scale factor in MHz for the MSR ratio below */
+	else if ((c & bit_AVX) || check_model_gdm_dnv(model))
+		mult = 100;
+	else
+		return 0;
+
+	ret = rdmsr(0xCE, &tsc_hz); /* bits 15:8 of MSR 0xCE are used as ratio */
+	if (ret < 0)
+		return 0;
+
+	return ((tsc_hz >> 8) & 0xff) * mult * 1E6;
+}
diff --git a/lib/librte_eal/common/arch/x86/rte_memcpy.c b/lib/librte_eal/common/arch/x86/rte_memcpy.c
new file mode 100644
index 00000000..174bef15
--- /dev/null
+++ b/lib/librte_eal/common/arch/x86/rte_memcpy.c
@@ -0,0 +1,58 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_memcpy.h>
+#include <rte_cpuflags.h>
+#include <rte_log.h>
+
+void *(*rte_memcpy_ptr)(void *dst, const void *src, size_t n) = NULL; /* assigned by rte_memcpy_init() below */
+
+RTE_INIT(rte_memcpy_init) /* NOTE(review): presumably registers a load-time constructor -- confirm against RTE_INIT */
+{
+#ifdef CC_SUPPORT_AVX512F
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) { /* first choice, only compiled in when CC_SUPPORT_AVX512F is defined */
+ rte_memcpy_ptr = rte_memcpy_avx512f;
+ RTE_LOG(DEBUG, EAL, "AVX512 memcpy is using!\n");
+ return;
+ }
+#endif
+#ifdef CC_SUPPORT_AVX2
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) { /* second choice, only compiled in when CC_SUPPORT_AVX2 is defined */
+ rte_memcpy_ptr = rte_memcpy_avx2;
+ RTE_LOG(DEBUG, EAL, "AVX2 memcpy is using!\n");
+ return;
+ }
+#endif
+ rte_memcpy_ptr = rte_memcpy_sse; /* fallback when neither branch above returned */
+ RTE_LOG(DEBUG, EAL, "Default SSE/AVX memcpy is using!\n");
+}
diff --git a/lib/librte_eal/common/arch/x86/rte_spinlock.c b/lib/librte_eal/common/arch/x86/rte_spinlock.c
index c383e9f0..1244a90b 100644
--- a/lib/librte_eal/common/arch/x86/rte_spinlock.c
+++ b/lib/librte_eal/common/arch/x86/rte_spinlock.c
@@ -38,8 +38,7 @@
uint8_t rte_rtm_supported; /* cache the flag to avoid the overhead
of the rte_cpu_get_flag_enabled function */
-static void __attribute__((constructor))
-rte_rtm_init(void)
+RTE_INIT(rte_rtm_init)
{
rte_rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM);
}
diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c
index 08bec2d9..3e022d51 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -35,6 +35,7 @@
#include <sys/queue.h>
#include <rte_bus.h>
+#include <rte_debug.h>
#include "eal_private.h"
@@ -73,11 +74,9 @@ rte_bus_scan(void)
TAILQ_FOREACH(bus, &rte_bus_list, next) {
ret = bus->scan();
- if (ret) {
+ if (ret)
RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n",
bus->name);
- return ret;
- }
}
return 0;
@@ -97,20 +96,16 @@ rte_bus_probe(void)
}
ret = bus->probe();
- if (ret) {
+ if (ret)
RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
bus->name);
- return ret;
- }
}
if (vbus) {
ret = vbus->probe();
- if (ret) {
+ if (ret)
RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n",
vbus->name);
- return ret;
- }
}
return 0;
@@ -152,15 +147,16 @@ struct rte_bus *
rte_bus_find(const struct rte_bus *start, rte_bus_cmp_t cmp,
const void *data)
{
- struct rte_bus *bus = NULL;
+ struct rte_bus *bus;
- TAILQ_FOREACH(bus, &rte_bus_list, next) {
- if (start && bus == start) {
- start = NULL; /* starting point found */
- continue;
- }
+ if (start != NULL)
+ bus = TAILQ_NEXT(start, next);
+ else
+ bus = TAILQ_FIRST(&rte_bus_list);
+ while (bus != NULL) {
if (cmp(bus, data) == 0)
break;
+ bus = TAILQ_NEXT(bus, next);
}
return bus;
}
@@ -222,3 +218,26 @@ rte_bus_find_by_device_name(const char *str)
c[0] = '\0';
return rte_bus_find(NULL, bus_can_parse, name);
}
+
+
+/*
+ * Combine the IOVA mode reported by every registered bus into one answer.
+ * Buses without a get_iommu_class callback are skipped.
+ */
+enum rte_iova_mode
+rte_bus_get_iommu_class(void)
+{
+	struct rte_bus *cur;
+	int combined = RTE_IOVA_DC;
+
+	TAILQ_FOREACH(cur, &rte_bus_list, next) {
+		if (cur->get_iommu_class == NULL)
+			continue;
+		combined |= cur->get_iommu_class();
+	}
+
+	/* keep VA only if the OR-ed result is exactly RTE_IOVA_VA, else PA */
+	if (combined == RTE_IOVA_VA)
+		return RTE_IOVA_VA;
+	return RTE_IOVA_PA;
+}
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index e2512755..dda8f583 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -67,7 +67,6 @@ static int cmp_dev_name(const struct rte_device *dev, const void *_name)
int rte_eal_dev_attach(const char *name, const char *devargs)
{
struct rte_bus *bus;
- int ret;
if (name == NULL || devargs == NULL) {
RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n");
@@ -80,22 +79,13 @@ int rte_eal_dev_attach(const char *name, const char *devargs)
name);
return -EINVAL;
}
- if (strcmp(bus->name, "pci") == 0)
- return rte_eal_hotplug_add("pci", name, devargs);
- if (strcmp(bus->name, "vdev") != 0) {
- RTE_LOG(ERR, EAL, "Device attach is only supported for PCI and vdev devices.\n");
- return -ENOTSUP;
- }
+ if (strcmp(bus->name, "pci") == 0 || strcmp(bus->name, "vdev") == 0)
+ return rte_eal_hotplug_add(bus->name, name, devargs);
- /*
- * If we haven't found a bus device the user meant to "hotplug" a
- * virtual device instead.
- */
- ret = rte_vdev_init(name, devargs);
- if (ret)
- RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
- name);
- return ret;
+ RTE_LOG(ERR, EAL,
+ "Device attach is only supported for PCI and vdev devices.\n");
+
+ return -ENOTSUP;
}
int rte_eal_dev_detach(struct rte_device *dev)
diff --git a/lib/librte_eal/common/eal_common_errno.c b/lib/librte_eal/common/eal_common_errno.c
index de48d8e4..dc5b7c04 100644
--- a/lib/librte_eal/common/eal_common_errno.c
+++ b/lib/librte_eal/common/eal_common_errno.c
@@ -46,18 +46,20 @@ RTE_DEFINE_PER_LCORE(int, _rte_errno);
const char *
rte_strerror(int errnum)
{
+ /* BSD puts a colon in the "unknown error" messages, Linux doesn't */
+#ifdef RTE_EXEC_ENV_BSDAPP
+ static const char *sep = ":";
+#else
+ static const char *sep = "";
+#endif
#define RETVAL_SZ 256
static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval);
+ char *ret = RTE_PER_LCORE(retval);
/* since some implementations of strerror_r throw an error
* themselves if errnum is too big, we handle that case here */
- if (errnum > RTE_MAX_ERRNO)
- snprintf(RTE_PER_LCORE(retval), RETVAL_SZ,
-#ifdef RTE_EXEC_ENV_BSDAPP
- "Unknown error: %d", errnum);
-#else
- "Unknown error %d", errnum);
-#endif
+ if (errnum >= RTE_MAX_ERRNO)
+ snprintf(ret, RETVAL_SZ, "Unknown error%s %d", sep, errnum);
else
switch (errnum){
case E_RTE_SECONDARY:
@@ -65,8 +67,10 @@ rte_strerror(int errnum)
case E_RTE_NO_CONFIG:
return "Missing rte_config structure";
default:
- strerror_r(errnum, RTE_PER_LCORE(retval), RETVAL_SZ);
+ if (strerror_r(errnum, ret, RETVAL_SZ) != 0)
+ snprintf(ret, RETVAL_SZ, "Unknown error%s %d",
+ sep, errnum);
}
- return RTE_PER_LCORE(retval);
+ return ret;
}
diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c
index 137c191d..2d5cae9f 100644
--- a/lib/librte_eal/common/eal_common_launch.c
+++ b/lib/librte_eal/common/eal_common_launch.c
@@ -38,7 +38,6 @@
#include <rte_launch.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_atomic.h>
#include <rte_pause.h>
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
index 0e3b9320..be404136 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -89,14 +89,6 @@ rte_log_set_global_level(uint32_t level)
rte_logs.level = (uint32_t)level;
}
-/* Set global log level */
-/* replaced by rte_log_set_global_level */
-__rte_deprecated void
-rte_set_log_level(uint32_t level)
-{
- rte_log_set_global_level(level);
-}
-
/* Get global log level */
uint32_t
rte_log_get_global_level(void)
@@ -104,14 +96,6 @@ rte_log_get_global_level(void)
return rte_logs.level;
}
-/* Get global log level */
-/* replaced by rte_log_get_global_level */
-uint32_t
-rte_get_log_level(void)
-{
- return rte_log_get_global_level();
-}
-
int
rte_log_get_level(uint32_t type)
{
@@ -121,30 +105,6 @@ rte_log_get_level(uint32_t type)
return rte_logs.dynamic_types[type].loglevel;
}
-/* Set global log type */
-__rte_deprecated void
-rte_set_log_type(uint32_t type, int enable)
-{
- if (type < RTE_LOGTYPE_FIRST_EXT_ID) {
- if (enable)
- rte_logs.type |= 1 << type;
- else
- rte_logs.type &= ~(1 << type);
- }
-
- if (enable)
- rte_log_set_level(type, 0);
- else
- rte_log_set_level(type, RTE_LOG_DEBUG);
-}
-
-/* Get global log type */
-__rte_deprecated uint32_t
-rte_get_log_type(void)
-{
- return rte_logs.type;
-}
-
int
rte_log_set_level(uint32_t type, uint32_t level)
{
@@ -289,7 +249,8 @@ static const struct logtype logtype_strings[] = {
{RTE_LOGTYPE_USER8, "user8"}
};
-RTE_INIT(rte_log_init);
+/* Logging should be the first initializer (before drivers and bus) */
+RTE_INIT_PRIO(rte_log_init, 101);
static void
rte_log_init(void)
{
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 996877ef..fc6c44da 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -41,7 +41,6 @@
#include <sys/queue.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_log.h>
@@ -96,11 +95,11 @@ rte_dump_physmem_layout(FILE *f)
if (mcfg->memseg[i].addr == NULL)
break;
- fprintf(f, "Segment %u: phys:0x%"PRIx64", len:%zu, "
+ fprintf(f, "Segment %u: IOVA:0x%"PRIx64", len:%zu, "
"virt:%p, socket_id:%"PRId32", "
"hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
"nrank:%"PRIx32"\n", i,
- mcfg->memseg[i].phys_addr,
+ mcfg->memseg[i].iova,
mcfg->memseg[i].len,
mcfg->memseg[i].addr,
mcfg->memseg[i].socket_id,
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 3026e36b..ea072a25 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -251,7 +251,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
mcfg->memzone_cnt++;
snprintf(mz->name, sizeof(mz->name), "%s", name);
- mz->phys_addr = rte_malloc_virt2phy(mz_addr);
+ mz->iova = rte_malloc_virt2iova(mz_addr);
mz->addr = mz_addr;
mz->len = (requested_len == 0 ? elem->size : requested_len);
mz->hugepage_sz = elem->ms->hugepage_sz;
@@ -391,10 +391,10 @@ rte_memzone_dump(FILE *f)
for (i=0; i<RTE_MAX_MEMZONE; i++) {
if (mcfg->memzone[i].addr == NULL)
break;
- fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx"
+ fprintf(f, "Zone %u: name:<%s>, IO:0x%"PRIx64", len:0x%zx"
", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
mcfg->memzone[i].name,
- mcfg->memzone[i].phys_addr,
+ mcfg->memzone[i].iova,
mcfg->memzone[i].len,
mcfg->memzone[i].addr,
mcfg->memzone[i].socket_id,
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 1da185e5..996a0342 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -85,6 +85,7 @@ eal_long_options[] = {
{OPT_LCORES, 1, NULL, OPT_LCORES_NUM },
{OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM },
{OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM },
+ {OPT_MBUF_POOL_OPS_NAME, 1, NULL, OPT_MBUF_POOL_OPS_NAME_NUM},
{OPT_NO_HPET, 0, NULL, OPT_NO_HPET_NUM },
{OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM },
{OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM },
@@ -97,7 +98,6 @@ eal_long_options[] = {
{OPT_VDEV, 1, NULL, OPT_VDEV_NUM },
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
{OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
- {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM },
{0, 0, NULL, 0 }
};
@@ -208,8 +208,6 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
internal_cfg->syslog_facility = LOG_DAEMON;
- internal_cfg->xen_dom0_support = 0;
-
/* if set to NONE, interrupt mode is determined automatically */
internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE;
@@ -220,6 +218,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
#endif
internal_cfg->vmware_tsc_map = 0;
internal_cfg->create_uio_dev = 0;
+ internal_cfg->mbuf_pool_ops_name = RTE_MBUF_DEFAULT_MEMPOOL_OPS;
}
static int
@@ -279,12 +278,13 @@ int
eal_plugins_init(void)
{
struct shared_driver *solib = NULL;
+ struct stat sb;
- if (*default_solib_dir != '\0')
+ if (*default_solib_dir != '\0' && stat(default_solib_dir, &sb) == 0 &&
+ S_ISDIR(sb.st_mode))
eal_plugin_add(default_solib_dir);
TAILQ_FOREACH(solib, &solib_list, next) {
- struct stat sb;
if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) {
if (eal_plugindir_init(solib->name) == -1) {
@@ -1279,6 +1279,7 @@ eal_common_usage(void)
" '@' can be omitted if cpus and lcores have the same value\n"
" -s SERVICE COREMASK Hexadecimal bitmask of cores to be used as service cores\n"
" --"OPT_MASTER_LCORE" ID Core ID that is used as master\n"
+ " --"OPT_MBUF_POOL_OPS_NAME" Pool ops name for mbuf to use\n"
" -n CHANNELS Number of memory channels\n"
" -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n"
" -r RANKS Force number of memory ranks (don't detect)\n"
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
deleted file mode 100644
index 52fd38cd..00000000
--- a/lib/librte_eal/common/eal_common_pci.c
+++ /dev/null
@@ -1,580 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * Copyright 2013-2014 6WIND S.A.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include <inttypes.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/queue.h>
-#include <sys/mman.h>
-
-#include <rte_errno.h>
-#include <rte_interrupts.h>
-#include <rte_log.h>
-#include <rte_bus.h>
-#include <rte_pci.h>
-#include <rte_per_lcore.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_eal.h>
-#include <rte_string_fns.h>
-#include <rte_common.h>
-#include <rte_devargs.h>
-
-#include "eal_private.h"
-
-extern struct rte_pci_bus rte_pci_bus;
-
-#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
-
-const char *pci_get_sysfs_path(void)
-{
- const char *path = NULL;
-
- path = getenv("SYSFS_PCI_DEVICES");
- if (path == NULL)
- return SYSFS_PCI_DEVICES;
-
- return path;
-}
-
-static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
-{
- struct rte_devargs *devargs;
- struct rte_pci_addr addr;
- struct rte_bus *pbus;
-
- pbus = rte_bus_find_by_name("pci");
- TAILQ_FOREACH(devargs, &devargs_list, next) {
- if (devargs->bus != pbus)
- continue;
- devargs->bus->parse(devargs->name, &addr);
- if (!rte_eal_compare_pci_addr(&dev->addr, &addr))
- return devargs;
- }
- return NULL;
-}
-
-void
-pci_name_set(struct rte_pci_device *dev)
-{
- struct rte_devargs *devargs;
-
- /* Each device has its internal, canonical name set. */
- rte_pci_device_name(&dev->addr,
- dev->name, sizeof(dev->name));
- devargs = pci_devargs_lookup(dev);
- dev->device.devargs = devargs;
- /* In blacklist mode, if the device is not blacklisted, no
- * rte_devargs exists for it.
- */
- if (devargs != NULL)
- /* If an rte_devargs exists, the generic rte_device uses the
- * given name as its namea
- */
- dev->device.name = dev->device.devargs->name;
- else
- /* Otherwise, it uses the internal, canonical form. */
- dev->device.name = dev->name;
-}
-
-/* map a particular resource from a file */
-void *
-pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
- int additional_flags)
-{
- void *mapaddr;
-
- /* Map the PCI memory resource of device */
- mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
- MAP_SHARED | additional_flags, fd, offset);
- if (mapaddr == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
- __func__, fd, requested_addr,
- (unsigned long)size, (unsigned long)offset,
- strerror(errno), mapaddr);
- } else
- RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr);
-
- return mapaddr;
-}
-
-/* unmap a particular resource */
-void
-pci_unmap_resource(void *requested_addr, size_t size)
-{
- if (requested_addr == NULL)
- return;
-
- /* Unmap the PCI memory resource of device */
- if (munmap(requested_addr, size)) {
- RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n",
- __func__, requested_addr, (unsigned long)size,
- strerror(errno));
- } else
- RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n",
- requested_addr);
-}
-
-/*
- * Match the PCI Driver and Device using the ID Table
- *
- * @param pci_drv
- * PCI driver from which ID table would be extracted
- * @param pci_dev
- * PCI device to match against the driver
- * @return
- * 1 for successful match
- * 0 for unsuccessful match
- */
-static int
-rte_pci_match(const struct rte_pci_driver *pci_drv,
- const struct rte_pci_device *pci_dev)
-{
- const struct rte_pci_id *id_table;
-
- for (id_table = pci_drv->id_table; id_table->vendor_id != 0;
- id_table++) {
- /* check if device's identifiers match the driver's ones */
- if (id_table->vendor_id != pci_dev->id.vendor_id &&
- id_table->vendor_id != PCI_ANY_ID)
- continue;
- if (id_table->device_id != pci_dev->id.device_id &&
- id_table->device_id != PCI_ANY_ID)
- continue;
- if (id_table->subsystem_vendor_id !=
- pci_dev->id.subsystem_vendor_id &&
- id_table->subsystem_vendor_id != PCI_ANY_ID)
- continue;
- if (id_table->subsystem_device_id !=
- pci_dev->id.subsystem_device_id &&
- id_table->subsystem_device_id != PCI_ANY_ID)
- continue;
- if (id_table->class_id != pci_dev->id.class_id &&
- id_table->class_id != RTE_CLASS_ANY_ID)
- continue;
-
- return 1;
- }
-
- return 0;
-}
-
-/*
- * If vendor/device ID match, call the probe() function of the
- * driver.
- */
-static int
-rte_pci_probe_one_driver(struct rte_pci_driver *dr,
- struct rte_pci_device *dev)
-{
- int ret;
- struct rte_pci_addr *loc;
-
- if ((dr == NULL) || (dev == NULL))
- return -EINVAL;
-
- loc = &dev->addr;
-
- /* The device is not blacklisted; Check if driver supports it */
- if (!rte_pci_match(dr, dev))
- /* Match of device and driver failed */
- return 1;
-
- RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
- loc->domain, loc->bus, loc->devid, loc->function,
- dev->device.numa_node);
-
- /* no initialization when blacklisted, return without error */
- if (dev->device.devargs != NULL &&
- dev->device.devargs->policy ==
- RTE_DEV_BLACKLISTED) {
- RTE_LOG(INFO, EAL, " Device is blacklisted, not"
- " initializing\n");
- return 1;
- }
-
- if (dev->device.numa_node < 0) {
- RTE_LOG(WARNING, EAL, " Invalid NUMA socket, default to 0\n");
- dev->device.numa_node = 0;
- }
-
- RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id,
- dev->id.device_id, dr->driver.name);
-
- if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
- /* map resources for devices that use igb_uio */
- ret = rte_pci_map_device(dev);
- if (ret != 0)
- return ret;
- }
-
- /* reference driver structure */
- dev->driver = dr;
- dev->device.driver = &dr->driver;
-
- /* call the driver probe() function */
- ret = dr->probe(dr, dev);
- if (ret) {
- dev->driver = NULL;
- dev->device.driver = NULL;
- if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) &&
- /* Don't unmap if device is unsupported and
- * driver needs mapped resources.
- */
- !(ret > 0 &&
- (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES)))
- rte_pci_unmap_device(dev);
- }
-
- return ret;
-}
-
-/*
- * If vendor/device ID match, call the remove() function of the
- * driver.
- */
-static int
-rte_pci_detach_dev(struct rte_pci_device *dev)
-{
- struct rte_pci_addr *loc;
- struct rte_pci_driver *dr;
-
- if (dev == NULL)
- return -EINVAL;
-
- dr = dev->driver;
- loc = &dev->addr;
-
- RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
- loc->domain, loc->bus, loc->devid,
- loc->function, dev->device.numa_node);
-
- RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id,
- dev->id.device_id, dr->driver.name);
-
- if (dr->remove && (dr->remove(dev) < 0))
- return -1; /* negative value is an error */
-
- /* clear driver structure */
- dev->driver = NULL;
-
- if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)
- /* unmap resources for devices that use igb_uio */
- rte_pci_unmap_device(dev);
-
- return 0;
-}
-
-/*
- * If vendor/device ID match, call the probe() function of all
- * registered driver for the given device. Return -1 if initialization
- * failed, return 1 if no driver is found for this device.
- */
-static int
-pci_probe_all_drivers(struct rte_pci_device *dev)
-{
- struct rte_pci_driver *dr = NULL;
- int rc = 0;
-
- if (dev == NULL)
- return -1;
-
- /* Check if a driver is already loaded */
- if (dev->driver != NULL)
- return 0;
-
- FOREACH_DRIVER_ON_PCIBUS(dr) {
- rc = rte_pci_probe_one_driver(dr, dev);
- if (rc < 0)
- /* negative value is an error */
- return -1;
- if (rc > 0)
- /* positive value means driver doesn't support it */
- continue;
- return 0;
- }
- return 1;
-}
-
-/*
- * Find the pci device specified by pci address, then invoke probe function of
- * the driver of the device.
- */
-int
-rte_pci_probe_one(const struct rte_pci_addr *addr)
-{
- struct rte_pci_device *dev = NULL;
-
- int ret = 0;
-
- if (addr == NULL)
- return -1;
-
- /* update current pci device in global list, kernel bindings might have
- * changed since last time we looked at it.
- */
- if (pci_update_device(addr) < 0)
- goto err_return;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- if (rte_eal_compare_pci_addr(&dev->addr, addr))
- continue;
-
- ret = pci_probe_all_drivers(dev);
- if (ret)
- goto err_return;
- return 0;
- }
- return -1;
-
-err_return:
- RTE_LOG(WARNING, EAL,
- "Requested device " PCI_PRI_FMT " cannot be used\n",
- addr->domain, addr->bus, addr->devid, addr->function);
- return -1;
-}
-
-/*
- * Detach device specified by its pci address.
- */
-int
-rte_pci_detach(const struct rte_pci_addr *addr)
-{
- struct rte_pci_device *dev = NULL;
- int ret = 0;
-
- if (addr == NULL)
- return -1;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- if (rte_eal_compare_pci_addr(&dev->addr, addr))
- continue;
-
- ret = rte_pci_detach_dev(dev);
- if (ret < 0)
- /* negative value is an error */
- goto err_return;
- if (ret > 0)
- /* positive value means driver doesn't support it */
- continue;
-
- rte_pci_remove_device(dev);
- free(dev);
- return 0;
- }
- return -1;
-
-err_return:
- RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
- " cannot be used\n", dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
- return -1;
-}
-
-/*
- * Scan the content of the PCI bus, and call the probe() function for
- * all registered drivers that have a matching entry in its id_table
- * for discovered devices.
- */
-int
-rte_pci_probe(void)
-{
- struct rte_pci_device *dev = NULL;
- size_t probed = 0, failed = 0;
- struct rte_devargs *devargs;
- int probe_all = 0;
- int ret = 0;
-
- if (rte_pci_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST)
- probe_all = 1;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- probed++;
-
- devargs = dev->device.devargs;
- /* probe all or only whitelisted devices */
- if (probe_all)
- ret = pci_probe_all_drivers(dev);
- else if (devargs != NULL &&
- devargs->policy == RTE_DEV_WHITELISTED)
- ret = pci_probe_all_drivers(dev);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT
- " cannot be used\n", dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
- rte_errno = errno;
- failed++;
- ret = 0;
- }
- }
-
- return (probed && probed == failed) ? -1 : 0;
-}
-
-/* dump one device */
-static int
-pci_dump_one_device(FILE *f, struct rte_pci_device *dev)
-{
- int i;
-
- fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
- fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id,
- dev->id.device_id);
-
- for (i = 0; i != sizeof(dev->mem_resource) /
- sizeof(dev->mem_resource[0]); i++) {
- fprintf(f, " %16.16"PRIx64" %16.16"PRIx64"\n",
- dev->mem_resource[i].phys_addr,
- dev->mem_resource[i].len);
- }
- return 0;
-}
-
-/* dump devices on the bus */
-void
-rte_pci_dump(FILE *f)
-{
- struct rte_pci_device *dev = NULL;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- pci_dump_one_device(f, dev);
- }
-}
-
-static int
-pci_parse(const char *name, void *addr)
-{
- struct rte_pci_addr *out = addr;
- struct rte_pci_addr pci_addr;
- bool parse;
-
- parse = (eal_parse_pci_BDF(name, &pci_addr) == 0 ||
- eal_parse_pci_DomBDF(name, &pci_addr) == 0);
- if (parse && addr != NULL)
- *out = pci_addr;
- return parse == false;
-}
-
-/* register a driver */
-void
-rte_pci_register(struct rte_pci_driver *driver)
-{
- TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next);
- driver->bus = &rte_pci_bus;
-}
-
-/* unregister a driver */
-void
-rte_pci_unregister(struct rte_pci_driver *driver)
-{
- TAILQ_REMOVE(&rte_pci_bus.driver_list, driver, next);
- driver->bus = NULL;
-}
-
-/* Add a device to PCI bus */
-void
-rte_pci_add_device(struct rte_pci_device *pci_dev)
-{
- TAILQ_INSERT_TAIL(&rte_pci_bus.device_list, pci_dev, next);
-}
-
-/* Insert a device into a predefined position in PCI bus */
-void
-rte_pci_insert_device(struct rte_pci_device *exist_pci_dev,
- struct rte_pci_device *new_pci_dev)
-{
- TAILQ_INSERT_BEFORE(exist_pci_dev, new_pci_dev, next);
-}
-
-/* Remove a device from PCI bus */
-void
-rte_pci_remove_device(struct rte_pci_device *pci_dev)
-{
- TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next);
-}
-
-static struct rte_device *
-pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
- const void *data)
-{
- struct rte_pci_device *dev;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- if (start && &dev->device == start) {
- start = NULL; /* starting point found */
- continue;
- }
- if (cmp(&dev->device, data) == 0)
- return &dev->device;
- }
-
- return NULL;
-}
-
-static int
-pci_plug(struct rte_device *dev)
-{
- return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev));
-}
-
-static int
-pci_unplug(struct rte_device *dev)
-{
- struct rte_pci_device *pdev;
- int ret;
-
- pdev = RTE_DEV_TO_PCI(dev);
- ret = rte_pci_detach_dev(pdev);
- rte_pci_remove_device(pdev);
- free(pdev);
- return ret;
-}
-
-struct rte_pci_bus rte_pci_bus = {
- .bus = {
- .scan = rte_pci_scan,
- .probe = rte_pci_probe,
- .find_device = pci_find_device,
- .plug = pci_plug,
- .unplug = pci_unplug,
- .parse = pci_parse,
- },
- .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
- .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
-};
-
-RTE_REGISTER_BUS(pci, rte_pci_bus.bus);
diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c
deleted file mode 100644
index 367a6816..00000000
--- a/lib/librte_eal/common/eal_common_pci_uio.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <fcntl.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-#include <rte_eal.h>
-#include <rte_tailq.h>
-#include <rte_log.h>
-#include <rte_malloc.h>
-
-#include "eal_private.h"
-
-static struct rte_tailq_elem rte_uio_tailq = {
- .name = "UIO_RESOURCE_LIST",
-};
-EAL_REGISTER_TAILQ(rte_uio_tailq)
-
-static int
-pci_uio_map_secondary(struct rte_pci_device *dev)
-{
- int fd, i, j;
- struct mapped_pci_resource *uio_res;
- struct mapped_pci_res_list *uio_res_list =
- RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
-
- TAILQ_FOREACH(uio_res, uio_res_list, next) {
-
- /* skip this element if it doesn't match our PCI address */
- if (rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
- continue;
-
- for (i = 0; i != uio_res->nb_maps; i++) {
- /*
- * open devname, to mmap it
- */
- fd = open(uio_res->maps[i].path, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- uio_res->maps[i].path, strerror(errno));
- return -1;
- }
-
- void *mapaddr = pci_map_resource(uio_res->maps[i].addr,
- fd, (off_t)uio_res->maps[i].offset,
- (size_t)uio_res->maps[i].size, 0);
- /* fd is not needed in slave process, close it */
- close(fd);
- if (mapaddr != uio_res->maps[i].addr) {
- RTE_LOG(ERR, EAL,
- "Cannot mmap device resource file %s to address: %p\n",
- uio_res->maps[i].path,
- uio_res->maps[i].addr);
- if (mapaddr != MAP_FAILED) {
- /* unmap addrs correctly mapped */
- for (j = 0; j < i; j++)
- pci_unmap_resource(
- uio_res->maps[j].addr,
- (size_t)uio_res->maps[j].size);
- /* unmap addr wrongly mapped */
- pci_unmap_resource(mapaddr,
- (size_t)uio_res->maps[i].size);
- }
- return -1;
- }
- }
- return 0;
- }
-
- RTE_LOG(ERR, EAL, "Cannot find resource for device\n");
- return 1;
-}
-
-/* map the PCI resource of a PCI device in virtual memory */
-int
-pci_uio_map_resource(struct rte_pci_device *dev)
-{
- int i, map_idx = 0, ret;
- uint64_t phaddr;
- struct mapped_pci_resource *uio_res = NULL;
- struct mapped_pci_res_list *uio_res_list =
- RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
-
- dev->intr_handle.fd = -1;
- dev->intr_handle.uio_cfg_fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
-
- /* secondary processes - use already recorded details */
- if (rte_eal_process_type() != RTE_PROC_PRIMARY)
- return pci_uio_map_secondary(dev);
-
- /* allocate uio resource */
- ret = pci_uio_alloc_resource(dev, &uio_res);
- if (ret)
- return ret;
-
- /* Map all BARs */
- for (i = 0; i != PCI_MAX_RESOURCE; i++) {
- /* skip empty BAR */
- phaddr = dev->mem_resource[i].phys_addr;
- if (phaddr == 0)
- continue;
-
- ret = pci_uio_map_resource_by_index(dev, i,
- uio_res, map_idx);
- if (ret)
- goto error;
-
- map_idx++;
- }
-
- uio_res->nb_maps = map_idx;
-
- TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
-
- return 0;
-error:
- for (i = 0; i < map_idx; i++) {
- pci_unmap_resource(uio_res->maps[i].addr,
- (size_t)uio_res->maps[i].size);
- rte_free(uio_res->maps[i].path);
- }
- pci_uio_free_resource(dev, uio_res);
- return -1;
-}
-
-static void
-pci_uio_unmap(struct mapped_pci_resource *uio_res)
-{
- int i;
-
- if (uio_res == NULL)
- return;
-
- for (i = 0; i != uio_res->nb_maps; i++) {
- pci_unmap_resource(uio_res->maps[i].addr,
- (size_t)uio_res->maps[i].size);
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- rte_free(uio_res->maps[i].path);
- }
-}
-
-static struct mapped_pci_resource *
-pci_uio_find_resource(struct rte_pci_device *dev)
-{
- struct mapped_pci_resource *uio_res;
- struct mapped_pci_res_list *uio_res_list =
- RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
-
- if (dev == NULL)
- return NULL;
-
- TAILQ_FOREACH(uio_res, uio_res_list, next) {
-
- /* skip this element if it doesn't match our PCI address */
- if (!rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
- return uio_res;
- }
- return NULL;
-}
-
-/* unmap the PCI resource of a PCI device in virtual memory */
-void
-pci_uio_unmap_resource(struct rte_pci_device *dev)
-{
- struct mapped_pci_resource *uio_res;
- struct mapped_pci_res_list *uio_res_list =
- RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
-
- if (dev == NULL)
- return;
-
- /* find an entry for the device */
- uio_res = pci_uio_find_resource(dev);
- if (uio_res == NULL)
- return;
-
- /* secondary processes - just free maps */
- if (rte_eal_process_type() != RTE_PROC_PRIMARY)
- return pci_uio_unmap(uio_res);
-
- TAILQ_REMOVE(uio_res_list, uio_res, next);
-
- /* unmap all resources */
- pci_uio_unmap(uio_res);
-
- /* free uio resource */
- rte_free(uio_res);
-
- /* close fd if in primary process */
- close(dev->intr_handle.fd);
- if (dev->intr_handle.uio_cfg_fd >= 0) {
- close(dev->intr_handle.uio_cfg_fd);
- dev->intr_handle.uio_cfg_fd = -1;
- }
-
- dev->intr_handle.fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
-}
diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c
index 55955f9e..6ae09fdb 100644
--- a/lib/librte_eal/common/eal_common_tailqs.c
+++ b/lib/librte_eal/common/eal_common_tailqs.c
@@ -40,7 +40,6 @@
#include <inttypes.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 2405e93f..55e96963 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -53,6 +53,20 @@ unsigned rte_socket_id(void)
return RTE_PER_LCORE(_socket_id);
}
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
+{
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ if (lcore_id >= RTE_MAX_LCORE)
+ return -EINVAL; /* reject the id before it is used to index cfg->lcore_role[] */
+
+ if (cfg->lcore_role[lcore_id] == role)
+ return 0; /* note: 0 (not a non-zero "true") means the lcore has this role */
+
+ return -EINVAL; /* a role mismatch is reported the same way as a bad lcore_id */
+}
+
int eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
{
unsigned cpu = 0;
diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c
index ed0b16d0..568ae2fd 100644
--- a/lib/librte_eal/common/eal_common_timer.c
+++ b/lib/librte_eal/common/eal_common_timer.c
@@ -80,8 +80,11 @@ estimate_tsc_freq(void)
void
set_tsc_freq(void)
{
- uint64_t freq = get_tsc_freq();
+ uint64_t freq;
+ freq = get_tsc_freq_arch();
+ if (!freq)
+ freq = get_tsc_freq();
if (!freq)
freq = estimate_tsc_freq();
@@ -94,8 +97,7 @@ void rte_delay_us_callback_register(void (*userfunc)(unsigned int))
rte_delay_us = userfunc;
}
-static void __attribute__((constructor))
-rte_timer_init(void)
+RTE_INIT(rte_timer_init)
{
/* set rte_delay_us_block as a delay function */
rte_delay_us_callback_register(rte_delay_us_block);
diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c
deleted file mode 100644
index f7e547a6..00000000
--- a/lib/librte_eal/common/eal_common_vdev.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2016 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <sys/queue.h>
-
-#include <rte_eal.h>
-#include <rte_dev.h>
-#include <rte_bus.h>
-#include <rte_vdev.h>
-#include <rte_common.h>
-#include <rte_devargs.h>
-#include <rte_memory.h>
-#include <rte_errno.h>
-
-/* Forward declare to access virtual bus name */
-static struct rte_bus rte_vdev_bus;
-
-/** Double linked list of virtual device drivers. */
-TAILQ_HEAD(vdev_device_list, rte_vdev_device);
-
-static struct vdev_device_list vdev_device_list =
- TAILQ_HEAD_INITIALIZER(vdev_device_list);
-struct vdev_driver_list vdev_driver_list =
- TAILQ_HEAD_INITIALIZER(vdev_driver_list);
-
-/* register a driver */
-void
-rte_vdev_register(struct rte_vdev_driver *driver)
-{
- TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next);
-}
-
-/* unregister a driver */
-void
-rte_vdev_unregister(struct rte_vdev_driver *driver)
-{
- TAILQ_REMOVE(&vdev_driver_list, driver, next);
-}
-
-static int
-vdev_parse(const char *name, void *addr)
-{
- struct rte_vdev_driver **out = addr;
- struct rte_vdev_driver *driver = NULL;
-
- TAILQ_FOREACH(driver, &vdev_driver_list, next) {
- if (strncmp(driver->driver.name, name,
- strlen(driver->driver.name)) == 0)
- break;
- if (driver->driver.alias &&
- strncmp(driver->driver.alias, name,
- strlen(driver->driver.alias)) == 0)
- break;
- }
- if (driver != NULL &&
- addr != NULL)
- *out = driver;
- return driver == NULL;
-}
-
-static int
-vdev_probe_all_drivers(struct rte_vdev_device *dev)
-{
- const char *name;
- struct rte_vdev_driver *driver;
- int ret;
-
- name = rte_vdev_device_name(dev);
-
- RTE_LOG(DEBUG, EAL, "Search driver %s to probe device %s\n", name,
- rte_vdev_device_name(dev));
-
- if (vdev_parse(name, &driver))
- return -1;
- dev->device.driver = &driver->driver;
- ret = driver->probe(dev);
- if (ret)
- dev->device.driver = NULL;
- return ret;
-}
-
-static struct rte_vdev_device *
-find_vdev(const char *name)
-{
- struct rte_vdev_device *dev;
-
- if (!name)
- return NULL;
-
- TAILQ_FOREACH(dev, &vdev_device_list, next) {
- const char *devname = rte_vdev_device_name(dev);
- if (!strncmp(devname, name, strlen(name)))
- return dev;
- }
-
- return NULL;
-}
-
-static struct rte_devargs *
-alloc_devargs(const char *name, const char *args)
-{
- struct rte_devargs *devargs;
- int ret;
-
- devargs = calloc(1, sizeof(*devargs));
- if (!devargs)
- return NULL;
-
- devargs->bus = &rte_vdev_bus;
- if (args)
- devargs->args = strdup(args);
- else
- devargs->args = strdup("");
-
- ret = snprintf(devargs->name, sizeof(devargs->name), "%s", name);
- if (ret < 0 || ret >= (int)sizeof(devargs->name)) {
- free(devargs->args);
- free(devargs);
- return NULL;
- }
-
- return devargs;
-}
-
-int
-rte_vdev_init(const char *name, const char *args)
-{
- struct rte_vdev_device *dev;
- struct rte_devargs *devargs;
- int ret;
-
- if (name == NULL)
- return -EINVAL;
-
- dev = find_vdev(name);
- if (dev)
- return -EEXIST;
-
- devargs = alloc_devargs(name, args);
- if (!devargs)
- return -ENOMEM;
-
- dev = calloc(1, sizeof(*dev));
- if (!dev) {
- ret = -ENOMEM;
- goto fail;
- }
-
- dev->device.devargs = devargs;
- dev->device.numa_node = SOCKET_ID_ANY;
- dev->device.name = devargs->name;
-
- ret = vdev_probe_all_drivers(dev);
- if (ret) {
- if (ret > 0)
- RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
- goto fail;
- }
-
- TAILQ_INSERT_TAIL(&devargs_list, devargs, next);
-
- TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
- return 0;
-
-fail:
- free(devargs->args);
- free(devargs);
- free(dev);
- return ret;
-}
-
-static int
-vdev_remove_driver(struct rte_vdev_device *dev)
-{
- const char *name = rte_vdev_device_name(dev);
- const struct rte_vdev_driver *driver;
-
- if (!dev->device.driver) {
- RTE_LOG(DEBUG, EAL, "no driver attach to device %s\n", name);
- return 1;
- }
-
- driver = container_of(dev->device.driver, const struct rte_vdev_driver,
- driver);
- return driver->remove(dev);
-}
-
-int
-rte_vdev_uninit(const char *name)
-{
- struct rte_vdev_device *dev;
- struct rte_devargs *devargs;
- int ret;
-
- if (name == NULL)
- return -EINVAL;
-
- dev = find_vdev(name);
- if (!dev)
- return -ENOENT;
-
- devargs = dev->device.devargs;
-
- ret = vdev_remove_driver(dev);
- if (ret)
- return ret;
-
- TAILQ_REMOVE(&vdev_device_list, dev, next);
-
- TAILQ_REMOVE(&devargs_list, devargs, next);
-
- free(devargs->args);
- free(devargs);
- free(dev);
- return 0;
-}
-
-static int
-vdev_scan(void)
-{
- struct rte_vdev_device *dev;
- struct rte_devargs *devargs;
-
- /* for virtual devices we scan the devargs_list populated via cmdline */
- TAILQ_FOREACH(devargs, &devargs_list, next) {
-
- if (devargs->bus != &rte_vdev_bus)
- continue;
-
- dev = find_vdev(devargs->name);
- if (dev)
- continue;
-
- dev = calloc(1, sizeof(*dev));
- if (!dev)
- return -1;
-
- dev->device.devargs = devargs;
- dev->device.numa_node = SOCKET_ID_ANY;
- dev->device.name = devargs->name;
-
- TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
- }
-
- return 0;
-}
-
-static int
-vdev_probe(void)
-{
- struct rte_vdev_device *dev;
-
- /* call the init function for each virtual device */
- TAILQ_FOREACH(dev, &vdev_device_list, next) {
-
- if (dev->device.driver)
- continue;
-
- if (vdev_probe_all_drivers(dev)) {
- RTE_LOG(ERR, EAL, "failed to initialize %s device\n",
- rte_vdev_device_name(dev));
- return -1;
- }
- }
-
- return 0;
-}
-
-static struct rte_device *
-vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
- const void *data)
-{
- struct rte_vdev_device *dev;
-
- TAILQ_FOREACH(dev, &vdev_device_list, next) {
- if (start && &dev->device == start) {
- start = NULL;
- continue;
- }
- if (cmp(&dev->device, data) == 0)
- return &dev->device;
- }
- return NULL;
-}
-
-static int
-vdev_plug(struct rte_device *dev)
-{
- return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev));
-}
-
-static int
-vdev_unplug(struct rte_device *dev)
-{
- return rte_vdev_uninit(dev->name);
-}
-
-static struct rte_bus rte_vdev_bus = {
- .scan = vdev_scan,
- .probe = vdev_probe,
- .find_device = vdev_find_device,
- .plug = vdev_plug,
- .unplug = vdev_unplug,
- .parse = vdev_parse,
-};
-
-RTE_REGISTER_BUS(vdev, rte_vdev_bus);
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 7b7e8c88..fa6ccbec 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -65,7 +65,6 @@ struct internal_config {
volatile unsigned force_nrank; /**< force number of ranks */
volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
unsigned hugepage_unlink; /**< true to unlink backing files */
- volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
volatile unsigned no_pci; /**< true to disable PCI */
volatile unsigned no_hpet; /**< true to disable HPET */
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
@@ -82,7 +81,7 @@ struct internal_config {
volatile enum rte_intr_mode vfio_intr_mode;
const char *hugefile_prefix; /**< the base filename of hugetlbfs files */
const char *hugepage_dir; /**< specific hugetlbfs directory to use */
-
+ const char *mbuf_pool_ops_name; /**< mbuf pool ops name */
unsigned num_hugepage_sizes; /**< how many sizes on this system */
struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
};
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 439a2610..30e6bb41 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -61,6 +61,8 @@ enum {
OPT_LOG_LEVEL_NUM,
#define OPT_MASTER_LCORE "master-lcore"
OPT_MASTER_LCORE_NUM,
+#define OPT_MBUF_POOL_OPS_NAME "mbuf-pool-ops-name"
+ OPT_MBUF_POOL_OPS_NAME_NUM,
#define OPT_PROC_TYPE "proc-type"
OPT_PROC_TYPE_NUM,
#define OPT_NO_HPET "no-hpet"
@@ -81,8 +83,6 @@ enum {
OPT_VFIO_INTR_NUM,
#define OPT_VMWARE_TSC_MAP "vmware-tsc-map"
OPT_VMWARE_TSC_MAP_NUM,
-#define OPT_XEN_DOM0 "xen-dom0"
- OPT_XEN_DOM0_NUM,
OPT_LONG_MAX_NUM
};
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 597d82e4..462226f1 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -35,8 +35,8 @@
#define _EAL_PRIVATE_H_
#include <stdbool.h>
+#include <stdint.h>
#include <stdio.h>
-#include <rte_pci.h>
/**
* Initialize the memzone subsystem (private to eal).
@@ -109,137 +109,6 @@ int rte_eal_timer_init(void);
*/
int rte_eal_log_init(const char *id, int facility);
-struct rte_pci_driver;
-struct rte_pci_device;
-
-/**
- * Find the name of a PCI device.
- */
-void pci_name_set(struct rte_pci_device *dev);
-
-/**
- * Add a PCI device to the PCI Bus (append to PCI Device list). This function
- * also updates the bus references of the PCI Device (and the generic device
- * object embedded within.
- *
- * @param pci_dev
- * PCI device to add
- * @return void
- */
-void rte_pci_add_device(struct rte_pci_device *pci_dev);
-
-/**
- * Insert a PCI device in the PCI Bus at a particular location in the device
- * list. It also updates the PCI Bus reference of the new devices to be
- * inserted.
- *
- * @param exist_pci_dev
- * Existing PCI device in PCI Bus
- * @param new_pci_dev
- * PCI device to be added before exist_pci_dev
- * @return void
- */
-void rte_pci_insert_device(struct rte_pci_device *exist_pci_dev,
- struct rte_pci_device *new_pci_dev);
-
-/**
- * Remove a PCI device from the PCI Bus. This sets to NULL the bus references
- * in the PCI device object as well as the generic device object.
- *
- * @param pci_device
- * PCI device to be removed from PCI Bus
- * @return void
- */
-void rte_pci_remove_device(struct rte_pci_device *pci_device);
-
-/**
- * Update a pci device object by asking the kernel for the latest information.
- *
- * This function is private to EAL.
- *
- * @param addr
- * The PCI Bus-Device-Function address to look for
- * @return
- * - 0 on success.
- * - negative on error.
- */
-int pci_update_device(const struct rte_pci_addr *addr);
-
-/**
- * Unbind kernel driver for this device
- *
- * This function is private to EAL.
- *
- * @return
- * 0 on success, negative on error
- */
-int pci_unbind_kernel_driver(struct rte_pci_device *dev);
-
-/**
- * Map the PCI resource of a PCI device in virtual memory
- *
- * This function is private to EAL.
- *
- * @return
- * 0 on success, negative on error
- */
-int pci_uio_map_resource(struct rte_pci_device *dev);
-
-/**
- * Unmap the PCI resource of a PCI device
- *
- * This function is private to EAL.
- */
-void pci_uio_unmap_resource(struct rte_pci_device *dev);
-
-/**
- * Allocate uio resource for PCI device
- *
- * This function is private to EAL.
- *
- * @param dev
- * PCI device to allocate uio resource
- * @param uio_res
- * Pointer to uio resource.
- * If the function returns 0, the pointer will be filled.
- * @return
- * 0 on success, negative on error
- */
-int pci_uio_alloc_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource **uio_res);
-
-/**
- * Free uio resource for PCI device
- *
- * This function is private to EAL.
- *
- * @param dev
- * PCI device to free uio resource
- * @param uio_res
- * Pointer to uio resource.
- */
-void pci_uio_free_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource *uio_res);
-
-/**
- * Map device memory to uio resource
- *
- * This function is private to EAL.
- *
- * @param dev
- * PCI device that has memory information.
- * @param res_idx
- * Memory resource index of the PCI device.
- * @param uio_res
- * uio resource that will keep mapping information.
- * @param map_idx
- * Mapping information index of the uio resource.
- * @return
- * 0 on success, negative on error
- */
-int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
- struct mapped_pci_resource *uio_res, int map_idx);
-
/**
* Init tail queues for non-EAL library structures. This is to allow
* the rings, mempools, etc. lists to be shared among multiple processes
@@ -315,6 +184,17 @@ void set_tsc_freq(void);
uint64_t get_tsc_freq(void);
/**
+ * Get the TSC frequency using architecture-specific support, if available.
+ *
+ * This function is private to the EAL.
+ *
+ * @return
+ * The number of TSC cycles in one second.
+ * Returns zero if the architecture support is not available.
+ */
+uint64_t get_tsc_freq_arch(void);
+
+/**
* Prepare physical memory mapping
* i.e. hugepages on Linux and
* contigmem on BSD.
@@ -333,17 +213,6 @@ int rte_eal_hugepage_init(void);
int rte_eal_hugepage_attach(void);
/**
- * Returns true if the system is able to obtain
- * physical addresses. Return false if using DMA
- * addresses through an IOMMU.
- *
- * Drivers based on uio will not load unless physical
- * addresses are obtainable. It is only possible to get
- * physical addresses when running as a privileged user.
- */
-bool rte_eal_using_phys_addrs(void);
-
-/**
* Find a bus capable of identifying a device.
*
* @param str
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index 782350d1..aa887a97 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -136,7 +136,7 @@ vgetq_lane_p64(poly64x2_t x, const int lane)
#endif
/*
- * If (0 <= index <= 15), then call the ASIMD ext intruction on the
+ * If (0 <= index <= 15), then call the ASIMD ext instruction on the
* 128 bit regs v0 and v1 with the appropriate index.
*
* Else returns a zero vector.
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
index 2e04c759..fb3abf18 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
@@ -81,7 +81,7 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
: "memory" ); /* no-clobber list */
#else
asm volatile (
- "mov %%ebx, %%edi\n"
+ "xchgl %%ebx, %%edi;\n"
MPLOCKED
"cmpxchg8b (%[dst]);"
"setz %[res];"
diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h
new file mode 100644
index 00000000..010d752c
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_bitmap.h
@@ -0,0 +1,561 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_BITMAP_H__
+#define __INCLUDE_RTE_BITMAP_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Bitmap
+ *
+ * The bitmap component provides a mechanism to manage large arrays of bits
+ * through bit get/set/clear and bit array scan operations.
+ *
+ * The bitmap scan operation is optimized for 64-bit CPUs using 64/128 byte cache
+ * lines. The bitmap is hierarchically organized using two arrays (array1 and
+ * array2), with each bit in array1 being associated with a full cache line
+ * (512/1024 bits) of bitmap bits, which are stored in array2: the bit in array1
+ * is set only when there is at least one bit set within its associated array2
+ * bits, otherwise the bit in array1 is cleared. The read and write operations
+ * for array1 and array2 are always done in slabs of 64 bits.
+ *
+ * This bitmap is not thread safe. For lock free operation on a specific bitmap
+ * instance, a single writer thread performing bit set/clear operations is
+ * allowed, only the writer thread can do bitmap scan operations, while there
+ * can be several reader threads performing bit get operations in parallel with
+ * the writer thread. When the use of locking primitives is acceptable, the
+ * serialization of the bit set/clear and bitmap scan operations needs to be
+ * enforced by the caller, while the bit get operation does not require locking
+ * the bitmap.
+ *
+ ***/
+
+#include <string.h>
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_branch_prediction.h>
+#include <rte_prefetch.h>
+
+#ifndef RTE_BITMAP_OPTIMIZATIONS
+#define RTE_BITMAP_OPTIMIZATIONS 1
+#endif
+
+/* Slab */
+#define RTE_BITMAP_SLAB_BIT_SIZE 64
+#define RTE_BITMAP_SLAB_BIT_SIZE_LOG2 6
+#define RTE_BITMAP_SLAB_BIT_MASK (RTE_BITMAP_SLAB_BIT_SIZE - 1)
+
+/* Cache line (CL) */
+#define RTE_BITMAP_CL_BIT_SIZE (RTE_CACHE_LINE_SIZE * 8)
+#define RTE_BITMAP_CL_BIT_SIZE_LOG2 (RTE_CACHE_LINE_SIZE_LOG2 + 3)
+#define RTE_BITMAP_CL_BIT_MASK (RTE_BITMAP_CL_BIT_SIZE - 1)
+
+#define RTE_BITMAP_CL_SLAB_SIZE (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE)
+#define RTE_BITMAP_CL_SLAB_SIZE_LOG2 (RTE_BITMAP_CL_BIT_SIZE_LOG2 - RTE_BITMAP_SLAB_BIT_SIZE_LOG2)
+#define RTE_BITMAP_CL_SLAB_MASK (RTE_BITMAP_CL_SLAB_SIZE - 1)
+
+/** Bitmap data structure */
+struct rte_bitmap {
+ /* Context for array1 and array2 */
+ uint64_t *array1; /**< Bitmap array1 */
+ uint64_t *array2; /**< Bitmap array2 */
+ uint32_t array1_size; /**< Number of 64-bit slabs in array1 that are actually used */
+ uint32_t array2_size; /**< Number of 64-bit slabs in array2 */
+
+ /* Context for the "scan next" operation */
+ uint32_t index1; /**< Bitmap scan: Index of current array1 slab */
+ uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */
+ uint32_t index2; /**< Bitmap scan: Index of current array2 slab */
+ uint32_t go2; /**< Bitmap scan: Go/stop condition for current array2 cache line */
+
+ /* Storage space for array1 and array2 */
+ uint8_t memory[];
+};
+
+/*
+ * Advance the scan to the next array1 slab, wrapping to slab 0 after the
+ * last one. The wrap is done with a mask, so array1_size is expected to be
+ * a power of two (rte_bitmap_init() obtains it via rte_align32pow2()).
+ */
+static inline void
+__rte_bitmap_index1_inc(struct rte_bitmap *bmp)
+{
+	uint32_t wrap_mask = bmp->array1_size - 1;
+	uint32_t next = bmp->index1 + 1;
+
+	bmp->index1 = next & wrap_mask;
+}
+
+/*
+ * Build the mask selecting the array1 bits strictly above the current scan
+ * offset (bits offset1 + 1 .. 63); the result is 0 when offset1 is 63.
+ *
+ * The constant is widened to uint64_t before shifting: with a plain
+ * unsigned long constant the shift count can exceed the type width on
+ * targets where unsigned long is 32 bits.
+ */
+static inline uint64_t
+__rte_bitmap_mask1_get(struct rte_bitmap *bmp)
+{
+	return (~(uint64_t)1) << bmp->offset1;
+}
+
+/*
+ * Derive the array2 slab index from the current array1 position: the
+ * array1 bit number (index1 * 64 + offset1) identifies one array2 cache
+ * line, and index2 is set to the first slab of that line.
+ */
+static inline void
+__rte_bitmap_index2_set(struct rte_bitmap *bmp)
+{
+	uint32_t line;
+
+	line = (bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1;
+	bmp->index2 = line << RTE_BITMAP_CL_SLAB_SIZE_LOG2;
+}
+
+#if RTE_BITMAP_OPTIMIZATIONS
+
+/*
+ * Find the lowest set bit of a slab (compiler builtin variant).
+ *
+ * Returns 0 and leaves *pos untouched when slab is zero; otherwise stores
+ * the index of the least significant set bit in *pos and returns 1.
+ */
+static inline int
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+	if (likely(slab == 0))
+		return 0;
+
+	/* slab is non-zero here, which __builtin_ctzll() requires */
+	*pos = __builtin_ctzll(slab);
+
+	return 1;
+}
+
+#else
+
+/*
+ * Find the lowest set bit of a slab (portable bit-by-bit variant).
+ *
+ * Returns 0 and leaves *pos untouched when slab is zero; otherwise stores
+ * the index of the least significant set bit in *pos and returns 1.
+ */
+static inline int
+rte_bsf64(uint64_t slab, uint32_t *pos)
+{
+	uint64_t probe = 1;
+	uint32_t bit = 0;
+
+	if (likely(slab == 0))
+		return 0;
+
+	/* Walk a single-bit probe upwards from bit 0 */
+	while (bit < RTE_BITMAP_SLAB_BIT_SIZE) {
+		if (unlikely(slab & probe)) {
+			*pos = bit;
+			return 1;
+		}
+		bit++;
+		probe <<= 1;
+	}
+
+	return 0;
+}
+
+#endif
+
+/*
+ * Compute the memory layout of a bitmap holding n_bits bits.
+ *
+ * Layout, in order: the struct rte_bitmap context rounded up to whole
+ * 64-bit slabs, then array1, both padded together to a whole number of
+ * cache lines, then array2 occupying whole cache lines.
+ *
+ * Each output pointer may be NULL, in which case that value is not
+ * reported. Returns the total size in bytes.
+ */
+static inline uint32_t
+__rte_bitmap_get_memory_footprint(uint32_t n_bits,
+	uint32_t *array1_byte_offset, uint32_t *array1_slabs,
+	uint32_t *array2_byte_offset, uint32_t *array2_slabs)
+{
+	uint32_t ctx_slabs, a1_slabs, head_lines, a2_lines;
+
+	/* array2: one cache line per RTE_BITMAP_CL_BIT_SIZE bits, rounded up */
+	a2_lines = (n_bits + RTE_BITMAP_CL_BIT_SIZE - 1) / RTE_BITMAP_CL_BIT_SIZE;
+
+	/* array1: one bit per array2 cache line, rounded up to whole slabs */
+	a1_slabs = (a2_lines + RTE_BITMAP_SLAB_BIT_SIZE - 1) / RTE_BITMAP_SLAB_BIT_SIZE;
+	a1_slabs = rte_align32pow2(a1_slabs);
+
+	/* context header, measured in 8-byte slabs, rounded up */
+	ctx_slabs = (sizeof(struct rte_bitmap) + (RTE_BITMAP_SLAB_BIT_SIZE / 8) - 1) / (RTE_BITMAP_SLAB_BIT_SIZE / 8);
+
+	/* header + array1 share the leading cache lines */
+	head_lines = (ctx_slabs + a1_slabs + RTE_BITMAP_CL_SLAB_SIZE - 1) / RTE_BITMAP_CL_SLAB_SIZE;
+
+	if (array1_byte_offset != NULL)
+		*array1_byte_offset = ctx_slabs * (RTE_BITMAP_SLAB_BIT_SIZE / 8);
+
+	if (array1_slabs != NULL)
+		*array1_slabs = a1_slabs;
+
+	if (array2_byte_offset != NULL)
+		*array2_byte_offset = head_lines * RTE_CACHE_LINE_SIZE;
+
+	if (array2_slabs != NULL)
+		*array2_slabs = a2_lines * RTE_BITMAP_CL_SLAB_SIZE;
+
+	return (head_lines + a2_lines) * RTE_CACHE_LINE_SIZE;
+}
+
+/*
+ * Reset the scan context.
+ *
+ * The array1 cursor is parked on the last bit of the last slab, so the
+ * next search finds nothing above it and wraps to slab 0. go2 is cleared
+ * so that no array2 line is considered to be in progress.
+ */
+static inline void
+__rte_bitmap_scan_init(struct rte_bitmap *bmp)
+{
+	bmp->go2 = 0;
+
+	bmp->index1 = bmp->array1_size - 1;
+	bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1;
+
+	/* index2: first slab of the line selected above, then one line further */
+	__rte_bitmap_index2_set(bmp);
+	bmp->index2 += RTE_BITMAP_CL_SLAB_SIZE;
+}
+
+/**
+ * Bitmap memory footprint calculation
+ *
+ * @param n_bits
+ *   Number of bits in the bitmap
+ * @return
+ *   Number of bytes needed to hold a bitmap of n_bits bits, or 0 when
+ *   n_bits is 0 (error)
+ */
+static inline uint32_t
+rte_bitmap_get_memory_footprint(uint32_t n_bits)
+{
+	/* A zero-sized bitmap is rejected */
+	return (n_bits == 0) ? 0 :
+		__rte_bitmap_get_memory_footprint(n_bits, NULL, NULL, NULL, NULL);
+}
+
+/**
+ * Bitmap initialization
+ *
+ * @param n_bits
+ *   Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @param mem
+ *   Base address of the memory area that will hold the bitmap context,
+ *   array1 and array2. Must be non-NULL and cache line aligned.
+ * @param mem_size
+ *   Size in bytes of the memory area at mem. Must be at least the value
+ *   returned by rte_bitmap_get_memory_footprint() for n_bits.
+ * @return
+ *   Handle to bitmap instance on success, NULL when an argument is invalid
+ *   or the memory area is too small.
+ */
+static inline struct rte_bitmap *
+rte_bitmap_init(uint32_t n_bits, uint8_t *mem, uint32_t mem_size)
+{
+	struct rte_bitmap *bmp;
+	uint32_t array1_byte_offset, array1_slabs, array2_byte_offset, array2_slabs;
+	uint32_t size;
+
+	/* Check input arguments */
+	if (n_bits == 0)
+		return NULL;
+
+	if ((mem == NULL) || (((uintptr_t) mem) & RTE_CACHE_LINE_MASK))
+		return NULL;
+
+	size = __rte_bitmap_get_memory_footprint(n_bits,
+		&array1_byte_offset, &array1_slabs,
+		&array2_byte_offset, &array2_slabs);
+
+	/*
+	 * The whole footprint is written below, so the caller's area must be
+	 * at least that large; a larger area is fine.
+	 */
+	if (size > mem_size)
+		return NULL;
+
+	/* Setup bitmap */
+	memset(mem, 0, size);
+	bmp = (struct rte_bitmap *) mem;
+
+	bmp->array1 = (uint64_t *) &mem[array1_byte_offset];
+	bmp->array1_size = array1_slabs;
+	bmp->array2 = (uint64_t *) &mem[array2_byte_offset];
+	bmp->array2_size = array2_slabs;
+
+	__rte_bitmap_scan_init(bmp);
+
+	return bmp;
+}
+
+/**
+ * Bitmap free
+ *
+ * Only validates the handle; no memory is released by this function.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @return
+ *   0 upon success, -1 when bmp is NULL
+ */
+static inline int
+rte_bitmap_free(struct rte_bitmap *bmp)
+{
+	return (bmp == NULL) ? -1 : 0;
+}
+
+/**
+ * Bitmap reset
+ *
+ * Clears every bit of array1 and array2 and rewinds the scan context.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ */
+static inline void
+rte_bitmap_reset(struct rte_bitmap *bmp)
+{
+	size_t array1_bytes = bmp->array1_size * sizeof(uint64_t);
+	size_t array2_bytes = bmp->array2_size * sizeof(uint64_t);
+
+	memset(bmp->array1, 0, array1_bytes);
+	memset(bmp->array2, 0, array2_bytes);
+
+	__rte_bitmap_scan_init(bmp);
+}
+
+/**
+ * Bitmap location prefetch into CPU L1 cache
+ *
+ * Issues rte_prefetch0() on the array2 slab that holds the given bit.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ */
+static inline void
+rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint32_t slab_idx = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+
+	rte_prefetch0((void *) (bmp->array2 + slab_idx));
+}
+
+/**
+ * Bitmap bit get
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ * @return
+ *   0 when bit is cleared, non-zero when bit is set
+ */
+static inline uint64_t
+rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint64_t *slab2;
+	uint32_t index2, offset2;
+
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+
+	/*
+	 * offset2 ranges over 0..63, so the shifted constant must be 64 bits
+	 * wide even where unsigned long is only 32 bits.
+	 */
+	return (*slab2) & ((uint64_t)1 << offset2);
+}
+
+/**
+ * Bitmap bit set
+ *
+ * Sets the bit in array2 and marks the owning array2 cache line as
+ * non-empty in array1.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ */
+static inline void
+rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint64_t *slab1, *slab2;
+	uint32_t index1, index2, offset1, offset2;
+
+	/* Set bit in array2 slab and set bit in array1 slab */
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+	slab1 = bmp->array1 + index1;
+
+	/*
+	 * Both offsets range over 0..63, so the shifted constant must be
+	 * 64 bits wide even where unsigned long is only 32 bits.
+	 */
+	*slab2 |= (uint64_t)1 << offset2;
+	*slab1 |= (uint64_t)1 << offset1;
+}
+
+/**
+ * Bitmap slab set
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position identifying the array2 slab
+ * @param slab
+ *   Bits to be set in the 64-bit slab in array2. The value is OR-ed into
+ *   the slab, so bits already set there stay set.
+ */
+static inline void
+rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab)
+{
+	uint64_t *slab1, *slab2;
+	uint32_t index1, index2, offset1;
+
+	/* Set bits in array2 slab and set bit in array1 slab */
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+	slab1 = bmp->array1 + index1;
+
+	*slab2 |= slab;
+
+	/*
+	 * offset1 ranges over 0..63, so the shifted constant must be 64 bits
+	 * wide even where unsigned long is only 32 bits.
+	 */
+	*slab1 |= (uint64_t)1 << offset1;
+}
+
+/*
+ * Tell whether an array2 cache line contains any set bit.
+ *
+ * slab2 must point at the first slab of the cache line. All
+ * RTE_BITMAP_CL_SLAB_SIZE slabs of the line are OR-ed together, so the
+ * check covers the full line for any cache line size, not just the
+ * 8-slab (64-byte) case.
+ *
+ * Returns 0 when every slab of the line is zero, non-zero otherwise.
+ */
+static inline uint64_t
+__rte_bitmap_line_not_empty(uint64_t *slab2)
+{
+	uint64_t any = 0;
+	uint32_t i;
+
+	/* Constant trip count: the compiler is free to unroll this */
+	for (i = 0; i < RTE_BITMAP_CL_SLAB_SIZE; i++)
+		any |= slab2[i];
+
+	return any;
+}
+
+/**
+ * Bitmap bit clear
+ *
+ * Clears the bit in array2 and, when that leaves the whole array2 cache
+ * line empty, also clears the line's bit in array1.
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   Bit position
+ */
+static inline void
+rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
+{
+	uint64_t *slab1, *slab2;
+	uint32_t index1, index2, offset1, offset2;
+
+	/* Clear bit in array2 slab */
+	index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+	offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
+	slab2 = bmp->array2 + index2;
+
+	/*
+	 * The mask is built in 64 bits: a narrower constant would be
+	 * zero-extended after the complement and wipe the upper half of the
+	 * slab where unsigned long is only 32 bits.
+	 */
+	*slab2 &= ~((uint64_t)1 << offset2);
+
+	/* Return if array2 slab is not all-zeros */
+	if (*slab2)
+		return;
+
+	/* Check the entire cache line of array2 for all-zeros */
+	index2 &= ~ RTE_BITMAP_CL_SLAB_MASK;
+	slab2 = bmp->array2 + index2;
+	if (__rte_bitmap_line_not_empty(slab2))
+		return;
+
+	/* The array2 cache line is all-zeros, so clear bit in array1 slab */
+	index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
+	offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
+	slab1 = bmp->array1 + index1;
+	*slab1 &= ~((uint64_t)1 << offset1);
+}
+
+/*
+ * Move the array1 cursor (index1/offset1) to the next set array1 bit.
+ *
+ * First looks above the current offset in the current slab; failing
+ * that, visits array1_size slabs starting from the following one, with
+ * wrap-around. Returns 1 when a set bit was found, 0 when none was.
+ */
+static inline int
+__rte_bitmap_scan_search(struct rte_bitmap *bmp)
+{
+	uint64_t candidates;
+	uint32_t remaining;
+
+	/* Remainder of the current array1 slab, above the current offset */
+	candidates = bmp->array1[bmp->index1] & __rte_bitmap_mask1_get(bmp);
+	if (rte_bsf64(candidates, &bmp->offset1))
+		return 1;
+
+	/* Start afresh from bit 0 of the next slab */
+	__rte_bitmap_index1_inc(bmp);
+	bmp->offset1 = 0;
+
+	for (remaining = bmp->array1_size; remaining != 0; remaining--) {
+		candidates = bmp->array1[bmp->index1];
+		if (rte_bsf64(candidates, &bmp->offset1))
+			return 1;
+
+		__rte_bitmap_index1_inc(bmp);
+	}
+
+	return 0;
+}
+
+/*
+ * Start reading the array2 cache line selected by the array1 cursor:
+ * point index2 at its first slab, raise go2, and prefetch the address
+ * 8 slabs further on.
+ */
+static inline void
+__rte_bitmap_scan_read_init(struct rte_bitmap *bmp)
+{
+	uint64_t *ahead;
+
+	bmp->go2 = 1;
+	__rte_bitmap_index2_set(bmp);
+
+	ahead = bmp->array2 + bmp->index2 + 8;
+	rte_prefetch1((void *)ahead);
+}
+
+/*
+ * Consume slabs of the current array2 cache line until a non-zero one
+ * turns up or the line ends (go2 drops to 0 when index2 reaches a cache
+ * line boundary).
+ *
+ * On a hit, *pos receives the bit position of the first bit of the slab,
+ * *slab receives the slab value, the cursor is left just past that slab,
+ * and 1 is returned. Otherwise 0 is returned and *pos / *slab are not
+ * written.
+ */
+static inline int
+__rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+	while (bmp->go2) {
+		uint64_t *cur = bmp->array2 + bmp->index2;
+		int hit = (*cur != 0);
+
+		if (hit) {
+			*pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
+			*slab = *cur;
+		}
+
+		/* Step past this slab whether or not it was reported */
+		bmp->index2++;
+		bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK;
+
+		if (hit)
+			return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * Bitmap scan (with automatic wrap-around)
+ *
+ * @param bmp
+ *   Handle to bitmap instance
+ * @param pos
+ *   When a non-empty slab is reported, pos receives the bit position of
+ *   the first bit of that slab (a multiple of 64; this bit is not
+ *   necessarily set). Not modified when the function returns 0.
+ * @param slab
+ *   When a non-empty slab is reported, slab receives the value of the
+ *   entire 64-bit slab starting at pos. Once a slab has been returned by
+ *   the bitmap scan operation, the internal pointers of the bitmap are
+ *   updated to point after this slab, so the same slab will not be
+ *   returned again if it contains more than one bit which is set. Not
+ *   modified when the function returns 0.
+ * @return
+ *   0 if there is no bit set in the bitmap, 1 otherwise
+ */
+static inline int
+rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab)
+{
+	/* Fast path: the array2 line in progress still has data */
+	if (!__rte_bitmap_scan_read(bmp, pos, slab)) {
+		/* Otherwise look for the next non-empty array2 line */
+		if (!__rte_bitmap_scan_search(bmp))
+			return 0; /* Empty bitmap */
+
+		__rte_bitmap_scan_read_init(bmp);
+		__rte_bitmap_scan_read(bmp, pos, slab);
+	}
+
+	return 1;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_BITMAP_H__ */
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index c79368d3..6fb08341 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -55,6 +55,21 @@ extern "C" {
/** Double linked list of buses */
TAILQ_HEAD(rte_bus_list, rte_bus);
+
+/**
+ * IOVA mapping mode.
+ *
+ * The IOVA mapping mode is the IOMMU programming mode of a device.
+ * Depending on rte_iova_mode, such a device (for example an IOMMU-backed
+ * DMA device) will generate physical or virtual addresses.
+ *
+ * RTE_IOVA_PA and RTE_IOVA_VA each occupy their own bit; RTE_IOVA_DC is zero.
+ */
+enum rte_iova_mode {
+ RTE_IOVA_DC = 0, /**< Don't care mode */
+ RTE_IOVA_PA = (1 << 0), /**< DMA using physical address */
+ RTE_IOVA_VA = (1 << 1) /**< DMA using virtual address */
+};
+
/**
* Bus specific scan for devices attached on the bus.
* For each bus object, the scan would be responsible for finding devices and
@@ -168,6 +183,20 @@ struct rte_bus_conf {
enum rte_bus_scan_mode scan_mode; /**< Scan policy. */
};
+
+/**
+ * Get the common iommu class of all the devices on the bus. The bus may
+ * check that those devices are attached to an iommu driver.
+ * If no devices are attached to the bus, the bus may return the don't care
+ * (_DC) value.
+ * Otherwise, the bus will return the appropriate _PA or _VA iova mode.
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+typedef enum rte_iova_mode (*rte_bus_get_iommu_class_t)(void);
+
+
/**
* A structure describing a generic bus.
*/
@@ -181,6 +210,7 @@ struct rte_bus {
rte_bus_unplug_t unplug; /**< Remove single device from driver */
rte_bus_parse_t parse; /**< Parse a device name */
struct rte_bus_conf conf; /**< Bus configuration */
+ rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
};
/**
@@ -280,12 +310,22 @@ struct rte_bus *rte_bus_find_by_device(const struct rte_device *dev);
*/
struct rte_bus *rte_bus_find_by_name(const char *busname);
+
+/**
+ * Get the common iommu class of devices bound on to buses available in the
+ * system. The default mode is PA.
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+enum rte_iova_mode rte_bus_get_iommu_class(void);
+
/**
* Helper for Bus registration.
* The constructor has higher priority than PMD constructors.
*/
#define RTE_REGISTER_BUS(nm, bus) \
-RTE_INIT_PRIO(businitfn_ ##nm, 101); \
+RTE_INIT_PRIO(businitfn_ ##nm, 110); \
static void businitfn_ ##nm(void) \
{\
(bus).name = RTE_STR(nm);\
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index 1afc66e3..de853e16 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -109,6 +109,29 @@ typedef uint16_t unaligned_uint16_t;
#define RTE_SET_USED(x) (void)(x)
/**
+ * Run function before main() with low priority.
+ *
+ * The constructor will be run after prioritized constructors.
+ *
+ * The macro expands to the signature of a static void function taking no
+ * arguments, tagged with the constructor and used attributes. It supplies
+ * neither a trailing semicolon nor a function body.
+ *
+ * @param func
+ * Constructor function.
+ */
+#define RTE_INIT(func) \
+static void __attribute__((constructor, used)) func(void)
+
+/**
+ * Run function before main() with high priority.
+ *
+ * The macro expands to the signature of a static void function taking no
+ * arguments, tagged with the constructor(prio) and used attributes. It
+ * supplies neither a trailing semicolon nor a function body.
+ *
+ * @param func
+ * Constructor function.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the first to run.
+ */
+#define RTE_INIT_PRIO(func, prio) \
+static void __attribute__((constructor(prio), used)) func(void)
+
+/**
* Force a function to be inlined
*/
#define __rte_always_inline inline __attribute__((always_inline))
diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h
index cab6fb4c..79b67b3e 100644
--- a/lib/librte_eal/common/include/rte_debug.h
+++ b/lib/librte_eal/common/include/rte_debug.h
@@ -79,7 +79,7 @@ void rte_dump_registers(void);
#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy")
#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__)
-#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG
+#ifdef RTE_ENABLE_ASSERT
#define RTE_ASSERT(exp) RTE_VERIFY(exp)
#else
#define RTE_ASSERT(exp) do {} while (0)
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 5386d3a2..9342e0cb 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -49,7 +49,6 @@ extern "C" {
#include <stdio.h>
#include <sys/queue.h>
-#include <rte_config.h>
#include <rte_log.h>
__attribute__((format(printf, 2, 0)))
@@ -152,7 +151,11 @@ struct rte_driver {
const char *alias; /**< Driver alias. */
};
-#define RTE_DEV_NAME_MAX_LEN (32)
+/*
+ * Internal identifier length
+ * Sufficiently large to allow for UUID or PCI address
+ */
+#define RTE_DEV_NAME_MAX_LEN 64
/**
* A structure describing a generic device.
@@ -166,28 +169,6 @@ struct rte_device {
};
/**
- * Initialize a driver specified by name.
- *
- * @param name
- * The pointer to a driver name to be initialized.
- * @param args
- * The pointer to arguments used by driver initialization.
- * @return
- * 0 on success, negative on error
- */
-int rte_vdev_init(const char *name, const char *args);
-
-/**
- * Uninitalize a driver specified by name.
- *
- * @param name
- * The pointer to a driver name to be initialized.
- * @return
- * 0 on success, negative on error
- */
-int rte_vdev_uninit(const char *name);
-
-/**
* Attach a device to a registered driver.
*
* @param name
@@ -312,4 +293,4 @@ __attribute__((used)) = str
}
#endif
-#endif /* _RTE_VDEV_H_ */
+#endif /* _RTE_DEV_H_ */
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index 0e7363d7..09b66819 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -44,7 +44,9 @@
#include <sched.h>
#include <rte_per_lcore.h>
-#include <rte_config.h>
+#include <rte_bus.h>
+
+#include <rte_pci_dev_feature_defs.h>
#ifdef __cplusplus
extern "C" {
@@ -87,6 +89,9 @@ struct rte_config {
/** Primary or secondary configuration */
enum rte_proc_type_t process_type;
+ /** PA or VA mapping mode */
+ enum rte_iova_mode iova_mode;
+
/**
* Pointer to memory configuration, which may be shared across multiple
* DPDK instances
@@ -264,6 +269,32 @@ rte_set_application_usage_hook(rte_usage_hook_t usage_func);
int rte_eal_has_hugepages(void);
/**
+ * Whether EAL is using PCI bus.
+ * Disabled by --no-pci option.
+ *
+ * @return
+ * Nonzero if the PCI bus is enabled.
+ */
+int rte_eal_has_pci(void);
+
+/**
+ * Whether the EAL was asked to create UIO device.
+ *
+ * @return
+ * Nonzero if true.
+ */
+int rte_eal_create_uio_dev(void);
+
+/**
+ * The user-configured vfio interrupt mode.
+ *
+ * @return
+ * Interrupt mode configured with the command line,
+ * RTE_INTR_MODE_NONE by default.
+ */
+enum rte_intr_mode rte_eal_vfio_intr_mode(void);
+
+/**
* A wrap API for syscall gettid.
*
* @return
@@ -287,11 +318,22 @@ static inline int rte_gettid(void)
return RTE_PER_LCORE(_thread_id);
}
-#define RTE_INIT(func) \
-static void __attribute__((constructor, used)) func(void)
+/**
+ * Get the iova mode
+ *
+ * @return
+ * enum rte_iova_mode value.
+ */
+enum rte_iova_mode rte_eal_iova_mode(void);
-#define RTE_INIT_PRIO(func, prio) \
-static void __attribute__((constructor(prio), used)) func(void)
+/**
+ * Get default pool ops name for mbuf
+ *
+ * @return
+ * returns default pool ops name.
+ */
+const char *
+rte_eal_mbuf_default_mempool_ops(void);
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h
index 6daffebf..031f78cc 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/common/include/rte_eal_interrupts.h
@@ -35,15 +35,26 @@
#error "don't include this file directly, please include generic <rte_interrupts.h>"
#endif
-#ifndef _RTE_LINUXAPP_INTERRUPTS_H_
-#define _RTE_LINUXAPP_INTERRUPTS_H_
+/**
+ * @file rte_eal_interrupts.h
+ * @internal
+ *
+ * Contains function prototypes exposed by the EAL for interrupt handling by
+ * drivers and other DPDK internal consumers.
+ */
+
+#ifndef _RTE_EAL_INTERRUPTS_H_
+#define _RTE_EAL_INTERRUPTS_H_
#define RTE_MAX_RXTX_INTR_VEC_ID 32
#define RTE_INTR_VEC_ZERO_OFFSET 0
#define RTE_INTR_VEC_RXTX_OFFSET 1
+/**
+ * The interrupt source type, e.g. UIO, VFIO, ALARM etc.
+ */
enum rte_intr_handle_type {
- RTE_INTR_HANDLE_UNKNOWN = 0,
+ RTE_INTR_HANDLE_UNKNOWN = 0, /**< generic unknown handle */
RTE_INTR_HANDLE_UIO, /**< uio device handle */
RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */
RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */
@@ -52,7 +63,7 @@ enum rte_intr_handle_type {
RTE_INTR_HANDLE_ALARM, /**< alarm handle */
RTE_INTR_HANDLE_EXT, /**< external handler */
RTE_INTR_HANDLE_VDEV, /**< virtual device */
- RTE_INTR_HANDLE_MAX
+ RTE_INTR_HANDLE_MAX /**< count of elements */
};
#define RTE_INTR_EVENT_ADD 1UL
@@ -86,13 +97,13 @@ struct rte_intr_handle {
RTE_STD_C11
union {
int vfio_dev_fd; /**< VFIO device file descriptor */
- int uio_cfg_fd; /**< UIO config file descriptor
- for uio_pci_generic */
+ int uio_cfg_fd; /**< UIO cfg file desc for uio_pci_generic */
};
int fd; /**< interrupt event file descriptor */
enum rte_intr_handle_type type; /**< handle type */
uint32_t max_intr; /**< max interrupt requested */
uint32_t nb_efd; /**< number of available efd(event fd) */
+ uint8_t efd_counter_size; /**< size of efd counter, used for vdev */
int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */
struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID];
/**< intr vector epoll event */
@@ -236,4 +247,4 @@ rte_intr_allow_others(struct rte_intr_handle *intr_handle);
int
rte_intr_cap_multiple(struct rte_intr_handle *intr_handle);
-#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */
+#endif /* _RTE_EAL_INTERRUPTS_H_ */
diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h
index 5d06ed79..43177c7a 100644
--- a/lib/librte_eal/common/include/rte_interrupts.h
+++ b/lib/librte_eal/common/include/rte_interrupts.h
@@ -53,7 +53,7 @@ struct rte_intr_handle;
/** Function to be registered for the specific interrupt */
typedef void (*rte_intr_callback_fn)(void *cb_arg);
-#include <exec-env/rte_interrupts.h>
+#include "rte_eal_interrupts.h"
/**
* It registers the callback for the specific interrupt. Multiple
diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
index 50e0d0fe..c89e6bab 100644
--- a/lib/librte_eal/common/include/rte_lcore.h
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -262,6 +262,20 @@ void rte_thread_get_affinity(rte_cpuset_t *cpusetp);
*/
int rte_thread_setname(pthread_t id, const char *name);
+/**
+ * Test if the core supplied has a specific role
+ *
+ * @param lcore_id
+ * The identifier of the lcore, which MUST be between 0 and
+ * RTE_MAX_LCORE-1.
+ * @param role
+ * The role to be checked against.
+ * @return
+ * On success, return 0; otherwise return a negative value.
+ */
+int
+rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
index ec8dba79..16564d41 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -87,6 +87,7 @@ extern struct rte_logs rte_logs;
#define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */
#define RTE_LOGTYPE_EFD 18 /**< Log related to EFD. */
#define RTE_LOGTYPE_EVENTDEV 19 /**< Log related to eventdev. */
+#define RTE_LOGTYPE_GSO 20 /**< Log related to GSO. */
/* these log types can be used in an application */
#define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */
@@ -138,12 +139,6 @@ int rte_openlog_stream(FILE *f);
void rte_log_set_global_level(uint32_t level);
/**
- * Deprecated, replaced by rte_log_set_global_level().
- */
-__rte_deprecated
-void rte_set_log_level(uint32_t level);
-
-/**
* Get the global log level.
*
* @return
@@ -152,29 +147,6 @@ void rte_set_log_level(uint32_t level);
uint32_t rte_log_get_global_level(void);
/**
- * Deprecated, replaced by rte_log_get_global_level().
- */
-__rte_deprecated
-uint32_t rte_get_log_level(void);
-
-/**
- * Enable or disable the log type.
- *
- * @param type
- * Log type, for example, RTE_LOGTYPE_EAL.
- * @param enable
- * True for enable; false for disable.
- */
-__rte_deprecated
-void rte_set_log_type(uint32_t type, int enable);
-
-/**
- * Get the global log type.
- */
-__rte_deprecated
-uint32_t rte_get_log_type(void);
-
-/**
* Get the log level for a given type.
*
* @param logtype
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index 3d37f79b..5d4c11a7 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -323,17 +323,24 @@ int
rte_malloc_set_limit(const char *type, size_t max);
/**
- * Return the physical address of a virtual address obtained through
+ * Return the IO address of a virtual address obtained through
* rte_malloc
*
* @param addr
* Address obtained from a previous rte_malloc call
* @return
- * RTE_BAD_PHYS_ADDR on error
- * otherwise return physical address of the buffer
+ * RTE_BAD_IOVA on error
+ * otherwise return an address suitable for IO
*/
-phys_addr_t
-rte_malloc_virt2phy(const void *addr);
+rte_iova_t
+rte_malloc_virt2iova(const void *addr);
+
+/**
+ * Deprecated wrapper that forwards to rte_malloc_virt2iova().
+ *
+ * @param addr
+ *   Address passed unchanged to rte_malloc_virt2iova()
+ * @return
+ *   The value returned by rte_malloc_virt2iova() for addr
+ */
+__rte_deprecated
+static inline phys_addr_t
+rte_malloc_virt2phy(const void *addr)
+{
+	phys_addr_t io_addr = rte_malloc_virt2iova(addr);
+
+	return io_addr;
+}
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 4aa5d1f7..14aacea5 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -44,12 +44,6 @@
#include <stddef.h>
#include <stdio.h>
-#include <rte_config.h>
-
-#ifdef RTE_EXEC_ENV_LINUXAPP
-#include <exec-env/rte_dom0_common.h>
-#endif
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -98,14 +92,27 @@ enum rte_page_sizes {
*/
#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
-typedef uint64_t phys_addr_t; /**< Physical address definition. */
+typedef uint64_t phys_addr_t; /**< Physical address. */
#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1)
+/**
+ * IO virtual address type.
+ * When the physical addressing mode (IOVA as PA) is in use,
+ * the translation from an IO virtual address (IOVA) to a physical address
+ * is a direct mapping, i.e. the same value.
+ * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation.
+ */
+typedef uint64_t rte_iova_t;
+#define RTE_BAD_IOVA ((rte_iova_t)-1)
/**
* Physical memory segment descriptor.
*/
struct rte_memseg {
- phys_addr_t phys_addr; /**< Start physical address. */
+ RTE_STD_C11
+ union {
+ phys_addr_t phys_addr; /**< deprecated - Start physical address. */
+ rte_iova_t iova; /**< Start IO address. */
+ };
RTE_STD_C11
union {
void *addr; /**< Start virtual address. */
@@ -116,10 +123,6 @@ struct rte_memseg {
int32_t socket_id; /**< NUMA socket ID. */
uint32_t nchannel; /**< Number of channels. */
uint32_t nrank; /**< Number of ranks. */
-#ifdef RTE_LIBRTE_XEN_DOM0
- /**< store segment MFNs */
- uint64_t mfn[DOM0_NUM_MEMBLOCK];
-#endif
} __rte_packed;
/**
@@ -140,11 +143,21 @@ int rte_mem_lock_page(const void *virt);
* @param virt
* The virtual address.
* @return
- * The physical address or RTE_BAD_PHYS_ADDR on error.
+ * The physical address or RTE_BAD_IOVA on error.
*/
phys_addr_t rte_mem_virt2phy(const void *virt);
/**
+ * Get IO virtual address of any mapped virtual address in the current process.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * The IO address or RTE_BAD_IOVA on error.
+ */
+rte_iova_t rte_mem_virt2iova(const void *virt);
+
+/**
* Get the layout of the available physical memory.
*
* It can be useful for an application to have the full physical
@@ -195,68 +208,16 @@ unsigned rte_memory_get_nchannel(void);
*/
unsigned rte_memory_get_nrank(void);
-#ifdef RTE_LIBRTE_XEN_DOM0
-
-/**< Internal use only - should DOM0 memory mapping be used */
-int rte_xen_dom0_supported(void);
-
-/**< Internal use only - phys to virt mapping for xen */
-phys_addr_t rte_xen_mem_phy2mch(int32_t, const phys_addr_t);
-
/**
- * Return the physical address of elt, which is an element of the pool mp.
- *
- * @param memseg_id
- * Identifier of the memory segment owning the physical address. If
- * set to -1, find it automatically.
- * @param phy_addr
- * physical address of elt.
- *
- * @return
- * The physical address or RTE_BAD_PHYS_ADDR on error.
- */
-static inline phys_addr_t
-rte_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr)
-{
- if (rte_xen_dom0_supported())
- return rte_xen_mem_phy2mch(memseg_id, phy_addr);
- else
- return phy_addr;
-}
-
-/**
- * Memory init for supporting application running on Xen domain0.
- *
- * @param void
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
*
* @return
- * 0: successfully
- * negative: error
+ * 1 if the system is able to obtain physical addresses.
+ * 0 if using DMA addresses through an IOMMU.
*/
-int rte_xen_dom0_memory_init(void);
-
-/**
- * Attach to memory setments of primary process on Xen domain0.
- *
- * @param void
- *
- * @return
- * 0: successfully
- * negative: error
- */
-int rte_xen_dom0_memory_attach(void);
-#else
-static inline int rte_xen_dom0_supported(void)
-{
- return 0;
-}
-
-static inline phys_addr_t
-rte_mem_phy2mch(int32_t memseg_id __rte_unused, const phys_addr_t phy_addr)
-{
- return phy_addr;
-}
-#endif
+int rte_eal_using_phys_addrs(void);
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index 1d0827f4..6f0ba182 100644
--- a/lib/librte_eal/common/include/rte_memzone.h
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -78,7 +78,11 @@ struct rte_memzone {
#define RTE_MEMZONE_NAMESIZE 32 /**< Maximum length of memory zone name.*/
char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */
- phys_addr_t phys_addr; /**< Start physical address. */
+ RTE_STD_C11
+ union {
+ phys_addr_t phys_addr; /**< deprecated - Start physical address. */
+ rte_iova_t iova; /**< Start IO address. */
+ };
RTE_STD_C11
union {
void *addr; /**< Start virtual address. */
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
deleted file mode 100644
index 8b123391..00000000
--- a/lib/librte_eal/common/include/rte_pci.h
+++ /dev/null
@@ -1,598 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * Copyright 2013-2014 6WIND S.A.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _RTE_PCI_H_
-#define _RTE_PCI_H_
-
-/**
- * @file
- *
- * RTE PCI Interface
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <errno.h>
-#include <sys/queue.h>
-#include <stdint.h>
-#include <inttypes.h>
-
-#include <rte_debug.h>
-#include <rte_interrupts.h>
-#include <rte_dev.h>
-#include <rte_bus.h>
-
-/** Pathname of PCI devices directory. */
-const char *pci_get_sysfs_path(void);
-
-/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
-#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
-#define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X")
-
-/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */
-#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
-
-/** Nb. of values in PCI device identifier format string. */
-#define PCI_FMT_NVAL 4
-
-/** Nb. of values in PCI resource format. */
-#define PCI_RESOURCE_FMT_NVAL 3
-
-/** Maximum number of PCI resources. */
-#define PCI_MAX_RESOURCE 6
-
-/* Forward declarations */
-struct rte_pci_device;
-struct rte_pci_driver;
-
-/** List of PCI devices */
-TAILQ_HEAD(rte_pci_device_list, rte_pci_device);
-/** List of PCI drivers */
-TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver);
-
-/* PCI Bus iterators */
-#define FOREACH_DEVICE_ON_PCIBUS(p) \
- TAILQ_FOREACH(p, &(rte_pci_bus.device_list), next)
-
-#define FOREACH_DRIVER_ON_PCIBUS(p) \
- TAILQ_FOREACH(p, &(rte_pci_bus.driver_list), next)
-
-/**
- * A structure describing an ID for a PCI driver. Each driver provides a
- * table of these IDs for each device that it supports.
- */
-struct rte_pci_id {
- uint32_t class_id; /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */
- uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */
- uint16_t device_id; /**< Device ID or PCI_ANY_ID. */
- uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
- uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */
-};
-
-/**
- * A structure describing the location of a PCI device.
- */
-struct rte_pci_addr {
- uint32_t domain; /**< Device domain */
- uint8_t bus; /**< Device bus */
- uint8_t devid; /**< Device ID */
- uint8_t function; /**< Device function. */
-};
-
-struct rte_devargs;
-
-/**
- * A structure describing a PCI device.
- */
-struct rte_pci_device {
- TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */
- struct rte_device device; /**< Inherit core device */
- struct rte_pci_addr addr; /**< PCI location. */
- struct rte_pci_id id; /**< PCI ID. */
- struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
- /**< PCI Memory Resource */
- struct rte_intr_handle intr_handle; /**< Interrupt handle */
- struct rte_pci_driver *driver; /**< Associated driver */
- uint16_t max_vfs; /**< sriov enable if not zero */
- enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
- char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */
-};
-
-/**
- * @internal
- * Helper macro for drivers that need to convert to struct rte_pci_device.
- */
-#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device)
-
-/** Any PCI device identifier (vendor, device, ...) */
-#define PCI_ANY_ID (0xffff)
-#define RTE_CLASS_ANY_ID (0xffffff)
-
-#ifdef __cplusplus
-/** C++ macro used to help building up tables of device IDs */
-#define RTE_PCI_DEVICE(vend, dev) \
- RTE_CLASS_ANY_ID, \
- (vend), \
- (dev), \
- PCI_ANY_ID, \
- PCI_ANY_ID
-#else
-/** Macro used to help building up tables of device IDs */
-#define RTE_PCI_DEVICE(vend, dev) \
- .class_id = RTE_CLASS_ANY_ID, \
- .vendor_id = (vend), \
- .device_id = (dev), \
- .subsystem_vendor_id = PCI_ANY_ID, \
- .subsystem_device_id = PCI_ANY_ID
-#endif
-
-/**
- * Initialisation function for the driver called during PCI probing.
- */
-typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *);
-
-/**
- * Uninitialisation function for the driver called during hotplugging.
- */
-typedef int (pci_remove_t)(struct rte_pci_device *);
-
-/**
- * A structure describing a PCI driver.
- */
-struct rte_pci_driver {
- TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */
- struct rte_driver driver; /**< Inherit core driver. */
- struct rte_pci_bus *bus; /**< PCI bus reference. */
- pci_probe_t *probe; /**< Device Probe function. */
- pci_remove_t *remove; /**< Device Remove function. */
- const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
- uint32_t drv_flags; /**< Flags contolling handling of device. */
-};
-
-/**
- * Structure describing the PCI bus
- */
-struct rte_pci_bus {
- struct rte_bus bus; /**< Inherit the generic class */
- struct rte_pci_device_list device_list; /**< List of PCI devices */
- struct rte_pci_driver_list driver_list; /**< List of PCI drivers */
-};
-
-/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
-#define RTE_PCI_DRV_NEED_MAPPING 0x0001
-/** Device driver supports link state interrupt */
-#define RTE_PCI_DRV_INTR_LSC 0x0008
-/** Device driver supports device removal interrupt */
-#define RTE_PCI_DRV_INTR_RMV 0x0010
-/** Device driver needs to keep mapped resources if unsupported dev detected */
-#define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020
-
-/**
- * A structure describing a PCI mapping.
- */
-struct pci_map {
- void *addr;
- char *path;
- uint64_t offset;
- uint64_t size;
- uint64_t phaddr;
-};
-
-/**
- * A structure describing a mapped PCI resource.
- * For multi-process we need to reproduce all PCI mappings in secondary
- * processes, so save them in a tailq.
- */
-struct mapped_pci_resource {
- TAILQ_ENTRY(mapped_pci_resource) next;
-
- struct rte_pci_addr pci_addr;
- char path[PATH_MAX];
- int nb_maps;
- struct pci_map maps[PCI_MAX_RESOURCE];
-};
-
-/** mapped pci device list */
-TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
-
-/**< Internal use only - Macro used by pci addr parsing functions **/
-#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \
-do { \
- unsigned long val; \
- char *end; \
- errno = 0; \
- val = strtoul((in), &end, 16); \
- if (errno != 0 || end[0] != (dlm) || val > (lim)) \
- return -EINVAL; \
- (fd) = (typeof (fd))val; \
- (in) = end + 1; \
-} while(0)
-
-/**
- * Utility function to produce a PCI Bus-Device-Function value
- * given a string representation. Assumes that the BDF is provided without
- * a domain prefix (i.e. domain returned is always 0)
- *
- * @param input
- * The input string to be parsed. Should have the format XX:XX.X
- * @param dev_addr
- * The PCI Bus-Device-Function address to be returned. Domain will always be
- * returned as 0
- * @return
- * 0 on success, negative on error.
- */
-static inline int
-eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr)
-{
- dev_addr->domain = 0;
- GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
- GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
- GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
- return 0;
-}
-
-/**
- * Utility function to produce a PCI Bus-Device-Function value
- * given a string representation. Assumes that the BDF is provided including
- * a domain prefix.
- *
- * @param input
- * The input string to be parsed. Should have the format XXXX:XX:XX.X
- * @param dev_addr
- * The PCI Bus-Device-Function address to be returned
- * @return
- * 0 on success, negative on error.
- */
-static inline int
-eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
-{
- GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':');
- GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':');
- GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.');
- GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0);
- return 0;
-}
-#undef GET_PCIADDR_FIELD
-
-/**
- * Utility function to write a pci device name, this device name can later be
- * used to retrieve the corresponding rte_pci_addr using eal_parse_pci_*
- * BDF helpers.
- *
- * @param addr
- * The PCI Bus-Device-Function address
- * @param output
- * The output buffer string
- * @param size
- * The output buffer size
- */
-static inline void
-rte_pci_device_name(const struct rte_pci_addr *addr,
- char *output, size_t size)
-{
- RTE_VERIFY(size >= PCI_PRI_STR_SIZE);
- RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT,
- addr->domain, addr->bus,
- addr->devid, addr->function) >= 0);
-}
-
-/* Compare two PCI device addresses. */
-/**
- * Utility function to compare two PCI device addresses.
- *
- * @param addr
- * The PCI Bus-Device-Function address to compare
- * @param addr2
- * The PCI Bus-Device-Function address to compare
- * @return
- * 0 on equal PCI address.
- * Positive on addr is greater than addr2.
- * Negative on addr is less than addr2, or error.
- */
-static inline int
-rte_eal_compare_pci_addr(const struct rte_pci_addr *addr,
- const struct rte_pci_addr *addr2)
-{
- uint64_t dev_addr, dev_addr2;
-
- if ((addr == NULL) || (addr2 == NULL))
- return -1;
-
- dev_addr = ((uint64_t)addr->domain << 24) |
- (addr->bus << 16) | (addr->devid << 8) | addr->function;
- dev_addr2 = ((uint64_t)addr2->domain << 24) |
- (addr2->bus << 16) | (addr2->devid << 8) | addr2->function;
-
- if (dev_addr > dev_addr2)
- return 1;
- else if (dev_addr < dev_addr2)
- return -1;
- else
- return 0;
-}
-
-/**
- * Scan the content of the PCI bus, and the devices in the devices
- * list
- *
- * @return
- * 0 on success, negative on error
- */
-int rte_pci_scan(void);
-
-/**
- * Probe the PCI bus
- *
- * @return
- * - 0 on success.
- * - !0 on error.
- */
-int
-rte_pci_probe(void);
-
-/**
- * Map the PCI device resources in user space virtual memory address
- *
- * Note that driver should not call this function when flag
- * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for
- * you when it's on.
- *
- * @param dev
- * A pointer to a rte_pci_device structure describing the device
- * to use
- *
- * @return
- * 0 on success, negative on error and positive if no driver
- * is found for the device.
- */
-int rte_pci_map_device(struct rte_pci_device *dev);
-
-/**
- * Unmap this device
- *
- * @param dev
- * A pointer to a rte_pci_device structure describing the device
- * to use
- */
-void rte_pci_unmap_device(struct rte_pci_device *dev);
-
-/**
- * @internal
- * Map a particular resource from a file.
- *
- * @param requested_addr
- * The starting address for the new mapping range.
- * @param fd
- * The file descriptor.
- * @param offset
- * The offset for the mapping range.
- * @param size
- * The size for the mapping range.
- * @param additional_flags
- * The additional flags for the mapping range.
- * @return
- * - On success, the function returns a pointer to the mapped area.
- * - On error, the value MAP_FAILED is returned.
- */
-void *pci_map_resource(void *requested_addr, int fd, off_t offset,
- size_t size, int additional_flags);
-
-/**
- * @internal
- * Unmap a particular resource.
- *
- * @param requested_addr
- * The address for the unmapping range.
- * @param size
- * The size for the unmapping range.
- */
-void pci_unmap_resource(void *requested_addr, size_t size);
-
-/**
- * Probe the single PCI device.
- *
- * Scan the content of the PCI bus, and find the pci device specified by pci
- * address, then call the probe() function for registered driver that has a
- * matching entry in its id_table for discovered device.
- *
- * @param addr
- * The PCI Bus-Device-Function address to probe.
- * @return
- * - 0 on success.
- * - Negative on error.
- */
-int rte_pci_probe_one(const struct rte_pci_addr *addr);
-
-/**
- * Close the single PCI device.
- *
- * Scan the content of the PCI bus, and find the pci device specified by pci
- * address, then call the remove() function for registered driver that has a
- * matching entry in its id_table for discovered device.
- *
- * @param addr
- * The PCI Bus-Device-Function address to close.
- * @return
- * - 0 on success.
- * - Negative on error.
- */
-int rte_pci_detach(const struct rte_pci_addr *addr);
-
-/**
- * Dump the content of the PCI bus.
- *
- * @param f
- * A pointer to a file for output
- */
-void rte_pci_dump(FILE *f);
-
-/**
- * Register a PCI driver.
- *
- * @param driver
- * A pointer to a rte_pci_driver structure describing the driver
- * to be registered.
- */
-void rte_pci_register(struct rte_pci_driver *driver);
-
-/** Helper for PCI device registration from driver (eth, crypto) instance */
-#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \
-RTE_INIT(pciinitfn_ ##nm); \
-static void pciinitfn_ ##nm(void) \
-{\
- (pci_drv).driver.name = RTE_STR(nm);\
- rte_pci_register(&pci_drv); \
-} \
-RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
-
-/**
- * Unregister a PCI driver.
- *
- * @param driver
- * A pointer to a rte_pci_driver structure describing the driver
- * to be unregistered.
- */
-void rte_pci_unregister(struct rte_pci_driver *driver);
-
-/**
- * Read PCI config space.
- *
- * @param device
- * A pointer to a rte_pci_device structure describing the device
- * to use
- * @param buf
- * A data buffer where the bytes should be read into
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into PCI config space
- */
-int rte_pci_read_config(const struct rte_pci_device *device,
- void *buf, size_t len, off_t offset);
-
-/**
- * Write PCI config space.
- *
- * @param device
- * A pointer to a rte_pci_device structure describing the device
- * to use
- * @param buf
- * A data buffer containing the bytes should be written
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into PCI config space
- */
-int rte_pci_write_config(const struct rte_pci_device *device,
- const void *buf, size_t len, off_t offset);
-
-/**
- * A structure used to access io resources for a pci device.
- * rte_pci_ioport is arch, os, driver specific, and should not be used outside
- * of pci ioport api.
- */
-struct rte_pci_ioport {
- struct rte_pci_device *dev;
- uint64_t base;
- uint64_t len; /* only filled for memory mapped ports */
-};
-
-/**
- * Initialize a rte_pci_ioport object for a pci device io resource.
- *
- * This object is then used to gain access to those io resources (see below).
- *
- * @param dev
- * A pointer to a rte_pci_device structure describing the device
- * to use.
- * @param bar
- * Index of the io pci resource we want to access.
- * @param p
- * The rte_pci_ioport object to be initialized.
- * @return
- * 0 on success, negative on error.
- */
-int rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p);
-
-/**
- * Release any resources used in a rte_pci_ioport object.
- *
- * @param p
- * The rte_pci_ioport object to be uninitialized.
- * @return
- * 0 on success, negative on error.
- */
-int rte_pci_ioport_unmap(struct rte_pci_ioport *p);
-
-/**
- * Read from a io pci resource.
- *
- * @param p
- * The rte_pci_ioport object from which we want to read.
- * @param data
- * A data buffer where the bytes should be read into
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into the pci io resource.
- */
-void rte_pci_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset);
-
-/**
- * Write to a io pci resource.
- *
- * @param p
- * The rte_pci_ioport object to which we want to write.
- * @param data
- * A data buffer where the bytes should be read into
- * @param len
- * The length of the data buffer.
- * @param offset
- * The offset into the pci io resource.
- */
-void rte_pci_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _RTE_PCI_H_ */
diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h
index 7c6f7383..92724406 100644
--- a/lib/librte_eal/common/include/rte_service.h
+++ b/lib/librte_eal/common/include/rte_service.h
@@ -61,9 +61,6 @@ extern "C" {
#include <rte_lcore.h>
-/* forward declaration only. Definition in rte_service_private.h */
-struct rte_service_spec;
-
#define RTE_SERVICE_NAME_MAX 32
/* Capabilities of a service.
@@ -89,40 +86,32 @@ struct rte_service_spec;
*/
uint32_t rte_service_get_count(void);
-
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Return the specification of a service by integer id.
+ * Return the id of a service by name.
*
- * This function provides the specification of a service. This can be used by
- * the application to understand what the service represents. The service
- * must not be modified by the application directly, only passed to the various
- * rte_service_* functions.
- *
- * @param id The integer id of the service to retrieve
- * @retval non-zero A valid pointer to the service_spec
- * @retval NULL Invalid *id* provided.
- */
-struct rte_service_spec *rte_service_get_by_id(uint32_t id);
-
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
+ * This function provides the id of the service using the service name as
+ * lookup key. The service id is to be passed to other functions in the
+ * rte_service_* API.
*
- * Return the specification of a service by name.
- *
- * This function provides the specification of a service using the service name
- * as lookup key. This can be used by the application to understand what the
- * service represents. The service must not be modified by the application
- * directly, only passed to the various rte_service_* functions.
+ * Example usage:
+ * @code
+ * uint32_t service_id;
+ * int32_t ret = rte_service_get_by_name("service_X", &service_id);
+ * if (ret) {
+ * // handle error
+ * }
+ * @endcode
*
* @param name The name of the service to retrieve
- * @retval non-zero A valid pointer to the service_spec
- * @retval NULL Invalid *name* provided.
+ * @param[out] service_id A pointer to a uint32_t, to be filled in with the id.
+ * @retval 0 Success. The service id is provided in *service_id*.
+ * @retval -EINVAL Null *service_id* pointer provided
+ * @retval -ENODEV No such service registered
*/
-struct rte_service_spec *rte_service_get_by_name(const char *name);
+int32_t rte_service_get_by_name(const char *name, uint32_t *service_id);
/**
* @warning
@@ -133,7 +122,7 @@ struct rte_service_spec *rte_service_get_by_name(const char *name);
* @return A pointer to the name of the service. The returned pointer remains
* in ownership of the service, and the application must not free it.
*/
-const char *rte_service_get_name(const struct rte_service_spec *service);
+const char *rte_service_get_name(uint32_t id);
/**
* @warning
@@ -146,17 +135,16 @@ const char *rte_service_get_name(const struct rte_service_spec *service);
* @retval 1 Capability supported by this service instance
* @retval 0 Capability not supported by this service instance
*/
-int32_t rte_service_probe_capability(const struct rte_service_spec *service,
- uint32_t capability);
+int32_t rte_service_probe_capability(uint32_t id, uint32_t capability);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Enable a core to run a service.
+ * Map or unmap a lcore to a service.
*
- * Each core can be added or removed from running specific services. This
- * functions adds *lcore* to the set of cores that will run *service*.
+ * Each core can be added or removed from running a specific service. This
+ * function enables or disables *lcore* to run *service_id*.
*
* If multiple cores are enabled on a service, an atomic is used to ensure that
* only one cores runs the service at a time. The exception to this is when
@@ -164,82 +152,120 @@ int32_t rte_service_probe_capability(const struct rte_service_spec *service,
* called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set,
* the service function can be run on multiple threads at the same time.
*
- * @retval 0 lcore added successfully
+ * @param service_id the service to apply the lcore to
+ * @param lcore The lcore that will be mapped to service
+ * @param enable Zero to unmap or disable the core, non-zero to enable
+ *
+ * @retval 0 lcore map updated successfully
* @retval -EINVAL An invalid service or lcore was provided.
*/
-int32_t rte_service_enable_on_lcore(struct rte_service_spec *service,
- uint32_t lcore);
+int32_t rte_service_map_lcore_set(uint32_t service_id, uint32_t lcore,
+ uint32_t enable);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Disable a core to run a service.
+ * Retrieve the mapping of an lcore to a service.
*
- * Each core can be added or removed from running specific services. This
- * functions removes *lcore* to the set of cores that will run *service*.
+ * @param service_id the service whose lcore mapping is queried
+ * @param lcore The lcore whose mapping to the service is queried
*
- * @retval 0 Lcore removed successfully
+ * @retval 1 lcore is mapped to service
+ * @retval 0 lcore is not mapped to service
* @retval -EINVAL An invalid service or lcore was provided.
*/
-int32_t rte_service_disable_on_lcore(struct rte_service_spec *service,
- uint32_t lcore);
+int32_t rte_service_map_lcore_get(uint32_t service_id, uint32_t lcore);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Return if an lcore is enabled for the service.
+ * Set the runstate of the service.
*
- * This function allows the application to query if *lcore* is currently set to
- * run *service*.
+ * Each service is either running or stopped. Setting a non-zero runstate
+ * enables the service to run, while setting runstate zero disables it.
*
- * @retval 1 Lcore enabled on this lcore
- * @retval 0 Lcore disabled on this lcore
- * @retval -EINVAL An invalid service or lcore was provided.
+ * @param id The id of the service
+ * @param runstate The run state to apply to the service
+ *
+ * @retval 0 The service runstate was successfully updated
+ * @retval -EINVAL Invalid service id
*/
-int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service,
- uint32_t lcore);
-
+int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Enable *service* to run.
- *
- * This function switches on a service during runtime.
- * @retval 0 The service was successfully started
+ * Get the runstate for the service with *id*. See *rte_service_runstate_set*
+ * for details of runstates. A service can call this function to ensure that
+ * the application has indicated that it will receive CPU cycles. Either a
+ * service-core is mapped (default case), or the application has explicitly
+ * disabled the check that a service-core is mapped to the service and takes
+ * responsibility to run the service manually using the available function
+ * *rte_service_run_iter_on_app_lcore* to do so.
+ *
+ * @retval 1 Service is running
+ * @retval 0 Service is stopped
+ * @retval -EINVAL Invalid service id
*/
-int32_t rte_service_start(struct rte_service_spec *service);
+int32_t rte_service_runstate_get(uint32_t id);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Disable *service*.
+ * Enable or disable the check for a service-core being mapped to the service.
+ * An application can disable the check when it takes the responsibility to run a
+ * service itself using *rte_service_run_iter_on_app_lcore*.
+ *
+ * @param id The id of the service to set the check on
+ * @param enable When zero, the check is disabled. Non-zero enables the check.
*
- * Switch off a service, so it is not run until it is *rte_service_start* is
- * called on it.
- * @retval 0 Service successfully switched off
+ * @retval 0 Success
+ * @retval -EINVAL Invalid service ID
*/
-int32_t rte_service_stop(struct rte_service_spec *service);
+int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enable);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Returns if *service* is currently running.
- *
- * This function returns true if the service has been started using
- * *rte_service_start*, AND a service core is mapped to the service. This
- * function can be used to ensure that the service will be run.
- *
- * @retval 1 Service is currently running, and has a service lcore mapped
- * @retval 0 Service is currently stopped, or no service lcore is mapped
- * @retval -EINVAL Invalid service pointer provided
+ * This function runs a service callback from a non-service lcore.
+ *
+ * This function is designed to enable gradual porting to service cores, and
+ * to enable unit tests to verify a service behaves as expected.
+ *
+ * When called, this function ensures that the service identified by *id* is
+ * safe to run on this lcore. Multi-thread safe services are invoked even if
+ * other cores are simultaneously running them as they are multi-thread safe.
+ *
+ * Multi-thread unsafe services are handled depending on the variable
+ * *serialize_multithread_unsafe*:
+ * - When set, the function will check if a service is already being invoked
+ * on another lcore, refusing to run it and returning -EBUSY.
+ * - When zero, the application takes responsibility to ensure that the service
+ * indicated by *id* is not going to be invoked by another lcore. This setting
+ * avoids atomic operations, so is likely to be more performant.
+ *
+ * @param id The ID of the service to run
+ * @param serialize_multithread_unsafe This parameter indicates to the service
+ * cores library if it is required to use atomics to serialize access
+ * to multi-thread unsafe services. As there is an overhead in using
+ * atomics, applications can choose to enable or disable this feature
+ *
+ * Note that any thread calling this function MUST be a DPDK EAL thread, as
+ * the *rte_lcore_id* function is used to access internal data structures.
+ *
+ * @retval 0 Service was run on the calling thread successfully
+ * @retval -EBUSY Another lcore is executing the service, and it is not a
+ * multi-thread safe service, so the service was not run on this lcore
+ * @retval -ENOEXEC Service is not in a run-able state
+ * @retval -EINVAL Invalid service id
*/
-int32_t rte_service_is_running(const struct rte_service_spec *service);
+int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
+ uint32_t serialize_multithread_unsafe);
/**
* @warning
@@ -341,13 +367,12 @@ int32_t rte_service_lcore_reset_all(void);
* Enable or disable statistics collection for *service*.
*
* This function enables per core, per-service cycle count collection.
- * @param service The service to enable statistics gathering on.
+ * @param id The service to enable statistics gathering on.
* @param enable Zero to disable statistics, non-zero to enable.
* @retval 0 Success
* @retval -EINVAL Invalid service pointer passed
*/
-int32_t rte_service_set_stats_enable(struct rte_service_spec *service,
- int32_t enable);
+int32_t rte_service_set_stats_enable(uint32_t id, int32_t enable);
/**
* @warning
@@ -374,10 +399,26 @@ int32_t rte_service_lcore_list(uint32_t array[], uint32_t n);
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Dumps any information available about the service. If service is NULL,
- * dumps info for all services.
+ * Get the number of services running on the supplied lcore.
+ *
+ * @param lcore Id of the service core.
+ * @retval >=0 Number of services registered to this core.
+ * @retval -EINVAL Invalid lcore provided
+ * @retval -ENOTSUP The provided lcore is not a service core.
+ */
+int32_t rte_service_lcore_count_services(uint32_t lcore);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Dumps any information available about the service. When id is UINT32_MAX,
+ * this function dumps info for all services.
+ *
+ * @retval 0 Statistics have been successfully dumped
+ * @retval -EINVAL Invalid service id provided
*/
-int32_t rte_service_dump(FILE *f, struct rte_service_spec *service);
+int32_t rte_service_dump(FILE *f, uint32_t id);
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_service_component.h b/lib/librte_eal/common/include/rte_service_component.h
index 7a946a1e..ac965cb4 100644
--- a/lib/librte_eal/common/include/rte_service_component.h
+++ b/lib/librte_eal/common/include/rte_service_component.h
@@ -85,21 +85,30 @@ struct rte_service_spec {
*
* For example the eventdev SW PMD requires CPU cycles to perform its
* scheduling. This can be achieved by registering it as a service, and the
- * application can then assign CPU resources to it using
- * *rte_service_set_coremask*.
+ * application can then assign CPU resources to that service.
+ *
+ * Note that when a service component registers itself, it is not permitted to
+ * add or remove service-core threads, or modify lcore-to-service mappings. The
+ * only API that may be called by the service-component is
+ * *rte_service_component_runstate_set*, which indicates that the service
+ * component is ready to be executed.
*
* @param spec The specification of the service to register
+ * @param[out] service_id A pointer to a uint32_t, which will be filled in
+ * during registration of the service. It is set to the integer
+ * service number given to the service. This parameter may be NULL.
* @retval 0 Successfully registered the service.
* -EINVAL Attempted to register an invalid service (eg, no callback
* set)
*/
-int32_t rte_service_register(const struct rte_service_spec *spec);
+int32_t rte_service_component_register(const struct rte_service_spec *spec,
+ uint32_t *service_id);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Unregister a service.
+ * Unregister a service component.
*
* The service being removed must be stopped before calling this function.
*
@@ -107,7 +116,7 @@ int32_t rte_service_register(const struct rte_service_spec *spec);
* @retval -EBUSY The service is currently running, stop the service before
* calling unregister. No action has been taken.
*/
-int32_t rte_service_unregister(struct rte_service_spec *service);
+int32_t rte_service_component_unregister(uint32_t id);
/**
* @warning
@@ -131,6 +140,23 @@ int32_t rte_service_start_with_defaults(void);
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
+ * Set the backend runstate of a component.
+ *
+ * This function allows services to be registered at startup, but not yet
+ * enabled to run by default. When the service has been configured (via the
+ * usual method; eg rte_eventdev_configure), the service can mark itself as
+ * ready to run. The differentiation between backend runstate and
+ * service_runstate is that the backend runstate is set by the service
+ * component while the service runstate is reserved for application usage.
+ *
+ * @retval 0 Success
+ */
+int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
* Initialize the service library.
*
* In order to use the service library, it must be initialized. EAL initializes
diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h
deleted file mode 100644
index 29f5a523..00000000
--- a/lib/librte_eal/common/include/rte_vdev.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2016 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RTE_VDEV_H
-#define RTE_VDEV_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/queue.h>
-#include <rte_dev.h>
-#include <rte_devargs.h>
-
-struct rte_vdev_device {
- TAILQ_ENTRY(rte_vdev_device) next; /**< Next attached vdev */
- struct rte_device device; /**< Inherit core device */
-};
-
-/**
- * @internal
- * Helper macro for drivers that need to convert to struct rte_vdev_device.
- */
-#define RTE_DEV_TO_VDEV(ptr) \
- container_of(ptr, struct rte_vdev_device, device)
-
-static inline const char *
-rte_vdev_device_name(const struct rte_vdev_device *dev)
-{
- if (dev && dev->device.name)
- return dev->device.name;
- return NULL;
-}
-
-static inline const char *
-rte_vdev_device_args(const struct rte_vdev_device *dev)
-{
- if (dev && dev->device.devargs)
- return dev->device.devargs->args;
- return "";
-}
-
-/** Double linked list of virtual device drivers. */
-TAILQ_HEAD(vdev_driver_list, rte_vdev_driver);
-
-/**
- * Probe function called for each virtual device driver once.
- */
-typedef int (rte_vdev_probe_t)(struct rte_vdev_device *dev);
-
-/**
- * Remove function called for each virtual device driver once.
- */
-typedef int (rte_vdev_remove_t)(struct rte_vdev_device *dev);
-
-/**
- * A virtual device driver abstraction.
- */
-struct rte_vdev_driver {
- TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */
- struct rte_driver driver; /**< Inherited general driver. */
- rte_vdev_probe_t *probe; /**< Virtual device probe function. */
- rte_vdev_remove_t *remove; /**< Virtual device remove function. */
-};
-
-/**
- * Register a virtual device driver.
- *
- * @param driver
- * A pointer to a rte_vdev_driver structure describing the driver
- * to be registered.
- */
-void rte_vdev_register(struct rte_vdev_driver *driver);
-
-/**
- * Unregister a virtual device driver.
- *
- * @param driver
- * A pointer to a rte_vdev_driver structure describing the driver
- * to be unregistered.
- */
-void rte_vdev_unregister(struct rte_vdev_driver *driver);
-
-#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\
-RTE_INIT(vdrvinitfn_ ##vdrv);\
-static const char *vdrvinit_ ## nm ## _alias;\
-static void vdrvinitfn_ ##vdrv(void)\
-{\
- (vdrv).driver.name = RTE_STR(nm);\
- (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\
- rte_vdev_register(&vdrv);\
-} \
-RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
-
-#define RTE_PMD_REGISTER_ALIAS(nm, alias)\
-static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index a69a7075..d08cf48a 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -61,7 +61,7 @@ extern "C" {
/**
* Minor version/month number i.e. the mm in yy.mm.z
*/
-#define RTE_VER_MONTH 8
+#define RTE_VER_MONTH 11
/**
* Patch level number i.e. the z in yy.mm.z
@@ -71,14 +71,14 @@ extern "C" {
/**
* Extra string to be appended to version number
*/
-#define RTE_VER_SUFFIX ""
+#define RTE_VER_SUFFIX "-rc"
/**
* Patch release number
* 0-15 = release candidates
* 16 = release
*/
-#define RTE_VER_RELEASE 16
+#define RTE_VER_RELEASE 3
/**
* Macro to compute a version number usable for comparisons
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
new file mode 100644
index 00000000..a69c4ff6
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -0,0 +1,153 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 6WIND S.A. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_VFIO_H_
+#define _RTE_VFIO_H_
+
+/*
+ * determine if VFIO is present on the system
+ */
+#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO)
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
+#define VFIO_PRESENT
+#endif /* kernel version >= 3.6.0 */
+#endif /* RTE_EAL_VFIO */
+
+#ifdef VFIO_PRESENT
+
+#include <linux/vfio.h>
+
+#define VFIO_DIR "/dev/vfio"
+#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
+#define VFIO_GROUP_FMT "/dev/vfio/%u"
+#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
+#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
+#define VFIO_GET_REGION_IDX(x) (x >> 40)
+#define VFIO_NOIOMMU_MODE \
+ "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
+
+/**
+ * Setup vfio_cfg for the device identified by its address.
+ * It discovers the configured I/O MMU groups or sets a new one for the device.
+ * If a new group is assigned, the DMA mapping is performed.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param sysfs_base
+ * sysfs path prefix.
+ *
+ * @param dev_addr
+ * device location.
+ *
+ * @param vfio_dev_fd
+ * VFIO fd.
+ *
+ * @param device_info
+ * Device information.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ * >0 if the device cannot be managed this way.
+ */
+int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+ int *vfio_dev_fd, struct vfio_device_info *device_info);
+
+/**
+ * Release a device mapped to a VFIO-managed I/O MMU group.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param sysfs_base
+ * sysfs path prefix.
+ *
+ * @param dev_addr
+ * device location.
+ *
+ * @param fd
+ * VFIO fd.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ */
+int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd);
+
+/**
+ * Enable a VFIO-related kmod.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param modname
+ * kernel module name.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ */
+int rte_vfio_enable(const char *modname);
+
+/**
+ * Check whether a VFIO-related kmod is enabled.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param modname
+ * kernel module name.
+ *
+ * @return
+ * !0 if true.
+ * 0 otherwise.
+ */
+int rte_vfio_is_enabled(const char *modname);
+
+/**
+ * Whether VFIO NOIOMMU mode is enabled.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @return
+ * !0 if true.
+ * 0 otherwise.
+ */
+int rte_vfio_noiommu_is_enabled(void);
+
+#endif /* VFIO_PRESENT */
+
+#endif /* _RTE_VFIO_H_ */
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 15076905..889dffd2 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -275,14 +275,14 @@ malloc_elem_free(struct malloc_elem *elem)
return -1;
rte_spinlock_lock(&(elem->heap->lock));
- size_t sz = elem->size - sizeof(*elem);
+ size_t sz = elem->size - sizeof(*elem) - MALLOC_ELEM_TRAILER_LEN;
uint8_t *ptr = (uint8_t *)&elem[1];
struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
if (next->state == ELEM_FREE){
/* remove from free list, join to this one */
elem_free_list_remove(next);
join_elem(elem, next);
- sz += sizeof(*elem);
+ sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
}
/* check if previous element is free, if so join with it and return,
@@ -291,8 +291,8 @@ malloc_elem_free(struct malloc_elem *elem)
if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
elem_free_list_remove(elem->prev);
join_elem(elem->prev, elem);
- sz += sizeof(*elem);
- ptr -= sizeof(*elem);
+ sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
+ ptr -= (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
elem = elem->prev;
}
malloc_elem_free_list_insert(elem);
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index f04b2d1e..ce39129d 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -53,13 +53,13 @@ struct malloc_elem {
volatile enum elem_state state;
uint32_t pad;
size_t size;
-#ifdef RTE_LIBRTE_MALLOC_DEBUG
+#ifdef RTE_MALLOC_DEBUG
uint64_t header_cookie; /* Cookie marking start of data */
/* trailer cookie at start + size */
#endif
} __rte_cache_aligned;
-#ifndef RTE_LIBRTE_MALLOC_DEBUG
+#ifndef RTE_MALLOC_DEBUG
static const unsigned MALLOC_ELEM_TRAILER_LEN = 0;
/* dummy function - just check if pointer is non-null */
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index 5c0627bf..fe2278bc 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -246,15 +246,22 @@ rte_malloc_set_limit(__rte_unused const char *type,
}
/*
- * Return the physical address of a virtual address obtained through rte_malloc
+ * Return the IO address of a virtual address obtained through rte_malloc
*/
-phys_addr_t
-rte_malloc_virt2phy(const void *addr)
+rte_iova_t
+rte_malloc_virt2iova(const void *addr)
{
+ rte_iova_t iova;
const struct malloc_elem *elem = malloc_elem_from_data(addr);
if (elem == NULL)
- return RTE_BAD_PHYS_ADDR;
- if (elem->ms->phys_addr == RTE_BAD_PHYS_ADDR)
- return RTE_BAD_PHYS_ADDR;
- return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr);
+ return RTE_BAD_IOVA;
+ if (elem->ms->iova == RTE_BAD_IOVA)
+ return RTE_BAD_IOVA;
+
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ iova = (uintptr_t)addr;
+ else
+ iova = elem->ms->iova +
+ RTE_PTR_DIFF(addr, elem->ms->addr);
+ return iova;
}
diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 7efb76dc..09b758c9 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -54,6 +54,7 @@
#define SERVICE_F_REGISTERED (1 << 0)
#define SERVICE_F_STATS_ENABLED (1 << 1)
+#define SERVICE_F_START_CHECK (1 << 2)
/* runstates for services and lcores, denoting if they are active or not */
#define RUNSTATE_STOPPED 0
@@ -71,11 +72,12 @@ struct rte_service_spec_impl {
rte_atomic32_t execute_lock;
/* API set/get-able variables */
- int32_t runstate;
+ int8_t app_runstate;
+ int8_t comp_runstate;
uint8_t internal_flags;
/* per service statistics */
- uint32_t num_mapped_cores;
+ rte_atomic32_t num_mapped_cores;
uint64_t calls;
uint64_t cycles_spent;
} __rte_cache_aligned;
@@ -144,6 +146,13 @@ service_valid(uint32_t id)
return !!(rte_services[id].internal_flags & SERVICE_F_REGISTERED);
}
+/* validate ID and retrieve service pointer, or return error value */
+#define SERVICE_VALID_GET_OR_ERR_RET(id, service, retval) do { \
+ if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id)) \
+ return retval; \
+ service = &rte_services[id]; \
+} while (0)
+
/* returns 1 if statistics should be colleced for service
* Returns 0 if statistics should not be collected for service
*/
@@ -156,21 +165,31 @@ service_stats_enabled(struct rte_service_spec_impl *impl)
static inline int
service_mt_safe(struct rte_service_spec_impl *s)
{
- return s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE;
+ return !!(s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE);
}
-int32_t rte_service_set_stats_enable(struct rte_service_spec *service,
- int32_t enabled)
+int32_t rte_service_set_stats_enable(uint32_t id, int32_t enabled)
{
- struct rte_service_spec_impl *impl =
- (struct rte_service_spec_impl *)service;
- if (!impl)
- return -EINVAL;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, 0);
if (enabled)
- impl->internal_flags |= SERVICE_F_STATS_ENABLED;
+ s->internal_flags |= SERVICE_F_STATS_ENABLED;
else
- impl->internal_flags &= ~(SERVICE_F_STATS_ENABLED);
+ s->internal_flags &= ~(SERVICE_F_STATS_ENABLED);
+
+ return 0;
+}
+
+int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, 0);
+
+ if (enabled)
+ s->internal_flags |= SERVICE_F_START_CHECK;
+ else
+ s->internal_flags &= ~(SERVICE_F_START_CHECK);
return 0;
}
@@ -181,58 +200,42 @@ rte_service_get_count(void)
return rte_service_count;
}
-struct rte_service_spec *
-rte_service_get_by_id(uint32_t id)
+int32_t rte_service_get_by_name(const char *name, uint32_t *service_id)
{
- struct rte_service_spec *service = NULL;
- if (id < rte_service_count)
- service = (struct rte_service_spec *)&rte_services[id];
-
- return service;
-}
+ if (!service_id)
+ return -EINVAL;
-struct rte_service_spec *rte_service_get_by_name(const char *name)
-{
- struct rte_service_spec *service = NULL;
int i;
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
if (service_valid(i) &&
strcmp(name, rte_services[i].spec.name) == 0) {
- service = (struct rte_service_spec *)&rte_services[i];
- break;
+ *service_id = i;
+ return 0;
}
}
- return service;
+ return -ENODEV;
}
const char *
-rte_service_get_name(const struct rte_service_spec *service)
+rte_service_get_name(uint32_t id)
{
- return service->name;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, 0);
+ return s->spec.name;
}
int32_t
-rte_service_probe_capability(const struct rte_service_spec *service,
- uint32_t capability)
+rte_service_probe_capability(uint32_t id, uint32_t capability)
{
- return service->capabilities & capability;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ return !!(s->spec.capabilities & capability);
}
int32_t
-rte_service_is_running(const struct rte_service_spec *spec)
-{
- const struct rte_service_spec_impl *impl =
- (const struct rte_service_spec_impl *)spec;
- if (!impl)
- return -EINVAL;
-
- return (impl->runstate == RUNSTATE_RUNNING) &&
- (impl->num_mapped_cores > 0);
-}
-
-int32_t
-rte_service_register(const struct rte_service_spec *spec)
+rte_service_component_register(const struct rte_service_spec *spec,
+ uint32_t *id_ptr)
{
uint32_t i;
int32_t free_slot = -1;
@@ -252,68 +255,161 @@ rte_service_register(const struct rte_service_spec *spec)
struct rte_service_spec_impl *s = &rte_services[free_slot];
s->spec = *spec;
- s->internal_flags |= SERVICE_F_REGISTERED;
+ s->internal_flags |= SERVICE_F_REGISTERED | SERVICE_F_START_CHECK;
rte_smp_wmb();
rte_service_count++;
+ if (id_ptr)
+ *id_ptr = free_slot;
+
return 0;
}
int32_t
-rte_service_unregister(struct rte_service_spec *spec)
+rte_service_component_unregister(uint32_t id)
{
- struct rte_service_spec_impl *s = NULL;
- struct rte_service_spec_impl *spec_impl =
- (struct rte_service_spec_impl *)spec;
-
uint32_t i;
- uint32_t service_id;
- for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
- if (&rte_services[i] == spec_impl) {
- s = spec_impl;
- service_id = i;
- break;
- }
- }
-
- if (!s)
- return -EINVAL;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
rte_service_count--;
rte_smp_wmb();
s->internal_flags &= ~(SERVICE_F_REGISTERED);
+ /* clear the run-bit in all cores */
for (i = 0; i < RTE_MAX_LCORE; i++)
- lcore_states[i].service_mask &= ~(UINT64_C(1) << service_id);
+ lcore_states[i].service_mask &= ~(UINT64_C(1) << id);
- memset(&rte_services[service_id], 0,
- sizeof(struct rte_service_spec_impl));
+ memset(&rte_services[id], 0, sizeof(struct rte_service_spec_impl));
return 0;
}
int32_t
-rte_service_start(struct rte_service_spec *service)
+rte_service_component_runstate_set(uint32_t id, uint32_t runstate)
{
- struct rte_service_spec_impl *s =
- (struct rte_service_spec_impl *)service;
- s->runstate = RUNSTATE_RUNNING;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ if (runstate)
+ s->comp_runstate = RUNSTATE_RUNNING;
+ else
+ s->comp_runstate = RUNSTATE_STOPPED;
+
rte_smp_wmb();
return 0;
}
int32_t
-rte_service_stop(struct rte_service_spec *service)
+rte_service_runstate_set(uint32_t id, uint32_t runstate)
{
- struct rte_service_spec_impl *s =
- (struct rte_service_spec_impl *)service;
- s->runstate = RUNSTATE_STOPPED;
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+
+ if (runstate)
+ s->app_runstate = RUNSTATE_RUNNING;
+ else
+ s->app_runstate = RUNSTATE_STOPPED;
+
rte_smp_wmb();
return 0;
}
+int32_t
+rte_service_runstate_get(uint32_t id)
+{
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ rte_smp_rmb();
+
+ int check_disabled = !(s->internal_flags & SERVICE_F_START_CHECK);
+ int lcore_mapped = (rte_atomic32_read(&s->num_mapped_cores) > 0);
+
+ return (s->app_runstate == RUNSTATE_RUNNING) &&
+ (s->comp_runstate == RUNSTATE_RUNNING) &&
+ (check_disabled | lcore_mapped);
+}
+
+static inline void
+rte_service_runner_do_callback(struct rte_service_spec_impl *s,
+ struct core_state *cs, uint32_t service_idx)
+{
+ void *userdata = s->spec.callback_userdata;
+
+ if (service_stats_enabled(s)) {
+ uint64_t start = rte_rdtsc();
+ s->spec.callback(userdata);
+ uint64_t end = rte_rdtsc();
+ s->cycles_spent += end - start;
+ cs->calls_per_service[service_idx]++;
+ s->calls++;
+ } else
+ s->spec.callback(userdata);
+}
+
+
+static inline int32_t
+service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
+{
+ if (!service_valid(i))
+ return -EINVAL;
+ struct rte_service_spec_impl *s = &rte_services[i];
+ if (s->comp_runstate != RUNSTATE_RUNNING ||
+ s->app_runstate != RUNSTATE_RUNNING ||
+ !(service_mask & (UINT64_C(1) << i)))
+ return -ENOEXEC;
+
+ /* check do we need cmpset, if MT safe or <= 1 core
+ * mapped, atomic ops are not required.
+ */
+ const int use_atomics = (service_mt_safe(s) == 0) &&
+ (rte_atomic32_read(&s->num_mapped_cores) > 1);
+ if (use_atomics) {
+ if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
+ return -EBUSY;
+
+ rte_service_runner_do_callback(s, cs, i);
+ rte_atomic32_clear(&s->execute_lock);
+ } else
+ rte_service_runner_do_callback(s, cs, i);
+
+ return 0;
+}
+
+int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
+ uint32_t serialize_mt_unsafe)
+{
+ /* run service on calling core, using all-ones as the service mask */
+ if (!service_valid(id))
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[rte_lcore_id()];
+ struct rte_service_spec_impl *s = &rte_services[id];
+
+ /* Atomically add this core to the mapped cores first, then examine if
+ * we can run the service. This avoids a race condition between
+ * checking the value, and atomically adding to the mapped count.
+ */
+ if (serialize_mt_unsafe)
+ rte_atomic32_inc(&s->num_mapped_cores);
+
+ if (service_mt_safe(s) == 0 &&
+ rte_atomic32_read(&s->num_mapped_cores) > 1) {
+ if (serialize_mt_unsafe)
+ rte_atomic32_dec(&s->num_mapped_cores);
+ return -EBUSY;
+ }
+
+ int ret = service_run(id, cs, UINT64_MAX);
+
+ if (serialize_mt_unsafe)
+ rte_atomic32_dec(&s->num_mapped_cores);
+
+ return ret;
+}
+
static int32_t
rte_service_runner_func(void *arg)
{
@@ -324,35 +420,10 @@ rte_service_runner_func(void *arg)
while (lcore_states[lcore].runstate == RUNSTATE_RUNNING) {
const uint64_t service_mask = cs->service_mask;
- for (i = 0; i < rte_service_count; i++) {
- struct rte_service_spec_impl *s = &rte_services[i];
- if (s->runstate != RUNSTATE_RUNNING ||
- !(service_mask & (UINT64_C(1) << i)))
- continue;
- /* check do we need cmpset, if MT safe or <= 1 core
- * mapped, atomic ops are not required.
- */
- const int need_cmpset = !((service_mt_safe(s) == 0) &&
- (s->num_mapped_cores > 1));
- uint32_t *lock = (uint32_t *)&s->execute_lock;
-
- if (need_cmpset || rte_atomic32_cmpset(lock, 0, 1)) {
- void *userdata = s->spec.callback_userdata;
-
- if (service_stats_enabled(s)) {
- uint64_t start = rte_rdtsc();
- s->spec.callback(userdata);
- uint64_t end = rte_rdtsc();
- s->cycles_spent += end - start;
- cs->calls_per_service[i]++;
- s->calls++;
- } else
- s->spec.callback(userdata);
-
- if (need_cmpset)
- rte_atomic32_clear(&s->execute_lock);
- }
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ /* return value ignored as no change to code flow */
+ service_run(i, cs, service_mask);
}
rte_smp_rmb();
@@ -397,6 +468,19 @@ rte_service_lcore_list(uint32_t array[], uint32_t n)
}
int32_t
+rte_service_lcore_count_services(uint32_t lcore)
+{
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ struct core_state *cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ return __builtin_popcountll(cs->service_mask);
+}
+
+int32_t
rte_service_start_with_defaults(void)
{
/* create a default mapping from cores to services, then start the
@@ -407,7 +491,7 @@ rte_service_start_with_defaults(void)
uint32_t count = rte_service_get_count();
int32_t lcore_iter = 0;
- uint32_t ids[RTE_MAX_LCORE];
+ uint32_t ids[RTE_MAX_LCORE] = {0};
int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
if (lcore_count == 0)
@@ -417,16 +501,12 @@ rte_service_start_with_defaults(void)
rte_service_lcore_start(ids[i]);
for (i = 0; i < count; i++) {
- struct rte_service_spec *s = rte_service_get_by_id(i);
- if (!s)
- return -EINVAL;
-
/* do 1:1 core mapping here, with each service getting
* assigned a single core by default. Adding multiple services
* should multiplex to a single core, or 1:1 if there are the
* same amount of services as service-cores
*/
- ret = rte_service_enable_on_lcore(s, ids[lcore_iter]);
+ ret = rte_service_map_lcore_set(i, ids[lcore_iter], 1);
if (ret)
return -ENODEV;
@@ -434,7 +514,7 @@ rte_service_start_with_defaults(void)
if (lcore_iter >= lcore_count)
lcore_iter = 0;
- ret = rte_service_start(s);
+ ret = rte_service_runstate_set(i, 1);
if (ret)
return -ENOEXEC;
}
@@ -467,43 +547,40 @@ service_update(struct rte_service_spec *service, uint32_t lcore,
if (set) {
if (*set) {
lcore_states[lcore].service_mask |= sid_mask;
- rte_services[sid].num_mapped_cores++;
+ rte_atomic32_inc(&rte_services[sid].num_mapped_cores);
} else {
lcore_states[lcore].service_mask &= ~(sid_mask);
- rte_services[sid].num_mapped_cores--;
+ rte_atomic32_dec(&rte_services[sid].num_mapped_cores);
}
}
if (enabled)
- *enabled = (lcore_states[lcore].service_mask & (sid_mask));
+ *enabled = !!(lcore_states[lcore].service_mask & (sid_mask));
rte_smp_wmb();
return 0;
}
-int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service,
- uint32_t lcore)
-{
- uint32_t enabled;
- int ret = service_update(service, lcore, 0, &enabled);
- if (ret == 0)
- return enabled;
- return -EINVAL;
-}
-
int32_t
-rte_service_enable_on_lcore(struct rte_service_spec *service, uint32_t lcore)
+rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
{
- uint32_t on = 1;
- return service_update(service, lcore, &on, 0);
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ uint32_t on = enabled > 0;
+ return service_update(&s->spec, lcore, &on, 0);
}
int32_t
-rte_service_disable_on_lcore(struct rte_service_spec *service, uint32_t lcore)
+rte_service_map_lcore_get(uint32_t id, uint32_t lcore)
{
- uint32_t off = 0;
- return service_update(service, lcore, &off, 0);
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
+ uint32_t enabled;
+ int ret = service_update(&s->spec, lcore, 0, &enabled);
+ if (ret == 0)
+ return enabled;
+ return ret;
}
int32_t rte_service_lcore_reset_all(void)
@@ -516,7 +593,7 @@ int32_t rte_service_lcore_reset_all(void)
lcore_states[i].runstate = RUNSTATE_STOPPED;
}
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
- rte_services[i].num_mapped_cores = 0;
+ rte_atomic32_set(&rte_services[i].num_mapped_cores, 0);
rte_smp_wmb();
@@ -552,7 +629,8 @@ rte_service_lcore_add(uint32_t lcore)
lcore_states[lcore].runstate = RUNSTATE_STOPPED;
rte_smp_wmb();
- return 0;
+
+ return rte_eal_wait_lcore(lcore);
}
int32_t
@@ -607,12 +685,12 @@ rte_service_lcore_stop(uint32_t lcore)
return -EALREADY;
uint32_t i;
+ uint64_t service_mask = lcore_states[lcore].service_mask;
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
- int32_t enabled =
- lcore_states[i].service_mask & (UINT64_C(1) << i);
- int32_t service_running = rte_services[i].runstate !=
- RUNSTATE_STOPPED;
- int32_t only_core = rte_services[i].num_mapped_cores == 1;
+ int32_t enabled = service_mask & (UINT64_C(1) << i);
+ int32_t service_running = rte_service_runstate_get(i);
+ int32_t only_core = (1 ==
+ rte_atomic32_read(&rte_services[i].num_mapped_cores));
/* if the core is mapped, and the service is running, and this
* is the only core that is mapped, the service would cease to
@@ -667,28 +745,34 @@ service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
fprintf(f, "\n");
}
-int32_t rte_service_dump(FILE *f, struct rte_service_spec *service)
+int32_t rte_service_dump(FILE *f, uint32_t id)
{
uint32_t i;
+ int print_one = (id != UINT32_MAX);
uint64_t total_cycles = 0;
- for (i = 0; i < rte_service_count; i++) {
+
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
if (!service_valid(i))
continue;
total_cycles += rte_services[i].cycles_spent;
}
- if (service) {
- struct rte_service_spec_impl *s =
- (struct rte_service_spec_impl *)service;
+ /* print only the specified service */
+ if (print_one) {
+ struct rte_service_spec_impl *s;
+ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL);
fprintf(f, "Service %s Summary\n", s->spec.name);
uint32_t reset = 0;
rte_service_dump_one(f, s, total_cycles, reset);
return 0;
}
+ /* print all services, as UINT32_MAX was passed as id */
fprintf(f, "Services Summary\n");
- for (i = 0; i < rte_service_count; i++) {
+ for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
+ if (!service_valid(i))
+ continue;
uint32_t reset = 1;
rte_service_dump_one(f, &rte_services[i], total_cycles, reset);
}
@@ -698,7 +782,7 @@ int32_t rte_service_dump(FILE *f, struct rte_service_spec *service)
if (lcore_config[i].core_role != ROLE_SERVICE)
continue;
- uint32_t reset = 0;
+ uint32_t reset = 1;
service_dump_calls_per_lcore(f, i, reset);
}
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index 4794696b..2ebdf313 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -35,7 +35,5 @@ DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal
DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio
DIRS-$(CONFIG_RTE_KNI_KMOD) += kni
DEPDIRS-kni := eal
-DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0
-DEPDIRS-xen_dom0 := eal
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 90bca4d6..5a7b8b2a 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -34,10 +34,10 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_eal.a
ARCH_DIR ?= $(RTE_ARCH)
-EXPORT_MAP := rte_eal_version.map
+EXPORT_MAP := ../../rte_eal_version.map
VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
-LIBABIVER := 5
+LIBABIVER := 6
VPATH += $(RTE_SDK)/lib/librte_eal/common
@@ -58,16 +58,10 @@ endif
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c
-ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_xen_memory.c
-endif
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_mp_sync.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
@@ -80,9 +74,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_vdev.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_tailqs.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_errno.c
@@ -104,6 +95,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c
# from arch dir
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c
SRCS-$(CONFIG_RTE_ARCH_X86) += rte_spinlock.c
+SRCS-y += rte_cycles.c
CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
@@ -116,13 +108,11 @@ CFLAGS_eal_thread.o := -D_GNU_SOURCE
CFLAGS_eal_log.o := -D_GNU_SOURCE
CFLAGS_eal_common_log.o := -D_GNU_SOURCE
CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
-CFLAGS_eal_pci.o := -D_GNU_SOURCE
-CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE
-CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE
CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
CFLAGS_eal_common_options.o := -D_GNU_SOURCE
CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
CFLAGS_eal_common_lcore.o := -D_GNU_SOURCE
+CFLAGS_rte_cycles.o := -D_GNU_SOURCE
# workaround for a gcc bug with noreturn attribute
# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
@@ -130,7 +120,7 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
CFLAGS_eal_thread.o += -Wno-return-type
endif
-INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
+INC := rte_kni_common.h
SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
$(addprefix include/exec-env/,$(INC))
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 48f12f44..229eec9f 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -56,7 +56,6 @@
#include <rte_common.h>
#include <rte_debug.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
@@ -71,12 +70,12 @@
#include <rte_cpuflags.h>
#include <rte_interrupts.h>
#include <rte_bus.h>
-#include <rte_pci.h>
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_version.h>
#include <rte_atomic.h>
#include <malloc_heap.h>
+#include <rte_vfio.h>
#include "eal_private.h"
#include "eal_thread.h"
@@ -121,6 +120,13 @@ struct internal_config internal_config;
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
+/* Return mbuf pool ops name */
+const char *
+rte_eal_mbuf_default_mempool_ops(void)
+{
+ return internal_config.mbuf_pool_ops_name;
+}
+
/* Return a pointer to the configuration structure */
struct rte_config *
rte_eal_get_configuration(void)
@@ -128,6 +134,12 @@ rte_eal_get_configuration(void)
return &rte_config;
}
+enum rte_iova_mode
+rte_eal_iova_mode(void)
+{
+ return rte_eal_get_configuration()->iova_mode;
+}
+
/* parse a sysfs (or other) file containing one integer value */
int
eal_parse_sysfs_value(const char *filename, unsigned long *val)
@@ -354,7 +366,6 @@ eal_usage(const char *prgname)
" --"OPT_BASE_VIRTADDR" Base virtual address\n"
" --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n"
" --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n"
- " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n"
"\n");
/* Allow the application to print its usage message too if hook is set */
if ( rte_application_usage_hook ) {
@@ -555,25 +566,12 @@ eal_parse_args(int argc, char **argv)
eal_usage(prgname);
exit(EXIT_SUCCESS);
- /* long options */
- case OPT_XEN_DOM0_NUM:
-#ifdef RTE_LIBRTE_XEN_DOM0
- internal_config.xen_dom0_support = 1;
-#else
- RTE_LOG(ERR, EAL, "Can't support DPDK app "
- "running on Dom0, please configure"
- " RTE_LIBRTE_XEN_DOM0=y\n");
- ret = -1;
- goto out;
-#endif
- break;
-
case OPT_HUGE_DIR_NUM:
- internal_config.hugepage_dir = optarg;
+ internal_config.hugepage_dir = strdup(optarg);
break;
case OPT_FILE_PREFIX_NUM:
- internal_config.hugefile_prefix = optarg;
+ internal_config.hugefile_prefix = strdup(optarg);
break;
case OPT_SOCKET_MEM_NUM:
@@ -610,6 +608,10 @@ eal_parse_args(int argc, char **argv)
internal_config.create_uio_dev = 1;
break;
+ case OPT_MBUF_POOL_OPS_NAME_NUM:
+ internal_config.mbuf_pool_ops_name = optarg;
+ break;
+
default:
if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
RTE_LOG(ERR, EAL, "Option %c is not supported "
@@ -641,15 +643,6 @@ eal_parse_args(int argc, char **argv)
goto out;
}
- /* --xen-dom0 doesn't make sense with --socket-mem */
- if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
- RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified "
- "together with --"OPT_XEN_DOM0"\n");
- eal_usage(prgname);
- ret = -1;
- goto out;
- }
-
if (optind >= 0)
argv[optind-1] = prgname;
ret = optind-1;
@@ -716,10 +709,9 @@ static int rte_eal_vfio_setup(void)
{
int vfio_enabled = 0;
- if (!internal_config.no_pci) {
- pci_vfio_enable();
- vfio_enabled |= pci_vfio_is_enabled();
- }
+ if (rte_vfio_enable("vfio"))
+ return -1;
+ vfio_enabled = rte_vfio_is_enabled("vfio");
if (vfio_enabled) {
@@ -792,9 +784,40 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ if (eal_plugins_init() < 0) {
+ rte_eal_init_alert("Cannot init plugins\n");
+ rte_errno = EINVAL;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (eal_option_device_parse()) {
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ if (rte_bus_scan()) {
+ rte_eal_init_alert("Cannot scan the buses for devices\n");
+ rte_errno = ENODEV;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
+
+ /* autodetect the iova mapping mode (default is iova_pa) */
+ rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class();
+
+ /* Workaround for KNI which requires physical address to work */
+ if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
+ rte_eal_check_module("rte_kni") == 1) {
+ rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
+ RTE_LOG(WARNING, EAL,
+ "Some devices want IOVA as VA but PA will be used because.. "
+ "KNI module inserted\n");
+ }
+
if (internal_config.no_hugetlbfs == 0 &&
internal_config.process_type != RTE_PROC_SECONDARY &&
- internal_config.xen_dom0_support == 0 &&
eal_hugepage_info_init() < 0) {
rte_eal_init_alert("Cannot get hugepage information.");
rte_errno = EACCES;
@@ -873,9 +896,6 @@ rte_eal_init(int argc, char **argv)
eal_check_mem_on_local_socket();
- if (eal_plugins_init() < 0)
- rte_eal_init_alert("Cannot init plugins\n");
-
eal_thread_init_master(rte_config.master_lcore);
ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
@@ -889,17 +909,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- if (eal_option_device_parse()) {
- rte_errno = ENODEV;
- return -1;
- }
-
- if (rte_bus_scan()) {
- rte_eal_init_alert("Cannot scan the buses for devices\n");
- rte_errno = ENODEV;
- return -1;
- }
-
RTE_LCORE_FOREACH_SLAVE(i) {
/*
@@ -983,6 +992,22 @@ int rte_eal_has_hugepages(void)
return ! internal_config.no_hugetlbfs;
}
+int rte_eal_has_pci(void)
+{
+ return !internal_config.no_pci;
+}
+
+int rte_eal_create_uio_dev(void)
+{
+ return internal_config.create_uio_dev;
+}
+
+enum rte_intr_mode
+rte_eal_vfio_intr_mode(void)
+{
+ return internal_config.vfio_intr_mode;
+}
+
int
rte_eal_check_module(const char *module_name)
{
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
index fbae4613..8e4a775b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -40,7 +40,6 @@
#include <sys/timerfd.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_common.h>
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
index 7a21e8f6..86e174fc 100644
--- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -46,7 +46,6 @@
#include <sys/queue.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 3e9ac41e..1c20693d 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -51,7 +51,6 @@
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
@@ -60,7 +59,6 @@
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_log.h>
-#include <rte_pci.h>
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
@@ -914,7 +912,7 @@ static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
union rte_intr_read_buffer buf;
- int bytes_read = 1;
+ int bytes_read = 0;
int nbytes;
switch (intr_handle->type) {
@@ -930,11 +928,9 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
break;
#endif
case RTE_INTR_HANDLE_VDEV:
- /* for vdev, fd points to:
- * a. eventfd which does not need to read out;
- * b. datapath fd which needs PMD to read out.
- */
- return;
+ bytes_read = intr_handle->efd_counter_size;
+ /* For vdev, number of bytes to read is set by driver */
+ break;
case RTE_INTR_HANDLE_EXT:
return;
default:
@@ -947,6 +943,8 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
* read out to clear the ready-to-be-read flag
* for epoll_wait.
*/
+ if (bytes_read == 0)
+ return;
do {
nbytes = read(fd, &buf, bytes_read);
if (nbytes < 0) {
@@ -1206,7 +1204,12 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
intr_handle->nb_efd = n;
intr_handle->max_intr = NB_OTHER_INTR + n;
} else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) {
- /* do nothing, and let vdev driver to initialize this struct */
+ /* only check, initialization would be done in vdev driver.*/
+ if (intr_handle->efd_counter_size >
+ sizeof(union rte_intr_read_buffer)) {
+ RTE_LOG(ERR, EAL, "the efd_counter_size is oversized");
+ return -EINVAL;
+ }
} else {
intr_handle->efds[0] = intr_handle->fd;
intr_handle->nb_efd = RTE_MIN(nb_efd, 1U);
diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c
index e3a50aa3..c088bd9b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_log.c
+++ b/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -39,7 +39,6 @@
#include <sys/queue.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 52791282..a54b822a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -59,7 +59,6 @@
#include <rte_log.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
@@ -75,13 +74,6 @@
#define PFN_MASK_SIZE 8
-#ifdef RTE_LIBRTE_XEN_DOM0
-int rte_xen_dom0_supported(void)
-{
- return internal_config.xen_dom0_support;
-}
-#endif
-
/**
* @file
* Huge page mapping under linux
@@ -106,10 +98,6 @@ test_phys_addrs_available(void)
uint64_t tmp;
phys_addr_t physaddr;
- /* For dom0, phys addresses can always be available */
- if (rte_xen_dom0_supported())
- return;
-
if (!rte_eal_has_hugepages()) {
RTE_LOG(ERR, EAL,
"Started without hugepages support, physical addresses not available\n");
@@ -119,10 +107,11 @@ test_phys_addrs_available(void)
physaddr = rte_mem_virt2phy(&tmp);
if (physaddr == RTE_BAD_PHYS_ADDR) {
- RTE_LOG(ERR, EAL,
- "Cannot obtain physical addresses: %s. "
- "Only vfio will function.\n",
- strerror(errno));
+ if (rte_eal_iova_mode() == RTE_IOVA_PA)
+ RTE_LOG(ERR, EAL,
+ "Cannot obtain physical addresses: %s. "
+ "Only vfio will function.\n",
+ strerror(errno));
phys_addrs_available = false;
}
}
@@ -139,32 +128,9 @@ rte_mem_virt2phy(const void *virtaddr)
int page_size;
off_t offset;
- /* when using dom0, /proc/self/pagemap always returns 0, check in
- * dpdk memory by browsing the memsegs */
- if (rte_xen_dom0_supported()) {
- struct rte_mem_config *mcfg;
- struct rte_memseg *memseg;
- unsigned i;
-
- mcfg = rte_eal_get_configuration()->mem_config;
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- memseg = &mcfg->memseg[i];
- if (memseg->addr == NULL)
- break;
- if (virtaddr > memseg->addr &&
- virtaddr < RTE_PTR_ADD(memseg->addr,
- memseg->len)) {
- return memseg->phys_addr +
- RTE_PTR_DIFF(virtaddr, memseg->addr);
- }
- }
-
- return RTE_BAD_PHYS_ADDR;
- }
-
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
if (!phys_addrs_available)
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
/* standard page size */
page_size = getpagesize();
@@ -173,7 +139,7 @@ rte_mem_virt2phy(const void *virtaddr)
if (fd < 0) {
RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
__func__, strerror(errno));
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
}
virt_pfn = (unsigned long)virtaddr / page_size;
@@ -182,7 +148,7 @@ rte_mem_virt2phy(const void *virtaddr)
RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
__func__, strerror(errno));
close(fd);
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
}
retval = read(fd, &page, PFN_MASK_SIZE);
@@ -190,12 +156,12 @@ rte_mem_virt2phy(const void *virtaddr)
if (retval < 0) {
RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
__func__, strerror(errno));
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
} else if (retval != PFN_MASK_SIZE) {
RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap "
"but expected %d:\n",
__func__, retval, PFN_MASK_SIZE);
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
}
/*
@@ -203,7 +169,7 @@ rte_mem_virt2phy(const void *virtaddr)
* pagemap.txt in linux Documentation)
*/
if ((page & 0x7fffffffffffffULL) == 0)
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ ((unsigned long)virtaddr % page_size);
@@ -211,6 +177,14 @@ rte_mem_virt2phy(const void *virtaddr)
return physaddr;
}
+rte_iova_t
+rte_mem_virt2iova(const void *virtaddr)
+{
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ return (uintptr_t)virtaddr;
+ return rte_mem_virt2phy(virtaddr);
+}
+
/*
* For each hugepage in hugepg_tbl, fill the physaddr value. We find
* it by browsing the /proc/self/pagemap special file.
@@ -716,6 +690,8 @@ create_shared_memory(const char *filename, const size_t mem_size)
}
retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
close(fd);
+ if (retval == MAP_FAILED)
+ return NULL;
return retval;
}
@@ -1059,7 +1035,10 @@ rte_eal_hugepage_init(void)
strerror(errno));
return -1;
}
- mcfg->memseg[0].phys_addr = RTE_BAD_PHYS_ADDR;
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ mcfg->memseg[0].iova = (uintptr_t)addr;
+ else
+ mcfg->memseg[0].iova = RTE_BAD_IOVA;
mcfg->memseg[0].addr = addr;
mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
mcfg->memseg[0].len = internal_config.memory;
@@ -1067,17 +1046,6 @@ rte_eal_hugepage_init(void)
return 0;
}
-/* check if app runs on Xen Dom0 */
- if (internal_config.xen_dom0_support) {
-#ifdef RTE_LIBRTE_XEN_DOM0
- /* use dom0_mm kernel driver to init memory */
- if (rte_xen_dom0_memory_init() < 0)
- return -1;
- else
- return 0;
-#endif
- }
-
/* calculate total number of hugepages available. at this point we haven't
* yet started sorting them so they all are on socket 0 */
for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
@@ -1319,7 +1287,7 @@ rte_eal_hugepage_init(void)
if (j == RTE_MAX_MEMSEG)
break;
- mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+ mcfg->memseg[j].iova = hugepage[i].physaddr;
mcfg->memseg[j].addr = hugepage[i].final_va;
mcfg->memseg[j].len = hugepage[i].size;
mcfg->memseg[j].socket_id = hugepage[i].socket_id;
@@ -1330,7 +1298,7 @@ rte_eal_hugepage_init(void)
#ifdef RTE_ARCH_PPC_64
/* Use the phy and virt address of the last page as segment
* address for IBM Power architecture */
- mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+ mcfg->memseg[j].iova = hugepage[i].physaddr;
mcfg->memseg[j].addr = hugepage[i].final_va;
#endif
mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;
@@ -1400,17 +1368,6 @@ rte_eal_hugepage_attach(void)
test_phys_addrs_available();
- if (internal_config.xen_dom0_support) {
-#ifdef RTE_LIBRTE_XEN_DOM0
- if (rte_xen_dom0_memory_attach() < 0) {
- RTE_LOG(ERR, EAL, "Failed to attach memory segments of primary "
- "process\n");
- return -1;
- }
- return 0;
-#endif
- }
-
fd_zero = open("/dev/zero", O_RDONLY);
if (fd_zero < 0) {
RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
@@ -1542,7 +1499,7 @@ error:
return -1;
}
-bool
+int
rte_eal_using_phys_addrs(void)
{
return phys_addrs_available;
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
deleted file mode 100644
index 8951ce74..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ /dev/null
@@ -1,722 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include <dirent.h>
-
-#include <rte_log.h>
-#include <rte_bus.h>
-#include <rte_pci.h>
-#include <rte_eal_memconfig.h>
-#include <rte_malloc.h>
-#include <rte_devargs.h>
-#include <rte_memcpy.h>
-
-#include "eal_filesystem.h"
-#include "eal_private.h"
-#include "eal_pci_init.h"
-
-/**
- * @file
- * PCI probing under linux
- *
- * This code is used to simulate a PCI probe by parsing information in sysfs.
- * When a registered device matches a driver, it is then initialized with
- * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it).
- */
-
-extern struct rte_pci_bus rte_pci_bus;
-
-static int
-pci_get_kernel_driver_by_path(const char *filename, char *dri_name)
-{
- int count;
- char path[PATH_MAX];
- char *name;
-
- if (!filename || !dri_name)
- return -1;
-
- count = readlink(filename, path, PATH_MAX);
- if (count >= PATH_MAX)
- return -1;
-
- /* For device does not have a driver */
- if (count < 0)
- return 1;
-
- path[count] = '\0';
-
- name = strrchr(path, '/');
- if (name) {
- strncpy(dri_name, name + 1, strlen(name + 1) + 1);
- return 0;
- }
-
- return -1;
-}
-
-/* Map pci device */
-int
-rte_pci_map_device(struct rte_pci_device *dev)
-{
- int ret = -1;
-
- /* try mapping the NIC resources using VFIO if it exists */
- switch (dev->kdrv) {
- case RTE_KDRV_VFIO:
-#ifdef VFIO_PRESENT
- if (pci_vfio_is_enabled())
- ret = pci_vfio_map_resource(dev);
-#endif
- break;
- case RTE_KDRV_IGB_UIO:
- case RTE_KDRV_UIO_GENERIC:
- if (rte_eal_using_phys_addrs()) {
- /* map resources for devices that use uio */
- ret = pci_uio_map_resource(dev);
- }
- break;
- default:
- RTE_LOG(DEBUG, EAL,
- " Not managed by a supported kernel driver, skipped\n");
- ret = 1;
- break;
- }
-
- return ret;
-}
-
-/* Unmap pci device */
-void
-rte_pci_unmap_device(struct rte_pci_device *dev)
-{
- /* try unmapping the NIC resources using VFIO if it exists */
- switch (dev->kdrv) {
- case RTE_KDRV_VFIO:
-#ifdef VFIO_PRESENT
- if (pci_vfio_is_enabled())
- pci_vfio_unmap_resource(dev);
-#endif
- break;
- case RTE_KDRV_IGB_UIO:
- case RTE_KDRV_UIO_GENERIC:
- /* unmap resources for devices that use uio */
- pci_uio_unmap_resource(dev);
- break;
- default:
- RTE_LOG(DEBUG, EAL,
- " Not managed by a supported kernel driver, skipped\n");
- break;
- }
-}
-
-void *
-pci_find_max_end_va(void)
-{
- const struct rte_memseg *seg = rte_eal_get_physmem_layout();
- const struct rte_memseg *last = seg;
- unsigned i = 0;
-
- for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
- if (seg->addr == NULL)
- break;
-
- if (seg->addr > last->addr)
- last = seg;
-
- }
- return RTE_PTR_ADD(last->addr, last->len);
-}
-
-/* parse one line of the "resource" sysfs file (note that the 'line'
- * string is modified)
- */
-int
-pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr,
- uint64_t *end_addr, uint64_t *flags)
-{
- union pci_resource_info {
- struct {
- char *phys_addr;
- char *end_addr;
- char *flags;
- };
- char *ptrs[PCI_RESOURCE_FMT_NVAL];
- } res_info;
-
- if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) {
- RTE_LOG(ERR, EAL,
- "%s(): bad resource format\n", __func__);
- return -1;
- }
- errno = 0;
- *phys_addr = strtoull(res_info.phys_addr, NULL, 16);
- *end_addr = strtoull(res_info.end_addr, NULL, 16);
- *flags = strtoull(res_info.flags, NULL, 16);
- if (errno != 0) {
- RTE_LOG(ERR, EAL,
- "%s(): bad resource format\n", __func__);
- return -1;
- }
-
- return 0;
-}
-
-/* parse the "resource" sysfs file */
-static int
-pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
-{
- FILE *f;
- char buf[BUFSIZ];
- int i;
- uint64_t phys_addr, end_addr, flags;
-
- f = fopen(filename, "r");
- if (f == NULL) {
- RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n");
- return -1;
- }
-
- for (i = 0; i<PCI_MAX_RESOURCE; i++) {
-
- if (fgets(buf, sizeof(buf), f) == NULL) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot read resource\n", __func__);
- goto error;
- }
- if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
- &end_addr, &flags) < 0)
- goto error;
-
- if (flags & IORESOURCE_MEM) {
- dev->mem_resource[i].phys_addr = phys_addr;
- dev->mem_resource[i].len = end_addr - phys_addr + 1;
- /* not mapped for now */
- dev->mem_resource[i].addr = NULL;
- }
- }
- fclose(f);
- return 0;
-
-error:
- fclose(f);
- return -1;
-}
-
-/* Scan one pci sysfs entry, and fill the devices list from it. */
-static int
-pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
-{
- char filename[PATH_MAX];
- unsigned long tmp;
- struct rte_pci_device *dev;
- char driver[PATH_MAX];
- int ret;
-
- dev = malloc(sizeof(*dev));
- if (dev == NULL)
- return -1;
-
- memset(dev, 0, sizeof(*dev));
- dev->addr = *addr;
-
- /* get vendor id */
- snprintf(filename, sizeof(filename), "%s/vendor", dirname);
- if (eal_parse_sysfs_value(filename, &tmp) < 0) {
- free(dev);
- return -1;
- }
- dev->id.vendor_id = (uint16_t)tmp;
-
- /* get device id */
- snprintf(filename, sizeof(filename), "%s/device", dirname);
- if (eal_parse_sysfs_value(filename, &tmp) < 0) {
- free(dev);
- return -1;
- }
- dev->id.device_id = (uint16_t)tmp;
-
- /* get subsystem_vendor id */
- snprintf(filename, sizeof(filename), "%s/subsystem_vendor",
- dirname);
- if (eal_parse_sysfs_value(filename, &tmp) < 0) {
- free(dev);
- return -1;
- }
- dev->id.subsystem_vendor_id = (uint16_t)tmp;
-
- /* get subsystem_device id */
- snprintf(filename, sizeof(filename), "%s/subsystem_device",
- dirname);
- if (eal_parse_sysfs_value(filename, &tmp) < 0) {
- free(dev);
- return -1;
- }
- dev->id.subsystem_device_id = (uint16_t)tmp;
-
- /* get class_id */
- snprintf(filename, sizeof(filename), "%s/class",
- dirname);
- if (eal_parse_sysfs_value(filename, &tmp) < 0) {
- free(dev);
- return -1;
- }
- /* the least 24 bits are valid: class, subclass, program interface */
- dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID;
-
- /* get max_vfs */
- dev->max_vfs = 0;
- snprintf(filename, sizeof(filename), "%s/max_vfs", dirname);
- if (!access(filename, F_OK) &&
- eal_parse_sysfs_value(filename, &tmp) == 0)
- dev->max_vfs = (uint16_t)tmp;
- else {
- /* for non igb_uio driver, need kernel version >= 3.8 */
- snprintf(filename, sizeof(filename),
- "%s/sriov_numvfs", dirname);
- if (!access(filename, F_OK) &&
- eal_parse_sysfs_value(filename, &tmp) == 0)
- dev->max_vfs = (uint16_t)tmp;
- }
-
- /* get numa node, default to 0 if not present */
- snprintf(filename, sizeof(filename), "%s/numa_node",
- dirname);
-
- if (access(filename, F_OK) != -1) {
- if (eal_parse_sysfs_value(filename, &tmp) == 0)
- dev->device.numa_node = tmp;
- else
- dev->device.numa_node = -1;
- } else {
- dev->device.numa_node = 0;
- }
-
- pci_name_set(dev);
-
- /* parse resources */
- snprintf(filename, sizeof(filename), "%s/resource", dirname);
- if (pci_parse_sysfs_resource(filename, dev) < 0) {
- RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__);
- free(dev);
- return -1;
- }
-
- /* parse driver */
- snprintf(filename, sizeof(filename), "%s/driver", dirname);
- ret = pci_get_kernel_driver_by_path(filename, driver);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, "Fail to get kernel driver\n");
- free(dev);
- return -1;
- }
-
- if (!ret) {
- if (!strcmp(driver, "vfio-pci"))
- dev->kdrv = RTE_KDRV_VFIO;
- else if (!strcmp(driver, "igb_uio"))
- dev->kdrv = RTE_KDRV_IGB_UIO;
- else if (!strcmp(driver, "uio_pci_generic"))
- dev->kdrv = RTE_KDRV_UIO_GENERIC;
- else
- dev->kdrv = RTE_KDRV_UNKNOWN;
- } else
- dev->kdrv = RTE_KDRV_NONE;
-
- /* device is valid, add in list (sorted) */
- if (TAILQ_EMPTY(&rte_pci_bus.device_list)) {
- rte_pci_add_device(dev);
- } else {
- struct rte_pci_device *dev2;
- int ret;
-
- TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) {
- ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr);
- if (ret > 0)
- continue;
-
- if (ret < 0) {
- rte_pci_insert_device(dev2, dev);
- } else { /* already registered */
- dev2->kdrv = dev->kdrv;
- dev2->max_vfs = dev->max_vfs;
- pci_name_set(dev2);
- memmove(dev2->mem_resource, dev->mem_resource,
- sizeof(dev->mem_resource));
- free(dev);
- }
- return 0;
- }
-
- rte_pci_add_device(dev);
- }
-
- return 0;
-}
-
-int
-pci_update_device(const struct rte_pci_addr *addr)
-{
- char filename[PATH_MAX];
-
- snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT,
- pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
- addr->function);
-
- return pci_scan_one(filename, addr);
-}
-
-/*
- * split up a pci address into its constituent parts.
- */
-static int
-parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr)
-{
- /* first split on ':' */
- union splitaddr {
- struct {
- char *domain;
- char *bus;
- char *devid;
- char *function;
- };
- char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */
- } splitaddr;
-
- char *buf_copy = strndup(buf, bufsize);
- if (buf_copy == NULL)
- return -1;
-
- if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':')
- != PCI_FMT_NVAL - 1)
- goto error;
- /* final split is on '.' between devid and function */
- splitaddr.function = strchr(splitaddr.devid,'.');
- if (splitaddr.function == NULL)
- goto error;
- *splitaddr.function++ = '\0';
-
- /* now convert to int values */
- errno = 0;
- addr->domain = strtoul(splitaddr.domain, NULL, 16);
- addr->bus = strtoul(splitaddr.bus, NULL, 16);
- addr->devid = strtoul(splitaddr.devid, NULL, 16);
- addr->function = strtoul(splitaddr.function, NULL, 10);
- if (errno != 0)
- goto error;
-
- free(buf_copy); /* free the copy made with strdup */
- return 0;
-error:
- free(buf_copy);
- return -1;
-}
-
-/*
- * Scan the content of the PCI bus, and the devices in the devices
- * list
- */
-int
-rte_pci_scan(void)
-{
- struct dirent *e;
- DIR *dir;
- char dirname[PATH_MAX];
- struct rte_pci_addr addr;
-
- /* for debug purposes, PCI can be disabled */
- if (internal_config.no_pci)
- return 0;
-
- dir = opendir(pci_get_sysfs_path());
- if (dir == NULL) {
- RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
- __func__, strerror(errno));
- return -1;
- }
-
- while ((e = readdir(dir)) != NULL) {
- if (e->d_name[0] == '.')
- continue;
-
- if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0)
- continue;
-
- snprintf(dirname, sizeof(dirname), "%s/%s",
- pci_get_sysfs_path(), e->d_name);
-
- if (pci_scan_one(dirname, &addr) < 0)
- goto error;
- }
- closedir(dir);
- return 0;
-
-error:
- closedir(dir);
- return -1;
-}
-
-/* Read PCI config space. */
-int rte_pci_read_config(const struct rte_pci_device *device,
- void *buf, size_t len, off_t offset)
-{
- const struct rte_intr_handle *intr_handle = &device->intr_handle;
-
- switch (intr_handle->type) {
- case RTE_INTR_HANDLE_UIO:
- case RTE_INTR_HANDLE_UIO_INTX:
- return pci_uio_read_config(intr_handle, buf, len, offset);
-
-#ifdef VFIO_PRESENT
- case RTE_INTR_HANDLE_VFIO_MSIX:
- case RTE_INTR_HANDLE_VFIO_MSI:
- case RTE_INTR_HANDLE_VFIO_LEGACY:
- return pci_vfio_read_config(intr_handle, buf, len, offset);
-#endif
- default:
- RTE_LOG(ERR, EAL,
- "Unknown handle type of fd %d\n",
- intr_handle->fd);
- return -1;
- }
-}
-
-/* Write PCI config space. */
-int rte_pci_write_config(const struct rte_pci_device *device,
- const void *buf, size_t len, off_t offset)
-{
- const struct rte_intr_handle *intr_handle = &device->intr_handle;
-
- switch (intr_handle->type) {
- case RTE_INTR_HANDLE_UIO:
- case RTE_INTR_HANDLE_UIO_INTX:
- return pci_uio_write_config(intr_handle, buf, len, offset);
-
-#ifdef VFIO_PRESENT
- case RTE_INTR_HANDLE_VFIO_MSIX:
- case RTE_INTR_HANDLE_VFIO_MSI:
- case RTE_INTR_HANDLE_VFIO_LEGACY:
- return pci_vfio_write_config(intr_handle, buf, len, offset);
-#endif
- default:
- RTE_LOG(ERR, EAL,
- "Unknown handle type of fd %d\n",
- intr_handle->fd);
- return -1;
- }
-}
-
-#if defined(RTE_ARCH_X86)
-static int
-pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
- struct rte_pci_ioport *p)
-{
- uint16_t start, end;
- FILE *fp;
- char *line = NULL;
- char pci_id[16];
- int found = 0;
- size_t linesz;
-
- snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
- dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
-
- fp = fopen("/proc/ioports", "r");
- if (fp == NULL) {
- RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
- return -1;
- }
-
- while (getdelim(&line, &linesz, '\n', fp) > 0) {
- char *ptr = line;
- char *left;
- int n;
-
- n = strcspn(ptr, ":");
- ptr[n] = 0;
- left = &ptr[n + 1];
-
- while (*left && isspace(*left))
- left++;
-
- if (!strncmp(left, pci_id, strlen(pci_id))) {
- found = 1;
-
- while (*ptr && isspace(*ptr))
- ptr++;
-
- sscanf(ptr, "%04hx-%04hx", &start, &end);
-
- break;
- }
- }
-
- free(line);
- fclose(fp);
-
- if (!found)
- return -1;
-
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
- p->base = start;
- RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
-
- return 0;
-}
-#endif
-
-int
-rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p)
-{
- int ret = -1;
-
- switch (dev->kdrv) {
-#ifdef VFIO_PRESENT
- case RTE_KDRV_VFIO:
- if (pci_vfio_is_enabled())
- ret = pci_vfio_ioport_map(dev, bar, p);
- break;
-#endif
- case RTE_KDRV_IGB_UIO:
- ret = pci_uio_ioport_map(dev, bar, p);
- break;
- case RTE_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
- ret = pci_ioport_map(dev, bar, p);
-#else
- ret = pci_uio_ioport_map(dev, bar, p);
-#endif
- break;
- case RTE_KDRV_NONE:
-#if defined(RTE_ARCH_X86)
- ret = pci_ioport_map(dev, bar, p);
-#endif
- break;
- default:
- break;
- }
-
- if (!ret)
- p->dev = dev;
-
- return ret;
-}
-
-void
-rte_pci_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset)
-{
- switch (p->dev->kdrv) {
-#ifdef VFIO_PRESENT
- case RTE_KDRV_VFIO:
- pci_vfio_ioport_read(p, data, len, offset);
- break;
-#endif
- case RTE_KDRV_IGB_UIO:
- pci_uio_ioport_read(p, data, len, offset);
- break;
- case RTE_KDRV_UIO_GENERIC:
- pci_uio_ioport_read(p, data, len, offset);
- break;
- case RTE_KDRV_NONE:
-#if defined(RTE_ARCH_X86)
- pci_uio_ioport_read(p, data, len, offset);
-#endif
- break;
- default:
- break;
- }
-}
-
-void
-rte_pci_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset)
-{
- switch (p->dev->kdrv) {
-#ifdef VFIO_PRESENT
- case RTE_KDRV_VFIO:
- pci_vfio_ioport_write(p, data, len, offset);
- break;
-#endif
- case RTE_KDRV_IGB_UIO:
- pci_uio_ioport_write(p, data, len, offset);
- break;
- case RTE_KDRV_UIO_GENERIC:
- pci_uio_ioport_write(p, data, len, offset);
- break;
- case RTE_KDRV_NONE:
-#if defined(RTE_ARCH_X86)
- pci_uio_ioport_write(p, data, len, offset);
-#endif
- break;
- default:
- break;
- }
-}
-
-int
-rte_pci_ioport_unmap(struct rte_pci_ioport *p)
-{
- int ret = -1;
-
- switch (p->dev->kdrv) {
-#ifdef VFIO_PRESENT
- case RTE_KDRV_VFIO:
- if (pci_vfio_is_enabled())
- ret = pci_vfio_ioport_unmap(p);
- break;
-#endif
- case RTE_KDRV_IGB_UIO:
- ret = pci_uio_ioport_unmap(p);
- break;
- case RTE_KDRV_UIO_GENERIC:
-#if defined(RTE_ARCH_X86)
- ret = 0;
-#else
- ret = pci_uio_ioport_unmap(p);
-#endif
- break;
- case RTE_KDRV_NONE:
-#if defined(RTE_ARCH_X86)
- ret = 0;
-#endif
- break;
- default:
- break;
- }
-
- return ret;
-}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
deleted file mode 100644
index ae2980d6..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef EAL_PCI_INIT_H_
-#define EAL_PCI_INIT_H_
-
-#include "eal_vfio.h"
-
-/** IO resource type: */
-#define IORESOURCE_IO 0x00000100
-#define IORESOURCE_MEM 0x00000200
-
-/*
- * Helper function to map PCI resources right after hugepages in virtual memory
- */
-extern void *pci_map_addr;
-void *pci_find_max_end_va(void);
-
-/* parse one line of the "resource" sysfs file (note that the 'line'
- * string is modified)
- */
-int pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr,
- uint64_t *end_addr, uint64_t *flags);
-
-int pci_uio_alloc_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource **uio_res);
-void pci_uio_free_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource *uio_res);
-int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
- struct mapped_pci_resource *uio_res, int map_idx);
-
-int pci_uio_read_config(const struct rte_intr_handle *intr_handle,
- void *buf, size_t len, off_t offs);
-int pci_uio_write_config(const struct rte_intr_handle *intr_handle,
- const void *buf, size_t len, off_t offs);
-
-int pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p);
-void pci_uio_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset);
-void pci_uio_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset);
-int pci_uio_ioport_unmap(struct rte_pci_ioport *p);
-
-#ifdef VFIO_PRESENT
-
-/* access config space */
-int pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
- void *buf, size_t len, off_t offs);
-int pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
- const void *buf, size_t len, off_t offs);
-
-int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p);
-void pci_vfio_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset);
-void pci_vfio_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset);
-int pci_vfio_ioport_unmap(struct rte_pci_ioport *p);
-
-/* map/unmap VFIO resource prototype */
-int pci_vfio_map_resource(struct rte_pci_device *dev);
-int pci_vfio_unmap_resource(struct rte_pci_device *dev);
-
-#endif
-
-#endif /* EAL_PCI_INIT_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
deleted file mode 100644
index fa10329f..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <dirent.h>
-#include <inttypes.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <sys/sysmacros.h>
-#include <linux/pci_regs.h>
-
-#if defined(RTE_ARCH_X86)
-#include <sys/io.h>
-#endif
-
-#include <rte_log.h>
-#include <rte_pci.h>
-#include <rte_eal_memconfig.h>
-#include <rte_common.h>
-#include <rte_malloc.h>
-
-#include "eal_filesystem.h"
-#include "eal_pci_init.h"
-
-void *pci_map_addr = NULL;
-
-#define OFF_MAX ((uint64_t)(off_t)-1)
-
-int
-pci_uio_read_config(const struct rte_intr_handle *intr_handle,
- void *buf, size_t len, off_t offset)
-{
- return pread(intr_handle->uio_cfg_fd, buf, len, offset);
-}
-
-int
-pci_uio_write_config(const struct rte_intr_handle *intr_handle,
- const void *buf, size_t len, off_t offset)
-{
- return pwrite(intr_handle->uio_cfg_fd, buf, len, offset);
-}
-
-static int
-pci_uio_set_bus_master(int dev_fd)
-{
- uint16_t reg;
- int ret;
-
- ret = pread(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL,
- "Cannot read command from PCI config space!\n");
- return -1;
- }
-
- /* return if bus mastering is already on */
- if (reg & PCI_COMMAND_MASTER)
- return 0;
-
- reg |= PCI_COMMAND_MASTER;
-
- ret = pwrite(dev_fd, &reg, sizeof(reg), PCI_COMMAND);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL,
- "Cannot write command to PCI config space!\n");
- return -1;
- }
-
- return 0;
-}
-
-static int
-pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num)
-{
- FILE *f;
- char filename[PATH_MAX];
- int ret;
- unsigned major, minor;
- dev_t dev;
-
- /* get the name of the sysfs file that contains the major and minor
- * of the uio device and read its content */
- snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path);
-
- f = fopen(filename, "r");
- if (f == NULL) {
- RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n",
- __func__);
- return -1;
- }
-
- ret = fscanf(f, "%u:%u", &major, &minor);
- if (ret != 2) {
- RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n",
- __func__);
- fclose(f);
- return -1;
- }
- fclose(f);
-
- /* create the char device "mknod /dev/uioX c major minor" */
- snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
- dev = makedev(major, minor);
- ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev);
- if (ret != 0) {
- RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n",
- __func__, strerror(errno));
- return -1;
- }
-
- return ret;
-}
-
-/*
- * Return the uioX char device used for a pci device. On success, return
- * the UIO number and fill dstbuf string with the path of the device in
- * sysfs. On error, return a negative value. In this case dstbuf is
- * invalid.
- */
-static int
-pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf,
- unsigned int buflen, int create)
-{
- struct rte_pci_addr *loc = &dev->addr;
- unsigned int uio_num;
- struct dirent *e;
- DIR *dir;
- char dirname[PATH_MAX];
-
- /* depending on kernel version, uio can be located in uio/uioX
- * or uio:uioX */
-
- snprintf(dirname, sizeof(dirname),
- "%s/" PCI_PRI_FMT "/uio", pci_get_sysfs_path(),
- loc->domain, loc->bus, loc->devid, loc->function);
-
- dir = opendir(dirname);
- if (dir == NULL) {
- /* retry with the parent directory */
- snprintf(dirname, sizeof(dirname),
- "%s/" PCI_PRI_FMT, pci_get_sysfs_path(),
- loc->domain, loc->bus, loc->devid, loc->function);
- dir = opendir(dirname);
-
- if (dir == NULL) {
- RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
- return -1;
- }
- }
-
- /* take the first file starting with "uio" */
- while ((e = readdir(dir)) != NULL) {
- /* format could be uio%d ...*/
- int shortprefix_len = sizeof("uio") - 1;
- /* ... or uio:uio%d */
- int longprefix_len = sizeof("uio:uio") - 1;
- char *endptr;
-
- if (strncmp(e->d_name, "uio", 3) != 0)
- continue;
-
- /* first try uio%d */
- errno = 0;
- uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
- if (errno == 0 && endptr != (e->d_name + shortprefix_len)) {
- snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
- break;
- }
-
- /* then try uio:uio%d */
- errno = 0;
- uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
- if (errno == 0 && endptr != (e->d_name + longprefix_len)) {
- snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num);
- break;
- }
- }
- closedir(dir);
-
- /* No uio resource found */
- if (e == NULL)
- return -1;
-
- /* create uio device if we've been asked to */
- if (internal_config.create_uio_dev && create &&
- pci_mknod_uio_dev(dstbuf, uio_num) < 0)
- RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num);
-
- return uio_num;
-}
-
-void
-pci_uio_free_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource *uio_res)
-{
- rte_free(uio_res);
-
- if (dev->intr_handle.uio_cfg_fd >= 0) {
- close(dev->intr_handle.uio_cfg_fd);
- dev->intr_handle.uio_cfg_fd = -1;
- }
- if (dev->intr_handle.fd >= 0) {
- close(dev->intr_handle.fd);
- dev->intr_handle.fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
- }
-}
-
-int
-pci_uio_alloc_resource(struct rte_pci_device *dev,
- struct mapped_pci_resource **uio_res)
-{
- char dirname[PATH_MAX];
- char cfgname[PATH_MAX];
- char devname[PATH_MAX]; /* contains the /dev/uioX */
- int uio_num;
- struct rte_pci_addr *loc;
-
- loc = &dev->addr;
-
- /* find uio resource */
- uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1);
- if (uio_num < 0) {
- RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, "
- "skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
- return 1;
- }
- snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
-
- /* save fd if in primary process */
- dev->intr_handle.fd = open(devname, O_RDWR);
- if (dev->intr_handle.fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- devname, strerror(errno));
- goto error;
- }
-
- snprintf(cfgname, sizeof(cfgname),
- "/sys/class/uio/uio%u/device/config", uio_num);
- dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
- if (dev->intr_handle.uio_cfg_fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- cfgname, strerror(errno));
- goto error;
- }
-
- if (dev->kdrv == RTE_KDRV_IGB_UIO)
- dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
- else {
- dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
-
- /* set bus master that is not done by uio_pci_generic */
- if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) {
- RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
- goto error;
- }
- }
-
- /* allocate the mapping details for secondary processes*/
- *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
- if (*uio_res == NULL) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot store uio mmap details\n", __func__);
- goto error;
- }
-
- snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname);
- memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));
-
- return 0;
-
-error:
- pci_uio_free_resource(dev, *uio_res);
- return -1;
-}
-
-int
-pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
- struct mapped_pci_resource *uio_res, int map_idx)
-{
- int fd;
- char devname[PATH_MAX];
- void *mapaddr;
- struct rte_pci_addr *loc;
- struct pci_map *maps;
-
- loc = &dev->addr;
- maps = uio_res->maps;
-
- /* update devname for mmap */
- snprintf(devname, sizeof(devname),
- "%s/" PCI_PRI_FMT "/resource%d",
- pci_get_sysfs_path(),
- loc->domain, loc->bus, loc->devid,
- loc->function, res_idx);
-
- /* allocate memory to keep path */
- maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
- if (maps[map_idx].path == NULL) {
- RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
- strerror(errno));
- return -1;
- }
-
- /*
- * open resource file, to mmap it
- */
- fd = open(devname, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- devname, strerror(errno));
- goto error;
- }
-
- /* try mapping somewhere close to the end of hugepages */
- if (pci_map_addr == NULL)
- pci_map_addr = pci_find_max_end_va();
-
- mapaddr = pci_map_resource(pci_map_addr, fd, 0,
- (size_t)dev->mem_resource[res_idx].len, 0);
- close(fd);
- if (mapaddr == MAP_FAILED)
- goto error;
-
- pci_map_addr = RTE_PTR_ADD(mapaddr,
- (size_t)dev->mem_resource[res_idx].len);
-
- maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
- maps[map_idx].size = dev->mem_resource[res_idx].len;
- maps[map_idx].addr = mapaddr;
- maps[map_idx].offset = 0;
- strcpy(maps[map_idx].path, devname);
- dev->mem_resource[res_idx].addr = mapaddr;
-
- return 0;
-
-error:
- rte_free(maps[map_idx].path);
- return -1;
-}
-
-#if defined(RTE_ARCH_X86)
-int
-pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p)
-{
- char dirname[PATH_MAX];
- char filename[PATH_MAX];
- int uio_num;
- unsigned long start;
-
- uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
- if (uio_num < 0)
- return -1;
-
- /* get portio start */
- snprintf(filename, sizeof(filename),
- "%s/portio/port%d/start", dirname, bar);
- if (eal_parse_sysfs_value(filename, &start) < 0) {
- RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
- __func__);
- return -1;
- }
- /* ensure we don't get anything funny here, read/write will cast to
- * uin16_t */
- if (start > UINT16_MAX)
- return -1;
-
- /* FIXME only for primary process ? */
- if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
-
- snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
- dev->intr_handle.fd = open(filename, O_RDWR);
- if (dev->intr_handle.fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
- filename, strerror(errno));
- return -1;
- }
- dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
- }
-
- RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
-
- p->base = start;
- p->len = 0;
- return 0;
-}
-#else
-int
-pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p)
-{
- FILE *f;
- char buf[BUFSIZ];
- char filename[PATH_MAX];
- uint64_t phys_addr, end_addr, flags;
- int fd, i;
- void *addr;
-
- /* open and read addresses of the corresponding resource in sysfs */
- snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
- pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
- f = fopen(filename, "r");
- if (f == NULL) {
- RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
- strerror(errno));
- return -1;
- }
- for (i = 0; i < bar + 1; i++) {
- if (fgets(buf, sizeof(buf), f) == NULL) {
- RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
- goto error;
- }
- }
- if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
- &end_addr, &flags) < 0)
- goto error;
- if ((flags & IORESOURCE_IO) == 0) {
- RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar);
- goto error;
- }
- snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d",
- pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function, bar);
-
- /* mmap the pci resource */
- fd = open(filename, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
- strerror(errno));
- goto error;
- }
- addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE,
- MAP_SHARED, fd, 0);
- close(fd);
- if (addr == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "Cannot mmap IO port resource: %s\n",
- strerror(errno));
- goto error;
- }
-
- /* strangely, the base address is mmap addr + phys_addr */
- p->base = (uintptr_t)addr + phys_addr;
- p->len = end_addr + 1;
- RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%"PRIx64"\n", p->base);
- fclose(f);
-
- return 0;
-
-error:
- fclose(f);
- return -1;
-}
-#endif
-
-void
-pci_uio_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset)
-{
- uint8_t *d;
- int size;
- uintptr_t reg = p->base + offset;
-
- for (d = data; len > 0; d += size, reg += size, len -= size) {
- if (len >= 4) {
- size = 4;
-#if defined(RTE_ARCH_X86)
- *(uint32_t *)d = inl(reg);
-#else
- *(uint32_t *)d = *(volatile uint32_t *)reg;
-#endif
- } else if (len >= 2) {
- size = 2;
-#if defined(RTE_ARCH_X86)
- *(uint16_t *)d = inw(reg);
-#else
- *(uint16_t *)d = *(volatile uint16_t *)reg;
-#endif
- } else {
- size = 1;
-#if defined(RTE_ARCH_X86)
- *d = inb(reg);
-#else
- *d = *(volatile uint8_t *)reg;
-#endif
- }
- }
-}
-
-void
-pci_uio_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset)
-{
- const uint8_t *s;
- int size;
- uintptr_t reg = p->base + offset;
-
- for (s = data; len > 0; s += size, reg += size, len -= size) {
- if (len >= 4) {
- size = 4;
-#if defined(RTE_ARCH_X86)
- outl_p(*(const uint32_t *)s, reg);
-#else
- *(volatile uint32_t *)reg = *(const uint32_t *)s;
-#endif
- } else if (len >= 2) {
- size = 2;
-#if defined(RTE_ARCH_X86)
- outw_p(*(const uint16_t *)s, reg);
-#else
- *(volatile uint16_t *)reg = *(const uint16_t *)s;
-#endif
- } else {
- size = 1;
-#if defined(RTE_ARCH_X86)
- outb_p(*s, reg);
-#else
- *(volatile uint8_t *)reg = *s;
-#endif
- }
- }
-}
-
-int
-pci_uio_ioport_unmap(struct rte_pci_ioport *p)
-{
-#if defined(RTE_ARCH_X86)
- RTE_SET_USED(p);
- /* FIXME close intr fd ? */
- return 0;
-#else
- return munmap((void *)(uintptr_t)p->base, p->len);
-#endif
-}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
deleted file mode 100644
index aa9d96ed..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ /dev/null
@@ -1,674 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <string.h>
-#include <fcntl.h>
-#include <linux/pci_regs.h>
-#include <sys/eventfd.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <stdbool.h>
-
-#include <rte_log.h>
-#include <rte_pci.h>
-#include <rte_eal_memconfig.h>
-#include <rte_malloc.h>
-
-#include "eal_filesystem.h"
-#include "eal_pci_init.h"
-#include "eal_vfio.h"
-#include "eal_private.h"
-
-/**
- * @file
- * PCI probing under linux (VFIO version)
- *
- * This code tries to determine if the PCI device is bound to VFIO driver,
- * and initialize it (map BARs, set up interrupts) if that's the case.
- *
- * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y".
- */
-
-#ifdef VFIO_PRESENT
-
-#define PAGE_SIZE (sysconf(_SC_PAGESIZE))
-#define PAGE_MASK (~(PAGE_SIZE - 1))
-
-static struct rte_tailq_elem rte_vfio_tailq = {
- .name = "VFIO_RESOURCE_LIST",
-};
-EAL_REGISTER_TAILQ(rte_vfio_tailq)
-
-int
-pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
- void *buf, size_t len, off_t offs)
-{
- return pread64(intr_handle->vfio_dev_fd, buf, len,
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
-}
-
-int
-pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
- const void *buf, size_t len, off_t offs)
-{
- return pwrite64(intr_handle->vfio_dev_fd, buf, len,
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs);
-}
-
-/* get PCI BAR number where MSI-X interrupts are */
-static int
-pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
- uint32_t *msix_table_size)
-{
- int ret;
- uint32_t reg;
- uint16_t flags;
- uint8_t cap_id, cap_offset;
-
- /* read PCI capability pointer from config space */
- ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- PCI_CAPABILITY_LIST);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
- "config space!\n");
- return -1;
- }
-
- /* we need first byte */
- cap_offset = reg & 0xFF;
-
- while (cap_offset) {
-
- /* read PCI capability ID */
- ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI "
- "config space!\n");
- return -1;
- }
-
- /* we need first byte */
- cap_id = reg & 0xFF;
-
- /* if we haven't reached MSI-X, check next capability */
- if (cap_id != PCI_CAP_ID_MSIX) {
- ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI "
- "config space!\n");
- return -1;
- }
-
- /* we need second byte */
- cap_offset = (reg & 0xFF00) >> 8;
-
- continue;
- }
- /* else, read table offset */
- else {
- /* table offset resides in the next 4 bytes */
- ret = pread64(fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset + 4);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config "
- "space!\n");
- return -1;
- }
-
- ret = pread64(fd, &flags, sizeof(flags),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- cap_offset + 2);
- if (ret != sizeof(flags)) {
- RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config "
- "space!\n");
- return -1;
- }
-
- *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
- *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
- *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
-
- return 0;
- }
- }
- return 0;
-}
-
-/* set PCI bus mastering */
-static int
-pci_vfio_set_bus_master(int dev_fd, bool op)
-{
- uint16_t reg;
- int ret;
-
- ret = pread64(dev_fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- PCI_COMMAND);
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n");
- return -1;
- }
-
- if (op)
- /* set the master bit */
- reg |= PCI_COMMAND_MASTER;
- else
- reg &= ~(PCI_COMMAND_MASTER);
-
- ret = pwrite64(dev_fd, &reg, sizeof(reg),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
- PCI_COMMAND);
-
- if (ret != sizeof(reg)) {
- RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n");
- return -1;
- }
-
- return 0;
-}
-
-/* set up interrupt support (but not enable interrupts) */
-static int
-pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
-{
- int i, ret, intr_idx;
-
- /* default to invalid index */
- intr_idx = VFIO_PCI_NUM_IRQS;
-
- /* get interrupt type from internal config (MSI-X by default, can be
- * overridden from the command line
- */
- switch (internal_config.vfio_intr_mode) {
- case RTE_INTR_MODE_MSIX:
- intr_idx = VFIO_PCI_MSIX_IRQ_INDEX;
- break;
- case RTE_INTR_MODE_MSI:
- intr_idx = VFIO_PCI_MSI_IRQ_INDEX;
- break;
- case RTE_INTR_MODE_LEGACY:
- intr_idx = VFIO_PCI_INTX_IRQ_INDEX;
- break;
- /* don't do anything if we want to automatically determine interrupt type */
- case RTE_INTR_MODE_NONE:
- break;
- default:
- RTE_LOG(ERR, EAL, " unknown default interrupt type!\n");
- return -1;
- }
-
- /* start from MSI-X interrupt type */
- for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) {
- struct vfio_irq_info irq = { .argsz = sizeof(irq) };
- int fd = -1;
-
- /* skip interrupt modes we don't want */
- if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE &&
- i != intr_idx)
- continue;
-
- irq.index = i;
-
- ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, " cannot get IRQ info, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
- }
-
- /* if this vector cannot be used with eventfd, fail if we explicitly
- * specified interrupt type, otherwise continue */
- if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) {
- if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) {
- RTE_LOG(ERR, EAL,
- " interrupt vector does not support eventfd!\n");
- return -1;
- } else
- continue;
- }
-
- /* set up an eventfd for interrupts */
- fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, " cannot set up eventfd, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
- }
-
- dev->intr_handle.fd = fd;
- dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
-
- switch (i) {
- case VFIO_PCI_MSIX_IRQ_INDEX:
- internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX;
- dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX;
- break;
- case VFIO_PCI_MSI_IRQ_INDEX:
- internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI;
- dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI;
- break;
- case VFIO_PCI_INTX_IRQ_INDEX:
- internal_config.vfio_intr_mode = RTE_INTR_MODE_LEGACY;
- dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY;
- break;
- default:
- RTE_LOG(ERR, EAL, " unknown interrupt type!\n");
- return -1;
- }
-
- return 0;
- }
-
- /* if we're here, we haven't found a suitable interrupt vector */
- return -1;
-}
-
-/*
- * map the PCI resources of a PCI device in virtual memory (VFIO version).
- * primary and secondary processes follow almost exactly the same path
- */
-int
-pci_vfio_map_resource(struct rte_pci_device *dev)
-{
- struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
- char pci_addr[PATH_MAX] = {0};
- int vfio_dev_fd;
- struct rte_pci_addr *loc = &dev->addr;
- int i, ret, msix_bar;
- struct mapped_pci_resource *vfio_res = NULL;
- struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
-
- struct pci_map *maps;
- uint32_t msix_table_offset = 0;
- uint32_t msix_table_size = 0;
- uint32_t ioport_bar;
-
- dev->intr_handle.fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
-
- /* store PCI address string */
- snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
- loc->domain, loc->bus, loc->devid, loc->function);
-
- if ((ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr,
- &vfio_dev_fd, &device_info)))
- return ret;
-
- /* get MSI-X BAR, if any (we have to know where it is because we can't
- * easily mmap it when using VFIO) */
- msix_bar = -1;
- ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar,
- &msix_table_offset, &msix_table_size);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr);
- close(vfio_dev_fd);
- return -1;
- }
-
- /* if we're in a primary process, allocate vfio_res and get region info */
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
- vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0);
- if (vfio_res == NULL) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot store uio mmap details\n", __func__);
- close(vfio_dev_fd);
- return -1;
- }
- memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));
-
- /* get number of registers (up to BAR5) */
- vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions,
- VFIO_PCI_BAR5_REGION_INDEX + 1);
- } else {
- /* if we're in a secondary process, just find our tailq entry */
- TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (rte_eal_compare_pci_addr(&vfio_res->pci_addr,
- &dev->addr))
- continue;
- break;
- }
- /* if we haven't found our tailq entry, something's wrong */
- if (vfio_res == NULL) {
- RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
- pci_addr);
- close(vfio_dev_fd);
- return -1;
- }
- }
-
- /* map BARs */
- maps = vfio_res->maps;
-
- for (i = 0; i < (int) vfio_res->nb_maps; i++) {
- struct vfio_region_info reg = { .argsz = sizeof(reg) };
- void *bar_addr;
- struct memreg {
- unsigned long offset, size;
- } memreg[2] = {};
-
- reg.index = i;
-
- ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
-
- if (ret) {
- RTE_LOG(ERR, EAL, " %s cannot get device region info "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
- close(vfio_dev_fd);
- if (internal_config.process_type == RTE_PROC_PRIMARY)
- rte_free(vfio_res);
- return -1;
- }
-
- /* chk for io port region */
- ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
- + PCI_BASE_ADDRESS_0 + i*4);
-
- if (ret != sizeof(ioport_bar)) {
- RTE_LOG(ERR, EAL,
- "Cannot read command (%x) from config space!\n",
- PCI_BASE_ADDRESS_0 + i*4);
- return -1;
- }
-
- if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) {
- RTE_LOG(INFO, EAL,
- "Ignore mapping IO port bar(%d) addr: %x\n",
- i, ioport_bar);
- continue;
- }
-
- /* skip non-mmapable BARs */
- if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
- continue;
-
- if (i == msix_bar) {
- /*
- * VFIO will not let us map the MSI-X table,
- * but we can map around it.
- */
- uint32_t table_start = msix_table_offset;
- uint32_t table_end = table_start + msix_table_size;
- table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
- table_start &= PAGE_MASK;
-
- if (table_start == 0 && table_end >= reg.size) {
- /* Cannot map this BAR */
- RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i);
- continue;
- } else {
- memreg[0].offset = reg.offset;
- memreg[0].size = table_start;
- memreg[1].offset = reg.offset + table_end;
- memreg[1].size = reg.size - table_end;
-
- RTE_LOG(DEBUG, EAL,
- "Trying to map BAR %d that contains the MSI-X "
- "table. Trying offsets: "
- "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i,
- memreg[0].offset, memreg[0].size,
- memreg[1].offset, memreg[1].size);
- }
- } else {
- memreg[0].offset = reg.offset;
- memreg[0].size = reg.size;
- }
-
- /* try to figure out an address */
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
- /* try mapping somewhere close to the end of hugepages */
- if (pci_map_addr == NULL)
- pci_map_addr = pci_find_max_end_va();
-
- bar_addr = pci_map_addr;
- pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
- } else {
- bar_addr = maps[i].addr;
- }
-
- /* reserve the address using an inaccessible mapping */
- bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE |
- MAP_ANONYMOUS, -1, 0);
- if (bar_addr != MAP_FAILED) {
- void *map_addr = NULL;
- if (memreg[0].size) {
- /* actual map of first part */
- map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
- memreg[0].offset,
- memreg[0].size,
- MAP_FIXED);
- }
-
- /* if there's a second part, try to map it */
- if (map_addr != MAP_FAILED
- && memreg[1].offset && memreg[1].size) {
- void *second_addr = RTE_PTR_ADD(bar_addr,
- memreg[1].offset -
- (uintptr_t)reg.offset);
- map_addr = pci_map_resource(second_addr,
- vfio_dev_fd, memreg[1].offset,
- memreg[1].size,
- MAP_FIXED);
- }
-
- if (map_addr == MAP_FAILED || !map_addr) {
- munmap(bar_addr, reg.size);
- bar_addr = MAP_FAILED;
- }
- }
-
- if (bar_addr == MAP_FAILED ||
- (internal_config.process_type == RTE_PROC_SECONDARY &&
- bar_addr != maps[i].addr)) {
- RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i,
- strerror(errno));
- close(vfio_dev_fd);
- if (internal_config.process_type == RTE_PROC_PRIMARY)
- rte_free(vfio_res);
- return -1;
- }
-
- maps[i].addr = bar_addr;
- maps[i].offset = reg.offset;
- maps[i].size = reg.size;
- maps[i].path = NULL; /* vfio doesn't have per-resource paths */
- dev->mem_resource[i].addr = bar_addr;
- }
-
- /* if secondary process, do not set up interrupts */
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
- if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
- RTE_LOG(ERR, EAL, " %s error setting up interrupts!\n", pci_addr);
- close(vfio_dev_fd);
- rte_free(vfio_res);
- return -1;
- }
-
- /* set bus mastering for the device */
- if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
- RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr);
- close(vfio_dev_fd);
- rte_free(vfio_res);
- return -1;
- }
-
- /* Reset the device */
- ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
- }
-
- if (internal_config.process_type == RTE_PROC_PRIMARY)
- TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
-
- return 0;
-}
-
-int
-pci_vfio_unmap_resource(struct rte_pci_device *dev)
-{
- char pci_addr[PATH_MAX] = {0};
- struct rte_pci_addr *loc = &dev->addr;
- int i, ret;
- struct mapped_pci_resource *vfio_res = NULL;
- struct mapped_pci_res_list *vfio_res_list;
-
- struct pci_map *maps;
-
- /* store PCI address string */
- snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
- loc->domain, loc->bus, loc->devid, loc->function);
-
-
- if (close(dev->intr_handle.fd) < 0) {
- RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
- pci_addr);
- return -1;
- }
-
- if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) {
- RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n",
- pci_addr);
- return -1;
- }
-
- ret = vfio_release_device(pci_get_sysfs_path(), pci_addr,
- dev->intr_handle.vfio_dev_fd);
- if (ret < 0) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot release device\n", __func__);
- return ret;
- }
-
- vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
- /* Get vfio_res */
- TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
- continue;
- break;
- }
- /* if we haven't found our tailq entry, something's wrong */
- if (vfio_res == NULL) {
- RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
- pci_addr);
- return -1;
- }
-
- /* unmap BARs */
- maps = vfio_res->maps;
-
- RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n",
- pci_addr);
- for (i = 0; i < (int) vfio_res->nb_maps; i++) {
-
- /*
- * We do not need to be aware of MSI-X table BAR mappings as
- * when mapping. Just using current maps array is enough
- */
- if (maps[i].addr) {
- RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n",
- pci_addr, maps[i].addr);
- pci_unmap_resource(maps[i].addr, maps[i].size);
- }
- }
-
- TAILQ_REMOVE(vfio_res_list, vfio_res, next);
-
- return 0;
-}
-
-int
-pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p)
-{
- if (bar < VFIO_PCI_BAR0_REGION_INDEX ||
- bar > VFIO_PCI_BAR5_REGION_INDEX) {
- RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar);
- return -1;
- }
-
- p->dev = dev;
- p->base = VFIO_GET_REGION_ADDR(bar);
- return 0;
-}
-
-void
-pci_vfio_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset)
-{
- const struct rte_intr_handle *intr_handle = &p->dev->intr_handle;
-
- if (pread64(intr_handle->vfio_dev_fd, data,
- len, p->base + offset) <= 0)
- RTE_LOG(ERR, EAL,
- "Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n",
- VFIO_GET_REGION_IDX(p->base), (int)offset);
-}
-
-void
-pci_vfio_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset)
-{
- const struct rte_intr_handle *intr_handle = &p->dev->intr_handle;
-
- if (pwrite64(intr_handle->vfio_dev_fd, data,
- len, p->base + offset) <= 0)
- RTE_LOG(ERR, EAL,
- "Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n",
- VFIO_GET_REGION_IDX(p->base), (int)offset);
-}
-
-int
-pci_vfio_ioport_unmap(struct rte_pci_ioport *p)
-{
- RTE_SET_USED(p);
- return -1;
-}
-
-int
-pci_vfio_enable(void)
-{
- return vfio_enable("vfio_pci");
-}
-
-int
-pci_vfio_is_enabled(void)
-{
- return vfio_is_enabled("vfio_pci");
-}
-#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
index 6481eeea..e9a579e4 100644
--- a/lib/librte_eal/linuxapp/eal/eal_thread.c
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -46,7 +46,6 @@
#include <rte_launch.h>
#include <rte_log.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_per_lcore.h>
#include <rte_eal.h>
#include <rte_lcore.h>
diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c
index afa32f5c..24349dab 100644
--- a/lib/librte_eal/linuxapp/eal/eal_timer.c
+++ b/lib/librte_eal/linuxapp/eal/eal_timer.c
@@ -49,7 +49,6 @@
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_debug.h>
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 946df7e3..58f0123e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -39,6 +39,7 @@
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_eal_memconfig.h>
+#include <rte_vfio.h>
#include "eal_filesystem.h"
#include "eal_vfio.h"
@@ -68,8 +69,8 @@ vfio_get_group_fd(int iommu_group_no)
{
int i;
int vfio_group_fd;
- int group_idx = -1;
char filename[PATH_MAX];
+ struct vfio_group *cur_grp;
/* check if we already have the group descriptor open */
for (i = 0; i < VFIO_MAX_GROUPS; i++)
@@ -85,12 +86,12 @@ vfio_get_group_fd(int iommu_group_no)
/* Now lets get an index for the new group */
for (i = 0; i < VFIO_MAX_GROUPS; i++)
if (vfio_cfg.vfio_groups[i].group_no == -1) {
- group_idx = i;
+ cur_grp = &vfio_cfg.vfio_groups[i];
break;
}
/* This should not happen */
- if (group_idx == -1) {
+ if (i == VFIO_MAX_GROUPS) {
RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
return -1;
}
@@ -123,8 +124,8 @@ vfio_get_group_fd(int iommu_group_no)
/* noiommu group found */
}
- vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no;
- vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd;
+ cur_grp->group_no = iommu_group_no;
+ cur_grp->fd = vfio_group_fd;
vfio_cfg.vfio_active_groups++;
return vfio_group_fd;
}
@@ -157,9 +158,12 @@ vfio_get_group_fd(int iommu_group_no)
return 0;
case SOCKET_OK:
vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
- /* if we got the fd, return it */
+ /* if we got the fd, store it and return it */
if (vfio_group_fd > 0) {
close(socket_fd);
+ cur_grp->group_no = iommu_group_no;
+ cur_grp->fd = vfio_group_fd;
+ vfio_cfg.vfio_active_groups++;
return vfio_group_fd;
}
/* fall-through on error */
@@ -280,7 +284,7 @@ clear_group(int vfio_group_fd)
}
int
-vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
int *vfio_dev_fd, struct vfio_device_info *device_info)
{
struct vfio_group_status group_status = {
@@ -412,7 +416,7 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr,
}
int
-vfio_release_device(const char *sysfs_base, const char *dev_addr,
+rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
int vfio_dev_fd)
{
struct vfio_group_status group_status = {
@@ -474,7 +478,7 @@ vfio_release_device(const char *sysfs_base, const char *dev_addr,
}
int
-vfio_enable(const char *modname)
+rte_vfio_enable(const char *modname)
{
/* initialize group list */
int i;
@@ -489,7 +493,7 @@ vfio_enable(const char *modname)
/* inform the user that we are probing for VFIO */
RTE_LOG(INFO, EAL, "Probing VFIO support...\n");
- /* check if vfio-pci module is loaded */
+ /* check if vfio module is loaded */
vfio_available = rte_eal_check_module(modname);
/* return error directly */
@@ -519,7 +523,7 @@ vfio_enable(const char *modname)
}
int
-vfio_is_enabled(const char *modname)
+rte_vfio_is_enabled(const char *modname)
{
const int mod_available = rte_eal_check_module(modname);
return vfio_cfg.vfio_enabled && mod_available;
@@ -706,7 +710,10 @@ vfio_type1_dma_map(int vfio_container_fd)
dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
dma_map.vaddr = ms[i].addr_64;
dma_map.size = ms[i].len;
- dma_map.iova = ms[i].phys_addr;
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ dma_map.iova = dma_map.vaddr;
+ else
+ dma_map.iova = ms[i].iova;
dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
@@ -759,10 +766,19 @@ vfio_spapr_dma_map(int vfio_container_fd)
return -1;
}
- /* calculate window size based on number of hugepages configured */
- create.window_size = rte_eal_get_physmem_size();
+ /* create DMA window from 0 to max(phys_addr + len) */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ if (ms[i].addr == NULL)
+ break;
+
+ create.window_size = RTE_MAX(create.window_size,
+ ms[i].iova + ms[i].len);
+ }
+
+ /* sPAPR requires window size to be a power of 2 */
+ create.window_size = rte_align64pow2(create.window_size);
create.page_shift = __builtin_ctzll(ms->hugepage_sz);
- create.levels = 2;
+ create.levels = 1;
ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
if (ret) {
@@ -771,6 +787,11 @@ vfio_spapr_dma_map(int vfio_container_fd)
return -1;
}
+ if (create.start_addr != 0) {
+ RTE_LOG(ERR, EAL, " DMA window start address != 0\n");
+ return -1;
+ }
+
/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
for (i = 0; i < RTE_MAX_MEMSEG; i++) {
struct vfio_iommu_type1_dma_map dma_map;
@@ -792,7 +813,10 @@ vfio_spapr_dma_map(int vfio_container_fd)
dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
dma_map.vaddr = ms[i].addr_64;
dma_map.size = ms[i].len;
- dma_map.iova = ms[i].phys_addr;
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ dma_map.iova = dma_map.vaddr;
+ else
+ dma_map.iova = ms[i].iova;
dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
VFIO_DMA_MAP_FLAG_WRITE;
@@ -816,4 +840,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
return 0;
}
+int
+rte_vfio_noiommu_is_enabled(void)
+{
+ int fd, ret, cnt __rte_unused;
+ char c;
+
+ ret = -1;
+ fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ cnt = read(fd, &c, 1);
+ if (c == 'Y')
+ ret = 1;
+
+ close(fd);
+ return ret;
+}
+
#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 5ff63e5d..ba7892b7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -37,20 +37,18 @@
/*
* determine if VFIO is present on the system
*/
-#ifdef RTE_EAL_VFIO
+#if !defined(VFIO_PRESENT) && defined(RTE_EAL_VFIO)
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
-#include <linux/vfio.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
-#define RTE_PCI_MSIX_TABLE_BIR 0x7
-#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8
-#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff
+#define VFIO_PRESENT
#else
-#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR
-#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET
-#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE
-#endif
+#pragma message("VFIO configured but not supported by this kernel, disabling.")
+#endif /* kernel version >= 3.6.0 */
+#endif /* RTE_EAL_VFIO */
+
+#ifdef VFIO_PRESENT
+
+#include <linux/vfio.h>
#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU
@@ -144,13 +142,6 @@ struct vfio_config {
struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
};
-#define VFIO_DIR "/dev/vfio"
-#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
-#define VFIO_GROUP_FMT "/dev/vfio/%u"
-#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
-#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
-#define VFIO_GET_REGION_IDX(x) (x >> 40)
-
/* DMA mapping function prototype.
* Takes VFIO container fd as a parameter.
* Returns 0 on success, -1 on error.
@@ -190,24 +181,6 @@ vfio_get_group_fd(int iommu_group_no);
int
clear_group(int vfio_group_fd);
-/**
- * Setup vfio_cfg for the device identified by its address. It discovers
- * the configured I/O MMU groups or sets a new one for the device. If a new
- * groups is assigned, the DMA mapping is performed.
- * Returns 0 on success, a negative value on failure and a positive value in
- * case the given device cannot be managed this way.
- */
-int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
- int *vfio_dev_fd, struct vfio_device_info *device_info);
-
-int vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd);
-
-int vfio_enable(const char *modname);
-int vfio_is_enabled(const char *modname);
-
-int pci_vfio_enable(void);
-int pci_vfio_is_enabled(void);
-
int vfio_mp_sync_setup(void);
#define SOCKET_REQ_CONTAINER 0x100
@@ -217,8 +190,6 @@ int vfio_mp_sync_setup(void);
#define SOCKET_NO_FD 0x1
#define SOCKET_ERR 0xFF
-#define VFIO_PRESENT
-#endif /* kernel version */
-#endif /* RTE_EAL_VFIO */
+#endif /* VFIO_PRESENT */
#endif /* EAL_VFIO_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index 7e8095cb..b53ed7eb 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -49,12 +49,12 @@
#endif
#include <rte_log.h>
-#include <rte_pci.h>
#include <rte_eal_memconfig.h>
#include <rte_malloc.h>
+#include <rte_vfio.h>
#include "eal_filesystem.h"
-#include "eal_pci_init.h"
+#include "eal_vfio.h"
#include "eal_thread.h"
/**
@@ -301,7 +301,8 @@ vfio_mp_sync_thread(void __rte_unused * arg)
vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
else
vfio_mp_sync_send_fd(conn_sock, fd);
- close(fd);
+ if (fd >= 0)
+ close(fd);
break;
case SOCKET_REQ_GROUP:
/* wait for group number */
diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
deleted file mode 100644
index 19db1cb5..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
+++ /dev/null
@@ -1,381 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <errno.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <inttypes.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/queue.h>
-#include <sys/file.h>
-#include <unistd.h>
-#include <limits.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-
-#include <rte_log.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_launch.h>
-#include <rte_eal.h>
-#include <rte_eal_memconfig.h>
-#include <rte_per_lcore.h>
-#include <rte_lcore.h>
-#include <rte_common.h>
-#include <rte_string_fns.h>
-
-#include "eal_private.h"
-#include "eal_internal_cfg.h"
-#include "eal_filesystem.h"
-#include <exec-env/rte_dom0_common.h>
-
-#define PAGE_SIZE RTE_PGSIZE_4K
-#define DEFAUL_DOM0_NAME "dom0-mem"
-
-static int xen_fd = -1;
-static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB";
-
-/*
- * Try to mmap *size bytes in /dev/zero. If it is successful, return the
- * pointer to the mmap'd area and keep *size unmodified. Else, retry
- * with a smaller zone: decrease *size by mem_size until it reaches
- * 0. In this case, return NULL. Note: this function returns an address
- * which is a multiple of mem_size size.
- */
-static void *
-xen_get_virtual_area(size_t *size, size_t mem_size)
-{
- void *addr;
- int fd;
- long aligned_addr;
-
- RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zu bytes\n", *size);
-
- fd = open("/dev/zero", O_RDONLY);
- if (fd < 0){
- RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
- return NULL;
- }
- do {
- addr = mmap(NULL, (*size) + mem_size, PROT_READ,
- MAP_PRIVATE, fd, 0);
- if (addr == MAP_FAILED)
- *size -= mem_size;
- } while (addr == MAP_FAILED && *size > 0);
-
- if (addr == MAP_FAILED) {
- close(fd);
- RTE_LOG(ERR, EAL, "Cannot get a virtual area\n");
- return NULL;
- }
-
- munmap(addr, (*size) + mem_size);
- close(fd);
-
- /* align addr to a mem_size boundary */
- aligned_addr = (uintptr_t)addr;
- aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size);
- addr = (void *)(aligned_addr);
-
- RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
- addr, *size);
-
- return addr;
-}
-
-/**
- * Get memory size configuration from /sys/devices/virtual/misc/dom0_mm
- * /memsize-mB/memsize file, and the size unit is mB.
- */
-static int
-get_xen_memory_size(void)
-{
- char path[PATH_MAX];
- unsigned long mem_size = 0;
- static const char *file_name;
-
- file_name = "memsize";
- snprintf(path, sizeof(path), "%s/%s",
- sys_dir_path, file_name);
-
- if (eal_parse_sysfs_value(path, &mem_size) < 0)
- return -1;
-
- if (mem_size == 0)
- rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not"
- " configured.\n",sys_dir_path, file_name);
- if (mem_size % 2)
- rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be"
- " even number.\n",sys_dir_path, file_name);
-
- if (mem_size > DOM0_CONFIG_MEMSIZE)
- rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger"
- " than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE);
-
- return mem_size;
-}
-
-/**
- * Based on physical address to caculate MFN in Xen Dom0.
- */
-phys_addr_t
-rte_xen_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr)
-{
- int mfn_id, i;
- uint64_t mfn, mfn_offset;
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- struct rte_memseg *memseg = mcfg->memseg;
-
- /* find the memory segment owning the physical address */
- if (memseg_id == -1) {
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- if ((phy_addr >= memseg[i].phys_addr) &&
- (phy_addr < memseg[i].phys_addr +
- memseg[i].len)) {
- memseg_id = i;
- break;
- }
- }
- if (memseg_id == -1)
- return RTE_BAD_PHYS_ADDR;
- }
-
- mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M;
-
- /*the MFN is contiguous in 2M */
- mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) %
- RTE_PGSIZE_2M / PAGE_SIZE;
- mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id];
-
- /** return mechine address */
- return mfn * PAGE_SIZE + phy_addr % PAGE_SIZE;
-}
-
-int
-rte_xen_dom0_memory_init(void)
-{
- void *vir_addr, *vma_addr = NULL;
- int err, ret = 0;
- uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0;
- size_t vma_len = 0;
- struct memory_info meminfo;
- struct memseg_info seginfo[RTE_MAX_MEMSEG];
- int flags, page_size = getpagesize();
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- struct rte_memseg *memseg = mcfg->memseg;
- uint64_t total_mem = internal_config.memory;
-
- memset(seginfo, 0, sizeof(seginfo));
- memset(&meminfo, 0, sizeof(struct memory_info));
-
- mem_size = get_xen_memory_size();
- requested = (unsigned) (total_mem / 0x100000);
- if (requested > mem_size)
- /* if we didn't satisfy total memory requirements */
- rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB,"
- " available: %uMB\n", requested, mem_size);
- else if (total_mem != 0)
- mem_size = requested;
-
- /* Check FD and open once */
- if (xen_fd < 0) {
- xen_fd = open(DOM0_MM_DEV, O_RDWR);
- if (xen_fd < 0) {
- RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
- return -1;
- }
- }
-
- meminfo.size = mem_size;
-
- /* construct memory mangement name for Dom0 */
- snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s",
- internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
-
- /* Notify kernel driver to allocate memory */
- ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n");
- err = -EIO;
- goto fail;
- }
-
- /* Get number of memory segment from driver */
- ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n");
- err = -EIO;
- goto fail;
- }
-
- if(num_memseg > RTE_MAX_MEMSEG){
- RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater"
- " than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG);
- err = -EIO;
- goto fail;
- }
-
- /* get all memory segements information */
- ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n");
- err = -EIO;
- goto fail;
- }
-
- /* map all memory segments to contiguous user space */
- for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++)
- {
- vma_len = seginfo[memseg_idx].size;
-
- /**
- * get the biggest virtual memory area up to vma_len. If it fails,
- * vma_addr is NULL, so let the kernel provide the address.
- */
- vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M);
- if (vma_addr == NULL) {
- flags = MAP_SHARED;
- vma_len = RTE_PGSIZE_2M;
- } else
- flags = MAP_SHARED | MAP_FIXED;
-
- seginfo[memseg_idx].size = vma_len;
- vir_addr = mmap(vma_addr, seginfo[memseg_idx].size,
- PROT_READ|PROT_WRITE, flags, xen_fd,
- memseg_idx * page_size);
- if (vir_addr == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n",
- DOM0_MM_DEV);
- err = -EIO;
- goto fail;
- }
-
- memseg[memseg_idx].addr = vir_addr;
- memseg[memseg_idx].phys_addr = page_size *
- seginfo[memseg_idx].pfn ;
- memseg[memseg_idx].len = seginfo[memseg_idx].size;
- for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++)
- memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i];
-
- /* MFNs are continuous in 2M, so assume that page size is 2M */
- memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M;
-
- memseg[memseg_idx].nchannel = mcfg->nchannel;
- memseg[memseg_idx].nrank = mcfg->nrank;
-
- /* NUMA is not suppoted in Xen Dom0, so only set socket 0*/
- memseg[memseg_idx].socket_id = 0;
- }
-
- return 0;
-fail:
- if (xen_fd > 0) {
- close(xen_fd);
- xen_fd = -1;
- }
- return err;
-}
-
-/*
- * This creates the memory mappings in the secondary process to match that of
- * the server process. It goes through each memory segment in the DPDK runtime
- * configuration, mapping them in order to form a contiguous block in the
- * virtual memory space
- */
-int
-rte_xen_dom0_memory_attach(void)
-{
- const struct rte_mem_config *mcfg;
- unsigned s = 0; /* s used to track the segment number */
- int xen_fd = -1;
- int ret = -1;
- void *vir_addr;
- char name[DOM0_NAME_MAX] = {0};
- int page_size = getpagesize();
-
- mcfg = rte_eal_get_configuration()->mem_config;
-
- /* Check FD and open once */
- if (xen_fd < 0) {
- xen_fd = open(DOM0_MM_DEV, O_RDWR);
- if (xen_fd < 0) {
- RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
- goto error;
- }
- }
-
- /* construct memory mangement name for Dom0 */
- snprintf(name, DOM0_NAME_MAX, "%s-%s",
- internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
- /* attach to memory segments of primary process */
- ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name);
- if (ret) {
- RTE_LOG(ERR, EAL,"attach memory segments fail.\n");
- goto error;
- }
-
- /* map all segments into memory to make sure we get the addrs */
- for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
-
- /*
- * the first memory segment with len==0 is the one that
- * follows the last valid segment.
- */
- if (mcfg->memseg[s].len == 0)
- break;
-
- vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
- PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd,
- s * page_size);
- if (vir_addr == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
- "in %s to requested address [%p]\n",
- (unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV,
- mcfg->memseg[s].addr);
- goto error;
- }
- }
- return 0;
-
-error:
- if (xen_fd >= 0) {
- close(xen_fd);
- xen_fd = -1;
- }
- return -1;
-}
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h
deleted file mode 100644
index d9707780..00000000
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*-
- * This file is provided under a dual BSD/LGPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GNU LESSER GENERAL PUBLIC LICENSE
- *
- * Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Contact Information:
- * Intel Corporation
- *
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _RTE_DOM0_COMMON_H_
-#define _RTE_DOM0_COMMON_H_
-
-#ifdef __KERNEL__
-#include <linux/if.h>
-#endif
-
-#define DOM0_NAME_MAX 256
-#define DOM0_MM_DEV "/dev/dom0_mm"
-
-#define DOM0_CONTIG_NUM_ORDER 9 /**< order of 2M */
-#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */
-#define DOM0_MEMBLOCK_SIZE 0x200000 /**< size of memory block(2M). */
-#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */
-#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */
-
-#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info)
-#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *)
-#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int)
-#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *)
-
-/**
- * A structure used to store memory information.
- */
-struct memory_info {
- char name[DOM0_NAME_MAX];
- uint64_t size;
-};
-
-/**
- * A structure used to store memory segment information.
- */
-struct memseg_info {
- uint32_t idx;
- uint64_t pfn;
- uint64_t size;
- uint64_t mfn[DOM0_NUM_MEMBLOCK];
-};
-
-/**
- * A structure used to store memory block information.
- */
-struct memblock_info {
- uint8_t exchange_flag;
- uint8_t used;
- uint64_t vir_addr;
- uint64_t pfn;
- uint64_t mfn;
-};
-#endif /* _RTE_DOM0_COMMON_H_ */
diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h
index b800a53c..ce456d4b 100644
--- a/lib/librte_eal/linuxapp/igb_uio/compat.h
+++ b/lib/librte_eal/linuxapp/igb_uio/compat.h
@@ -16,12 +16,9 @@
#endif
#ifndef PCI_MSIX_ENTRY_SIZE
-#define PCI_MSIX_ENTRY_SIZE 16
-#define PCI_MSIX_ENTRY_LOWER_ADDR 0
-#define PCI_MSIX_ENTRY_UPPER_ADDR 4
-#define PCI_MSIX_ENTRY_DATA 8
-#define PCI_MSIX_ENTRY_VECTOR_CTRL 12
-#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1
+#define PCI_MSIX_ENTRY_SIZE 16
+#define PCI_MSIX_ENTRY_VECTOR_CTRL 12
+#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1
#endif
/*
@@ -124,6 +121,14 @@ static bool pci_check_and_mask_intx(struct pci_dev *pdev)
#endif /* < 3.3.0 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
-#define HAVE_PCI_ENABLE_MSIX
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+#define HAVE_ALLOC_IRQ_VECTORS 1
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+#define HAVE_MSI_LIST_IN_GENERIC_DEVICE 1
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
+#define HAVE_PCI_MSI_MASK_IRQ 1
#endif
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
index 07a19a31..a3a98c17 100644
--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -29,13 +29,11 @@
#include <linux/pci.h>
#include <linux/uio_driver.h>
#include <linux/io.h>
+#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/version.h>
#include <linux/slab.h>
-#ifdef CONFIG_XEN_DOM0
-#include <xen/xen.h>
-#endif
#include <rte_pci_dev_features.h>
#include "compat.h"
@@ -51,7 +49,6 @@ struct rte_uio_pci_dev {
static char *intr_mode;
static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
-
/* sriov sysfs */
static ssize_t
show_max_vfs(struct device *dev, struct device_attribute *attr,
@@ -91,14 +88,16 @@ static struct attribute *dev_attrs[] = {
static const struct attribute_group dev_attr_grp = {
.attrs = dev_attrs,
};
+
+#ifndef HAVE_PCI_MSI_MASK_IRQ
/*
* It masks the msix on/off of generating MSI-X messages.
*/
static void
-igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state)
+igbuio_msix_mask_irq(struct msi_desc *desc, s32 state)
{
u32 mask_bits = desc->masked;
- unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+ unsigned int offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
PCI_MSIX_ENTRY_VECTOR_CTRL;
if (state != 0)
@@ -113,6 +112,52 @@ igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state)
}
}
+/*
+ * Mask or unmask MSI message generation: state != 0 unmasks, state == 0 masks.
+ */
+static void
+igbuio_msi_mask_irq(struct pci_dev *pdev, struct msi_desc *desc, int32_t state)
+{
+ u32 mask_bits = desc->masked;
+ u32 offset = desc->irq - pdev->irq;
+ u32 mask = 1 << offset;
+
+ if (!desc->msi_attrib.maskbit)
+ return;
+
+ if (state != 0)
+ mask_bits &= ~mask;
+ else
+ mask_bits |= mask;
+
+ if (mask_bits != desc->masked) {
+ pci_write_config_dword(pdev, desc->mask_pos, mask_bits);
+ desc->masked = mask_bits;
+ }
+}
+
+static void
+igbuio_mask_irq(struct pci_dev *pdev, enum rte_intr_mode mode, s32 irq_state)
+{
+ struct msi_desc *desc;
+ struct list_head *msi_list;
+
+#ifdef HAVE_MSI_LIST_IN_GENERIC_DEVICE
+ msi_list = &pdev->dev.msi_list;
+#else
+ msi_list = &pdev->msi_list;
+#endif
+
+ if (mode == RTE_INTR_MODE_MSIX) {
+ list_for_each_entry(desc, msi_list, list)
+ igbuio_msix_mask_irq(desc, irq_state);
+ } else if (mode == RTE_INTR_MODE_MSI) {
+ list_for_each_entry(desc, msi_list, list)
+ igbuio_msi_mask_irq(pdev, desc, irq_state);
+ }
+}
+#endif
+
/**
* This is the irqcontrol callback to be registered to uio_info.
* It can be used to disable/enable interrupt from user space processes.
@@ -132,21 +177,26 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
struct rte_uio_pci_dev *udev = info->priv;
struct pci_dev *pdev = udev->pdev;
- pci_cfg_access_lock(pdev);
- if (udev->mode == RTE_INTR_MODE_LEGACY)
- pci_intx(pdev, !!irq_state);
+#ifdef HAVE_PCI_MSI_MASK_IRQ
+ struct irq_data *irq = irq_get_irq_data(udev->info.irq);
+#endif
- else if (udev->mode == RTE_INTR_MODE_MSIX) {
- struct msi_desc *desc;
+ pci_cfg_access_lock(pdev);
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0))
- list_for_each_entry(desc, &pdev->msi_list, list)
- igbuio_msix_mask_irq(desc, irq_state);
+ if (udev->mode == RTE_INTR_MODE_MSIX || udev->mode == RTE_INTR_MODE_MSI) {
+#ifdef HAVE_PCI_MSI_MASK_IRQ
+ if (irq_state == 1)
+ pci_msi_unmask_irq(irq);
+ else
+ pci_msi_mask_irq(irq);
#else
- list_for_each_entry(desc, &pdev->dev.msi_list, list)
- igbuio_msix_mask_irq(desc, irq_state);
+ igbuio_mask_irq(pdev, udev->mode, irq_state);
#endif
}
+
+ if (udev->mode == RTE_INTR_MODE_LEGACY)
+ pci_intx(pdev, !!irq_state);
+
pci_cfg_access_unlock(pdev);
return 0;
@@ -157,19 +207,125 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
* If yes, disable it here and will be enable later.
*/
static irqreturn_t
-igbuio_pci_irqhandler(int irq, struct uio_info *info)
+igbuio_pci_irqhandler(int irq, void *dev_id)
{
- struct rte_uio_pci_dev *udev = info->priv;
+ struct rte_uio_pci_dev *udev = (struct rte_uio_pci_dev *)dev_id;
+ struct uio_info *info = &udev->info;
/* Legacy mode need to mask in hardware */
if (udev->mode == RTE_INTR_MODE_LEGACY &&
!pci_check_and_mask_intx(udev->pdev))
return IRQ_NONE;
+ uio_event_notify(info);
+
/* Message signal mode, no share IRQ and automasked */
return IRQ_HANDLED;
}
+static int
+igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev)
+{
+ int err = 0;
+#ifndef HAVE_ALLOC_IRQ_VECTORS
+ struct msix_entry msix_entry;
+#endif
+
+ switch (igbuio_intr_mode_preferred) {
+ case RTE_INTR_MODE_MSIX:
+ /* Only one MSI-X vector is needed */
+#ifndef HAVE_ALLOC_IRQ_VECTORS
+ msix_entry.entry = 0;
+ if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) {
+ dev_dbg(&udev->pdev->dev, "using MSI-X");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = msix_entry.vector;
+ udev->mode = RTE_INTR_MODE_MSIX;
+ break;
+ }
+#else
+ if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) {
+ dev_dbg(&udev->pdev->dev, "using MSI-X");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = pci_irq_vector(udev->pdev, 0);
+ udev->mode = RTE_INTR_MODE_MSIX;
+ break;
+ }
+#endif
+
+ /* fall back to MSI */
+ case RTE_INTR_MODE_MSI:
+#ifndef HAVE_ALLOC_IRQ_VECTORS
+ if (pci_enable_msi(udev->pdev) == 0) {
+ dev_dbg(&udev->pdev->dev, "using MSI");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = udev->pdev->irq;
+ udev->mode = RTE_INTR_MODE_MSI;
+ break;
+ }
+#else
+ if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) {
+ dev_dbg(&udev->pdev->dev, "using MSI");
+ udev->info.irq_flags = IRQF_NO_THREAD;
+ udev->info.irq = pci_irq_vector(udev->pdev, 0);
+ udev->mode = RTE_INTR_MODE_MSI;
+ break;
+ }
+#endif
+ /* fall back to INTX */
+ case RTE_INTR_MODE_LEGACY:
+ if (pci_intx_mask_supported(udev->pdev)) {
+ dev_dbg(&udev->pdev->dev, "using INTX");
+ udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD;
+ udev->info.irq = udev->pdev->irq;
+ udev->mode = RTE_INTR_MODE_LEGACY;
+ break;
+ }
+ dev_notice(&udev->pdev->dev, "PCI INTX mask not supported\n");
+ /* fall back to no IRQ */
+ case RTE_INTR_MODE_NONE:
+ udev->mode = RTE_INTR_MODE_NONE;
+ udev->info.irq = UIO_IRQ_NONE;
+ break;
+
+ default:
+ dev_err(&udev->pdev->dev, "invalid IRQ mode %u",
+ igbuio_intr_mode_preferred);
+ udev->info.irq = UIO_IRQ_NONE;
+ err = -EINVAL;
+ }
+
+ if (udev->info.irq != UIO_IRQ_NONE)
+ err = request_irq(udev->info.irq, igbuio_pci_irqhandler,
+ udev->info.irq_flags, udev->info.name,
+ udev);
+ dev_info(&udev->pdev->dev, "uio device registered with irq %lx\n",
+ udev->info.irq);
+
+ return err;
+}
+
+static void
+igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev)
+{
+ if (udev->info.irq) {
+ free_irq(udev->info.irq, udev);
+ udev->info.irq = 0;
+ }
+
+#ifndef HAVE_ALLOC_IRQ_VECTORS
+ if (udev->mode == RTE_INTR_MODE_MSIX)
+ pci_disable_msix(udev->pdev);
+ if (udev->mode == RTE_INTR_MODE_MSI)
+ pci_disable_msi(udev->pdev);
+#else
+ if (udev->mode == RTE_INTR_MODE_MSIX ||
+ udev->mode == RTE_INTR_MODE_MSI)
+ pci_free_irq_vectors(udev->pdev);
+#endif
+}
+
+
/**
* This gets called while opening uio device file.
*/
@@ -178,12 +334,17 @@ igbuio_pci_open(struct uio_info *info, struct inode *inode)
{
struct rte_uio_pci_dev *udev = info->priv;
struct pci_dev *dev = udev->pdev;
-
- pci_reset_function(dev);
+ int err;
/* set bus master, which was cleared by the reset function */
pci_set_master(dev);
+ /* enable interrupts */
+ err = igbuio_pci_enable_interrupts(udev);
+ if (err) {
+ dev_err(&dev->dev, "Enable interrupt fails\n");
+ return err;
+ }
return 0;
}
@@ -193,60 +354,15 @@ igbuio_pci_release(struct uio_info *info, struct inode *inode)
struct rte_uio_pci_dev *udev = info->priv;
struct pci_dev *dev = udev->pdev;
+ /* disable interrupts */
+ igbuio_pci_disable_interrupts(udev);
+
/* stop the device from further DMA */
pci_clear_master(dev);
- pci_reset_function(dev);
-
return 0;
}
-#ifdef CONFIG_XEN_DOM0
-static int
-igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma)
-{
- int idx;
-
- idx = (int)vma->vm_pgoff;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-#ifdef HAVE_PTE_MASK_PAGE_IOMAP
- vma->vm_page_prot.pgprot |= _PAGE_IOMAP;
-#endif
-
- return remap_pfn_range(vma,
- vma->vm_start,
- info->mem[idx].addr >> PAGE_SHIFT,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot);
-}
-
-/**
- * This is uio device mmap method which will use igbuio mmap for Xen
- * Dom0 environment.
- */
-static int
-igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma)
-{
- int idx;
-
- if (vma->vm_pgoff >= MAX_UIO_MAPS)
- return -EINVAL;
-
- if (info->mem[vma->vm_pgoff].size == 0)
- return -EINVAL;
-
- idx = (int)vma->vm_pgoff;
- switch (info->mem[idx].memtype) {
- case UIO_MEM_PHYS:
- return igbuio_dom0_mmap_phys(info, vma);
- case UIO_MEM_LOGICAL:
- case UIO_MEM_VIRTUAL:
- default:
- return -EINVAL;
- }
-}
-#endif
-
/* Remap pci resources described by bar #pci_bar in uio resource n. */
static int
igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
@@ -356,9 +472,6 @@ static int
igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
struct rte_uio_pci_dev *udev;
-#ifdef HAVE_PCI_ENABLE_MSIX
- struct msix_entry msix_entry;
-#endif
dma_addr_t map_dma_addr;
void *map_addr;
int err;
@@ -401,61 +514,12 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
/* fill uio infos */
udev->info.name = "igb_uio";
udev->info.version = "0.1";
- udev->info.handler = igbuio_pci_irqhandler;
udev->info.irqcontrol = igbuio_pci_irqcontrol;
udev->info.open = igbuio_pci_open;
udev->info.release = igbuio_pci_release;
-#ifdef CONFIG_XEN_DOM0
- /* check if the driver run on Xen Dom0 */
- if (xen_initial_domain())
- udev->info.mmap = igbuio_dom0_pci_mmap;
-#endif
udev->info.priv = udev;
udev->pdev = dev;
- switch (igbuio_intr_mode_preferred) {
- case RTE_INTR_MODE_MSIX:
- /* Only 1 msi-x vector needed */
-#ifdef HAVE_PCI_ENABLE_MSIX
- msix_entry.entry = 0;
- if (pci_enable_msix(dev, &msix_entry, 1) == 0) {
- dev_dbg(&dev->dev, "using MSI-X");
- udev->info.irq_flags = IRQF_NO_THREAD;
- udev->info.irq = msix_entry.vector;
- udev->mode = RTE_INTR_MODE_MSIX;
- break;
- }
-#else
- if (pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_MSIX) == 1) {
- dev_dbg(&dev->dev, "using MSI-X");
- udev->info.irq = pci_irq_vector(dev, 0);
- udev->mode = RTE_INTR_MODE_MSIX;
- break;
- }
-#endif
- /* fall back to INTX */
- case RTE_INTR_MODE_LEGACY:
- if (pci_intx_mask_supported(dev)) {
- dev_dbg(&dev->dev, "using INTX");
- udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD;
- udev->info.irq = dev->irq;
- udev->mode = RTE_INTR_MODE_LEGACY;
- break;
- }
- dev_notice(&dev->dev, "PCI INTX mask not supported\n");
- /* fall back to no IRQ */
- case RTE_INTR_MODE_NONE:
- udev->mode = RTE_INTR_MODE_NONE;
- udev->info.irq = 0;
- break;
-
- default:
- dev_err(&dev->dev, "invalid IRQ mode %u",
- igbuio_intr_mode_preferred);
- err = -EINVAL;
- goto fail_release_iomem;
- }
-
err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
if (err != 0)
goto fail_release_iomem;
@@ -467,9 +531,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
pci_set_drvdata(dev, udev);
- dev_info(&dev->dev, "uio device registered with irq %lx\n",
- udev->info.irq);
-
/*
* Doing a harmless dma mapping for attaching the device to
* the iommu identity mapping if kernel boots with iommu=pt.
@@ -497,8 +558,6 @@ fail_remove_group:
sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
fail_release_iomem:
igbuio_pci_release_iomem(&udev->info);
- if (udev->mode == RTE_INTR_MODE_MSIX)
- pci_disable_msix(udev->pdev);
pci_disable_device(dev);
fail_free:
kfree(udev);
@@ -514,8 +573,6 @@ igbuio_pci_remove(struct pci_dev *dev)
sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
uio_unregister_device(&udev->info);
igbuio_pci_release_iomem(&udev->info);
- if (udev->mode == RTE_INTR_MODE_MSIX)
- pci_disable_msix(dev);
pci_disable_device(dev);
pci_set_drvdata(dev, NULL);
kfree(udev);
@@ -532,6 +589,9 @@ igbuio_config_intr_mode(char *intr_str)
if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) {
igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
pr_info("Use MSIX interrupt\n");
+ } else if (!strcmp(intr_str, RTE_INTR_MODE_MSI_NAME)) {
+ igbuio_intr_mode_preferred = RTE_INTR_MODE_MSI;
+ pr_info("Use MSI interrupt\n");
} else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) {
igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY;
pr_info("Use legacy interrupt\n");
@@ -575,6 +635,7 @@ module_param(intr_mode, charp, S_IRUGO);
MODULE_PARM_DESC(intr_mode,
"igb_uio interrupt mode (default=msix):\n"
" " RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n"
+" " RTE_INTR_MODE_MSI_NAME " Use MSI interrupt\n"
" " RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n"
"\n");
diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h
index 6a1587b4..3f8c0bc8 100644
--- a/lib/librte_eal/linuxapp/kni/compat.h
+++ b/lib/librte_eal/linuxapp/kni/compat.h
@@ -8,6 +8,34 @@
#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
#endif
+/* SLE_VERSION() encodes its arguments exactly like KERNEL_VERSION() */
+#ifndef SLE_VERSION
+#define SLE_VERSION(a, b, c) KERNEL_VERSION(a, b, c)
+#endif
+#ifdef CONFIG_SUSE_KERNEL
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57))
+/* SLES12SP3 is at least 4.4.57+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0)
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 28))
+/* SLES12 is at least 3.12.28+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12, 0, 0)
+#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 61)) && \
+ (LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)))
+/* SLES11 SP3 is at least 3.0.61+ based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 3, 0)
+#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32))
+/* SLES11 SP1 is 2.6.32 based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 1, 0)
+#elif (LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 27))
+/* SLES11 GA is 2.6.27 based */
+#define SLE_VERSION_CODE SLE_VERSION(11, 0, 0)
+#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */
+#endif /* CONFIG_SUSE_KERNEL */
+#ifndef SLE_VERSION_CODE
+#define SLE_VERSION_CODE 0
+#endif /* SLE_VERSION_CODE */
+
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
(!(defined(RHEL_RELEASE_CODE) && \
RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
@@ -55,7 +83,8 @@
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) || \
(defined(RHEL_RELEASE_CODE) && \
- RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4))
+ RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)) || \
+ (SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(12, 3, 0))
#define HAVE_TRANS_START_HELPER
#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
index e0a03542..e38a7561 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
@@ -697,22 +697,22 @@ struct _kc_ethtool_pauseparam {
#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c)
#endif
#ifdef CONFIG_SUSE_KERNEL
-#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) )
-/* SLES11 GA is 2.6.27 based */
-#define SLE_VERSION_CODE SLE_VERSION(11,0,0)
-#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) )
-/* SLES11 SP1 is 2.6.32 based */
-#define SLE_VERSION_CODE SLE_VERSION(11,1,0)
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57))
+/* SLES12SP3 is at least 4.4.57+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0)
+#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) )
+/* SLES12 is at least 3.12.28+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12,0,0)
#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,61)) && \
(LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0)))
/* SLES11 SP3 is at least 3.0.61+ based */
#define SLE_VERSION_CODE SLE_VERSION(11,3,0)
-#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) )
-/* SLES12 is at least 3.12.28+ based */
-#define SLE_VERSION_CODE SLE_VERSION(12,0,0)
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57))
-/* SLES12SP3 is at least 4.4.57+ based */
-#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0)
+#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) )
+/* SLES11 SP1 is 2.6.32 based */
+#define SLE_VERSION_CODE SLE_VERSION(11,1,0)
+#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) )
+/* SLES11 GA is 2.6.27 based */
+#define SLE_VERSION_CODE SLE_VERSION(11,0,0)
#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */
#endif /* CONFIG_SUSE_KERNEL */
#ifndef SLE_VERSION_CODE
diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_eal/linuxapp/xen_dom0/Makefile
deleted file mode 100644
index be51a82a..00000000
--- a/lib/librte_eal/linuxapp/xen_dom0/Makefile
+++ /dev/null
@@ -1,53 +0,0 @@
-# BSD LICENSE
-#
-# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of Intel Corporation nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-#
-# module name and path
-#
-MODULE = rte_dom0_mm
-
-#
-# CFLAGS
-#
-MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
-MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
-MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
-MODULE_CFLAGS += -Wall -Werror
-
-#
-# all source are stored in SRCS-y
-#
-
-SRCS-y += dom0_mm_misc.c
-
-include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/xen_dom0/compat.h b/lib/librte_eal/linuxapp/xen_dom0/compat.h
deleted file mode 100644
index e6eb97f2..00000000
--- a/lib/librte_eal/linuxapp/xen_dom0/compat.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Minimal wrappers to allow compiling xen_dom0 on older kernels.
- */
-
-#ifndef RHEL_RELEASE_VERSION
-#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \
- (!(defined(RHEL_RELEASE_CODE) && \
- RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4)))
-
-#define kstrtoul strict_strtoul
-
-#endif /* < 2.6.39 */
diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h
deleted file mode 100644
index 9d5ffb22..00000000
--- a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#ifndef _DOM0_MM_DEV_H_
-#define _DOM0_MM_DEV_H_
-
-#include <linux/wait.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <exec-env/rte_dom0_common.h>
-
-#define NUM_MEM_CTX 256 /**< Maximum number of memory context*/
-#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum times of allowing exchange fail .*/
-#define MAX_MEMBLOCK_SIZE (2 * DOM0_MEMBLOCK_SIZE)
-#define MAX_NUM_ORDER (DOM0_CONTIG_NUM_ORDER + 1)
-#define SIZE_PER_BLOCK 2 /**< Size of memory block (2MB).*/
-
-/**
- * A structure describing the private information for a dom0 device.
- */
-struct dom0_mm_dev {
- struct miscdevice miscdev;
- uint8_t fail_times;
- uint32_t used_memsize;
- uint32_t num_mem_ctx;
- uint32_t config_memsize;
- uint32_t num_bigblock;
- struct dom0_mm_data *mm_data[NUM_MEM_CTX];
- struct mutex data_lock;
-};
-
-struct dom0_mm_data{
- uint32_t refcnt;
- uint32_t num_memseg; /**< Number of memory segment. */
- uint32_t mem_size; /**< Size of requesting memory. */
-
- char name[DOM0_NAME_MAX];
-
- /** Store global memory block IDs used by an instance */
- uint32_t block_num[DOM0_NUM_MEMBLOCK];
-
- /** Store memory block information.*/
- struct memblock_info block_info[DOM0_NUM_MEMBLOCK];
-
- /** Store memory segment information.*/
- struct memseg_info seg_info[DOM0_NUM_MEMSEG];
-};
-
-#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args)
-#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args)
-#endif
diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c
deleted file mode 100644
index 79630bad..00000000
--- a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c
+++ /dev/null
@@ -1,780 +0,0 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/miscdevice.h>
-#include <linux/fs.h>
-#include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/version.h>
-
-#include <xen/xen.h>
-#include <xen/page.h>
-#include <xen/xen-ops.h>
-#include <xen/interface/memory.h>
-
-#include <exec-env/rte_dom0_common.h>
-
-#include "compat.h"
-#include "dom0_mm_dev.h"
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0");
-
-static struct dom0_mm_dev dom0_dev;
-static struct kobject *dom0_kobj = NULL;
-
-static struct memblock_info *rsv_mm_info;
-
-/* Default configuration for reserved memory size(2048 MB). */
-static uint32_t rsv_memsize = 2048;
-
-static int dom0_open(struct inode *inode, struct file *file);
-static int dom0_release(struct inode *inode, struct file *file);
-static int dom0_ioctl(struct file *file, unsigned int ioctl_num,
- unsigned long ioctl_param);
-static int dom0_mmap(struct file *file, struct vm_area_struct *vma);
-static int dom0_memory_free(uint32_t size);
-static int dom0_memory_release(struct dom0_mm_data *mm_data);
-
-static const struct file_operations data_fops = {
- .owner = THIS_MODULE,
- .open = dom0_open,
- .release = dom0_release,
- .mmap = dom0_mmap,
- .unlocked_ioctl = (void *)dom0_ioctl,
-};
-
-static ssize_t
-show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf)
-{
- return snprintf(buf, 10, "%u\n", dom0_dev.used_memsize);
-}
-
-static ssize_t
-show_memsize(struct device *dev, struct device_attribute *attr, char *buf)
-{
- return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize);
-}
-
-static ssize_t
-store_memsize(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- int err = 0;
- unsigned long mem_size;
-
- if (0 != kstrtoul(buf, 0, &mem_size))
- return -EINVAL;
-
- mutex_lock(&dom0_dev.data_lock);
- if (0 == mem_size) {
- err = -EINVAL;
- goto fail;
- } else if (mem_size > (rsv_memsize - dom0_dev.used_memsize)) {
- XEN_ERR("configure memory size fail\n");
- err = -EINVAL;
- goto fail;
- } else
- dom0_dev.config_memsize = mem_size;
-
-fail:
- mutex_unlock(&dom0_dev.data_lock);
- return err ? err : count;
-}
-
-static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize);
-static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL);
-
-static struct attribute *dev_attrs[] = {
- &dev_attr_memsize.attr,
- &dev_attr_memsize_rsvd.attr,
- NULL,
-};
-
-/* the memory size unit is MB */
-static const struct attribute_group dev_attr_grp = {
- .name = "memsize-mB",
- .attrs = dev_attrs,
-};
-
-
-static void
-sort_viraddr(struct memblock_info *mb, int cnt)
-{
- int i,j;
- uint64_t tmp_pfn;
- uint64_t tmp_viraddr;
-
- /*sort virtual address and pfn */
- for(i = 0; i < cnt; i ++) {
- for(j = cnt - 1; j > i; j--) {
- if(mb[j].pfn < mb[j - 1].pfn) {
- tmp_pfn = mb[j - 1].pfn;
- mb[j - 1].pfn = mb[j].pfn;
- mb[j].pfn = tmp_pfn;
-
- tmp_viraddr = mb[j - 1].vir_addr;
- mb[j - 1].vir_addr = mb[j].vir_addr;
- mb[j].vir_addr = tmp_viraddr;
- }
- }
- }
-}
-
-static int
-dom0_find_memdata(const char * mem_name)
-{
- unsigned i;
- int idx = -1;
- for(i = 0; i< NUM_MEM_CTX; i++) {
- if(dom0_dev.mm_data[i] == NULL)
- continue;
- if (!strncmp(dom0_dev.mm_data[i]->name, mem_name,
- sizeof(char) * DOM0_NAME_MAX)) {
- idx = i;
- break;
- }
- }
-
- return idx;
-}
-
-static int
-dom0_find_mempos(void)
-{
- unsigned i;
- int idx = -1;
-
- for(i = 0; i< NUM_MEM_CTX; i++) {
- if(dom0_dev.mm_data[i] == NULL){
- idx = i;
- break;
- }
- }
-
- return idx;
-}
-
-static int
-dom0_memory_release(struct dom0_mm_data *mm_data)
-{
- int idx;
- uint32_t num_block, block_id;
-
- /* each memory block is 2M */
- num_block = mm_data->mem_size / SIZE_PER_BLOCK;
- if (num_block == 0)
- return -EINVAL;
-
- /* reset global memory data */
- idx = dom0_find_memdata(mm_data->name);
- if (idx >= 0) {
- dom0_dev.used_memsize -= mm_data->mem_size;
- dom0_dev.mm_data[idx] = NULL;
- dom0_dev.num_mem_ctx--;
- }
-
- /* reset these memory blocks status as free */
- for (idx = 0; idx < num_block; idx++) {
- block_id = mm_data->block_num[idx];
- rsv_mm_info[block_id].used = 0;
- }
-
- memset(mm_data, 0, sizeof(struct dom0_mm_data));
- vfree(mm_data);
- return 0;
-}
-
-static int
-dom0_memory_free(uint32_t rsv_size)
-{
- uint64_t vstart, vaddr;
- uint32_t i, num_block, size;
-
- if (!xen_pv_domain())
- return -1;
-
- /* each memory block is 2M */
- num_block = rsv_size / SIZE_PER_BLOCK;
- if (num_block == 0)
- return -EINVAL;
-
- /* free all memory blocks of size of 4M and destroy contiguous region */
- for (i = 0; i < dom0_dev.num_bigblock * 2; i += 2) {
- vstart = rsv_mm_info[i].vir_addr;
- if (vstart) {
- #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
- if (rsv_mm_info[i].exchange_flag)
- xen_destroy_contiguous_region(vstart,
- DOM0_CONTIG_NUM_ORDER);
- if (rsv_mm_info[i + 1].exchange_flag)
- xen_destroy_contiguous_region(vstart +
- DOM0_MEMBLOCK_SIZE,
- DOM0_CONTIG_NUM_ORDER);
- #else
- if (rsv_mm_info[i].exchange_flag)
- xen_destroy_contiguous_region(rsv_mm_info[i].pfn
- * PAGE_SIZE,
- DOM0_CONTIG_NUM_ORDER);
- if (rsv_mm_info[i + 1].exchange_flag)
- xen_destroy_contiguous_region(rsv_mm_info[i].pfn
- * PAGE_SIZE + DOM0_MEMBLOCK_SIZE,
- DOM0_CONTIG_NUM_ORDER);
- #endif
-
- size = DOM0_MEMBLOCK_SIZE * 2;
- vaddr = vstart;
- while (size > 0) {
- ClearPageReserved(virt_to_page(vaddr));
- vaddr += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
- free_pages(vstart, MAX_NUM_ORDER);
- }
- }
-
- /* free all memory blocks size of 2M and destroy contiguous region */
- for (; i < num_block; i++) {
- vstart = rsv_mm_info[i].vir_addr;
- if (vstart) {
- if (rsv_mm_info[i].exchange_flag)
- xen_destroy_contiguous_region(vstart,
- DOM0_CONTIG_NUM_ORDER);
-
- size = DOM0_MEMBLOCK_SIZE;
- vaddr = vstart;
- while (size > 0) {
- ClearPageReserved(virt_to_page(vaddr));
- vaddr += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
- free_pages(vstart, DOM0_CONTIG_NUM_ORDER);
- }
- }
-
- memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
- vfree(rsv_mm_info);
- rsv_mm_info = NULL;
-
- return 0;
-}
-
-static void
-find_free_memory(uint32_t count, struct dom0_mm_data *mm_data)
-{
- uint32_t i = 0;
- uint32_t j = 0;
-
- while ((i < count) && (j < rsv_memsize / SIZE_PER_BLOCK)) {
- if (rsv_mm_info[j].used == 0) {
- mm_data->block_info[i].pfn = rsv_mm_info[j].pfn;
- mm_data->block_info[i].vir_addr =
- rsv_mm_info[j].vir_addr;
- mm_data->block_info[i].mfn = rsv_mm_info[j].mfn;
- mm_data->block_info[i].exchange_flag =
- rsv_mm_info[j].exchange_flag;
- mm_data->block_num[i] = j;
- rsv_mm_info[j].used = 1;
- i++;
- }
- j++;
- }
-}
-
-/**
- * Find all memory segments in which physical addresses are contiguous.
- */
-static void
-find_memseg(int count, struct dom0_mm_data * mm_data)
-{
- int i = 0;
- int j, k, idx = 0;
- uint64_t zone_len, pfn, num_block;
-
- while(i < count) {
- if (mm_data->block_info[i].exchange_flag == 0) {
- i++;
- continue;
- }
- k = 0;
- pfn = mm_data->block_info[i].pfn;
- mm_data->seg_info[idx].pfn = pfn;
- mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn;
-
- for (j = i + 1; j < count; j++) {
-
- /* ignore exchange fail memory block */
- if (mm_data->block_info[j].exchange_flag == 0)
- break;
-
- if (mm_data->block_info[j].pfn !=
- (mm_data->block_info[j - 1].pfn +
- DOM0_MEMBLOCK_SIZE / PAGE_SIZE))
- break;
- ++k;
- mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn;
- }
-
- num_block = j - i;
- zone_len = num_block * DOM0_MEMBLOCK_SIZE;
- mm_data->seg_info[idx].size = zone_len;
-
- XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len);
- i = i+ num_block;
- idx++;
- if (idx == DOM0_NUM_MEMSEG)
- break;
- }
- mm_data->num_memseg = idx;
-}
-
-static int
-dom0_memory_reserve(uint32_t rsv_size)
-{
- uint64_t pfn, vstart, vaddr;
- uint32_t i, num_block, size, allocated_size = 0;
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
- dma_addr_t dma_handle;
-#endif
-
- /* 2M as memory block */
- num_block = rsv_size / SIZE_PER_BLOCK;
-
- rsv_mm_info = vmalloc(sizeof(struct memblock_info) * num_block);
- if (!rsv_mm_info) {
- XEN_ERR("Unable to allocate device memory information\n");
- return -ENOMEM;
- }
- memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block);
-
- /* try alloc size of 4M once */
- for (i = 0; i < num_block; i += 2) {
- vstart = (unsigned long)
- __get_free_pages(GFP_ATOMIC, MAX_NUM_ORDER);
- if (vstart == 0)
- break;
-
- dom0_dev.num_bigblock = i / 2 + 1;
- allocated_size = SIZE_PER_BLOCK * (i + 2);
-
- /* size of 4M */
- size = DOM0_MEMBLOCK_SIZE * 2;
-
- vaddr = vstart;
- while (size > 0) {
- SetPageReserved(virt_to_page(vaddr));
- vaddr += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
-
- pfn = virt_to_pfn(vstart);
- rsv_mm_info[i].pfn = pfn;
- rsv_mm_info[i].vir_addr = vstart;
- rsv_mm_info[i + 1].pfn =
- pfn + DOM0_MEMBLOCK_SIZE / PAGE_SIZE;
- rsv_mm_info[i + 1].vir_addr =
- vstart + DOM0_MEMBLOCK_SIZE;
- }
-
- /*if it failed to alloc 4M, and continue to alloc 2M once */
- for (; i < num_block; i++) {
- vstart = (unsigned long)
- __get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER);
- if (vstart == 0) {
- XEN_ERR("allocate memory fail.\n");
- dom0_memory_free(allocated_size);
- return -ENOMEM;
- }
-
- allocated_size += SIZE_PER_BLOCK;
-
- size = DOM0_MEMBLOCK_SIZE;
- vaddr = vstart;
- while (size > 0) {
- SetPageReserved(virt_to_page(vaddr));
- vaddr += PAGE_SIZE;
- size -= PAGE_SIZE;
- }
- pfn = virt_to_pfn(vstart);
- rsv_mm_info[i].pfn = pfn;
- rsv_mm_info[i].vir_addr = vstart;
- }
-
- sort_viraddr(rsv_mm_info, num_block);
-
- for (i = 0; i< num_block; i++) {
-
- /*
- * This API is used to exchage MFN for getting a block of
- * contiguous physical addresses, its maximum size is 2M.
- */
- #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
- if (xen_create_contiguous_region(rsv_mm_info[i].vir_addr,
- DOM0_CONTIG_NUM_ORDER, 0) == 0) {
- #else
- if (xen_create_contiguous_region(rsv_mm_info[i].pfn * PAGE_SIZE,
- DOM0_CONTIG_NUM_ORDER, 0, &dma_handle) == 0) {
- #endif
- rsv_mm_info[i].exchange_flag = 1;
- rsv_mm_info[i].mfn =
- pfn_to_mfn(rsv_mm_info[i].pfn);
- rsv_mm_info[i].used = 0;
- } else {
- XEN_ERR("exchange memeory fail\n");
- rsv_mm_info[i].exchange_flag = 0;
- dom0_dev.fail_times++;
- if (dom0_dev.fail_times > MAX_EXCHANGE_FAIL_TIME) {
- dom0_memory_free(rsv_size);
- return -EFAULT;
- }
- }
- }
-
- return 0;
-}
-
-static int
-dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data)
-{
- uint32_t num_block;
- int idx;
-
- /* check if there is a free name buffer */
- memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX);
- mm_data->name[DOM0_NAME_MAX - 1] = '\0';
- idx = dom0_find_mempos();
- if (idx < 0)
- return -1;
-
- num_block = meminfo->size / SIZE_PER_BLOCK;
- /* find free memory and new memory segments*/
- find_free_memory(num_block, mm_data);
- find_memseg(num_block, mm_data);
-
- /* update private memory data */
- mm_data->refcnt++;
- mm_data->mem_size = meminfo->size;
-
- /* update global memory data */
- dom0_dev.mm_data[idx] = mm_data;
- dom0_dev.num_mem_ctx++;
- dom0_dev.used_memsize += mm_data->mem_size;
-
- return 0;
-}
-
-static int
-dom0_check_memory (struct memory_info *meminfo)
-{
- int idx;
- uint64_t mem_size;
-
- /* round memory size to the next even number. */
- if (meminfo->size % 2)
- ++meminfo->size;
-
- mem_size = meminfo->size;
- if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) {
- XEN_ERR("Memory data space is full in Dom0 driver\n");
- return -1;
- }
- idx = dom0_find_memdata(meminfo->name);
- if (idx >= 0) {
- XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n",
- meminfo->name);
- return -1;
- }
- if ((dom0_dev.used_memsize + mem_size) > rsv_memsize) {
- XEN_ERR("Total size can't be larger than reserved size.\n");
- return -1;
- }
-
- return 0;
-}
-
-static int __init
-dom0_init(void)
-{
- if (!xen_domain())
- return -ENODEV;
-
- if (rsv_memsize > DOM0_CONFIG_MEMSIZE) {
- XEN_ERR("The reserved memory size cannot be greater than %d\n",
- DOM0_CONFIG_MEMSIZE);
- return -EINVAL;
- }
-
- /* Setup the misc device */
- dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR;
- dom0_dev.miscdev.name = "dom0_mm";
- dom0_dev.miscdev.fops = &data_fops;
-
- /* register misc char device */
- if (misc_register(&dom0_dev.miscdev) != 0) {
- XEN_ERR("Misc device registration failed\n");
- return -EPERM;
- }
-
- mutex_init(&dom0_dev.data_lock);
- dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj);
-
- if (!dom0_kobj) {
- XEN_ERR("dom0-mm object creation failed\n");
- misc_deregister(&dom0_dev.miscdev);
- return -ENOMEM;
- }
-
- if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) {
- kobject_put(dom0_kobj);
- misc_deregister(&dom0_dev.miscdev);
- return -EPERM;
- }
-
- if (dom0_memory_reserve(rsv_memsize) < 0) {
- sysfs_remove_group(dom0_kobj, &dev_attr_grp);
- kobject_put(dom0_kobj);
- misc_deregister(&dom0_dev.miscdev);
- return -ENOMEM;
- }
-
- XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n");
-
- return 0;
-}
-
-static void __exit
-dom0_exit(void)
-{
- if (rsv_mm_info != NULL)
- dom0_memory_free(rsv_memsize);
-
- sysfs_remove_group(dom0_kobj, &dev_attr_grp);
- kobject_put(dom0_kobj);
- misc_deregister(&dom0_dev.miscdev);
-
- XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n");
-}
-
-static int
-dom0_open(struct inode *inode, struct file *file)
-{
- file->private_data = NULL;
-
- XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n");
- return 0;
-}
-
-static int
-dom0_release(struct inode *inode, struct file *file)
-{
- int ret = 0;
- struct dom0_mm_data *mm_data = file->private_data;
-
- if (mm_data == NULL)
- return ret;
-
- mutex_lock(&dom0_dev.data_lock);
- if (--mm_data->refcnt == 0)
- ret = dom0_memory_release(mm_data);
- mutex_unlock(&dom0_dev.data_lock);
-
- file->private_data = NULL;
- XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n");
- return ret;
-}
-
-static int
-dom0_mmap(struct file *file, struct vm_area_struct *vm)
-{
- int status = 0;
- uint32_t idx = vm->vm_pgoff;
- uint64_t pfn, size = vm->vm_end - vm->vm_start;
- struct dom0_mm_data *mm_data = file->private_data;
-
- if(mm_data == NULL)
- return -EINVAL;
-
- mutex_lock(&dom0_dev.data_lock);
- if (idx >= mm_data->num_memseg) {
- mutex_unlock(&dom0_dev.data_lock);
- return -EINVAL;
- }
-
- if (size > mm_data->seg_info[idx].size){
- mutex_unlock(&dom0_dev.data_lock);
- return -EINVAL;
- }
-
- XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size);
-
- pfn = mm_data->seg_info[idx].pfn;
- mutex_unlock(&dom0_dev.data_lock);
-
- status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED);
-
- return status;
-}
-static int
-dom0_ioctl(struct file *file,
- unsigned int ioctl_num,
- unsigned long ioctl_param)
-{
- int idx, ret;
- char name[DOM0_NAME_MAX] = {0};
- struct memory_info meminfo;
- struct dom0_mm_data *mm_data = file->private_data;
-
- XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param);
-
- /**
- * Switch according to the ioctl called
- */
- switch _IOC_NR(ioctl_num) {
- case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG):
- ret = copy_from_user(&meminfo, (void *)ioctl_param,
- sizeof(struct memory_info));
- if (ret)
- return -EFAULT;
-
- if (mm_data != NULL) {
- XEN_ERR("Cannot create memory segment for the same"
- " file descriptor\n");
- return -EINVAL;
- }
-
- /* Allocate private data */
- mm_data = vmalloc(sizeof(struct dom0_mm_data));
- if (!mm_data) {
- XEN_ERR("Unable to allocate device private data\n");
- return -ENOMEM;
- }
- memset(mm_data, 0, sizeof(struct dom0_mm_data));
-
- mutex_lock(&dom0_dev.data_lock);
- /* check if we can allocate memory*/
- if (dom0_check_memory(&meminfo) < 0) {
- mutex_unlock(&dom0_dev.data_lock);
- vfree(mm_data);
- return -EINVAL;
- }
-
- /* allocate memory and created memory segments*/
- if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) {
- XEN_ERR("create memory segment fail.\n");
- mutex_unlock(&dom0_dev.data_lock);
- return -EIO;
- }
-
- file->private_data = mm_data;
- mutex_unlock(&dom0_dev.data_lock);
- break;
-
- /* support multiple process in term of memory mapping*/
- case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG):
- ret = copy_from_user(name, (void *)ioctl_param,
- sizeof(char) * DOM0_NAME_MAX);
- if (ret)
- return -EFAULT;
-
- mutex_lock(&dom0_dev.data_lock);
- idx = dom0_find_memdata(name);
- if (idx < 0) {
- mutex_unlock(&dom0_dev.data_lock);
- return -EINVAL;
- }
-
- mm_data = dom0_dev.mm_data[idx];
- mm_data->refcnt++;
- file->private_data = mm_data;
- mutex_unlock(&dom0_dev.data_lock);
- break;
-
- case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG):
- ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg,
- sizeof(int));
- if (ret)
- return -EFAULT;
- break;
-
- case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO):
- ret = copy_to_user((void *)ioctl_param,
- &mm_data->seg_info[0],
- sizeof(struct memseg_info) *
- mm_data->num_memseg);
- if (ret)
- return -EFAULT;
- break;
- default:
- XEN_PRINT("IOCTL default \n");
- break;
- }
-
- return 0;
-}
-
-module_init(dom0_init);
-module_exit(dom0_exit);
-
-module_param(rsv_memsize, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(rsv_memsize, "Xen-dom0 reserved memory size(MB).\n");
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 3a8f1540..f4f46c1b 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -44,8 +44,6 @@ DPDK_2.0 {
rte_free;
rte_get_hpet_cycles;
rte_get_hpet_hz;
- rte_get_log_level;
- rte_get_log_type;
rte_get_tsc_hz;
rte_hexdump;
rte_intr_callback_register;
@@ -62,9 +60,7 @@ DPDK_2.0 {
rte_malloc_set_limit;
rte_malloc_socket;
rte_malloc_validate;
- rte_malloc_virt2phy;
rte_mem_lock_page;
- rte_mem_phy2mch;
rte_mem_virt2phy;
rte_memdump;
rte_memory_get_nchannel;
@@ -78,8 +74,6 @@ DPDK_2.0 {
rte_openlog_stream;
rte_realloc;
rte_set_application_usage_hook;
- rte_set_log_level;
- rte_set_log_type;
rte_socket_id;
rte_strerror;
rte_strsplit;
@@ -87,8 +81,6 @@ DPDK_2.0 {
rte_thread_get_affinity;
rte_thread_set_affinity;
rte_vlog;
- rte_xen_dom0_memory_attach;
- rte_xen_dom0_memory_init;
rte_zmalloc;
rte_zmalloc_socket;
@@ -118,8 +110,6 @@ DPDK_2.2 {
rte_keepalive_dispatch_pings;
rte_keepalive_mark_alive;
rte_keepalive_register_core;
- rte_xen_dom0_supported;
- rte_xen_mem_phy2mch;
} DPDK_2.1;
@@ -134,7 +124,6 @@ DPDK_16.04 {
DPDK_16.07 {
global:
- pci_get_sysfs_path;
rte_keepalive_mark_sleep;
rte_keepalive_register_relay_callback;
rte_rtm_supported;
@@ -174,25 +163,6 @@ DPDK_17.05 {
rte_log_set_global_level;
rte_log_set_level;
rte_log_set_level_regexp;
- rte_pci_detach;
- rte_pci_dump;
- rte_pci_ioport_map;
- rte_pci_ioport_read;
- rte_pci_ioport_unmap;
- rte_pci_ioport_write;
- rte_pci_map_device;
- rte_pci_probe;
- rte_pci_probe_one;
- rte_pci_read_config;
- rte_pci_register;
- rte_pci_scan;
- rte_pci_unmap_device;
- rte_pci_unregister;
- rte_pci_write_config;
- rte_vdev_init;
- rte_vdev_register;
- rte_vdev_uninit;
- rte_vdev_unregister;
vfio_get_container_fd;
vfio_get_group_fd;
vfio_get_group_no;
@@ -209,6 +179,27 @@ DPDK_17.08 {
} DPDK_17.05;
+DPDK_17.11 {
+ global:
+
+ rte_eal_create_uio_dev;
+ rte_bus_get_iommu_class;
+ rte_eal_has_pci;
+ rte_eal_iova_mode;
+ rte_eal_mbuf_default_mempool_ops;
+ rte_eal_using_phys_addrs;
+ rte_eal_vfio_intr_mode;
+ rte_lcore_has_role;
+ rte_malloc_virt2iova;
+ rte_mem_virt2iova;
+ rte_vfio_enable;
+ rte_vfio_is_enabled;
+ rte_vfio_noiommu_is_enabled;
+ rte_vfio_release_device;
+ rte_vfio_setup_device;
+
+} DPDK_17.08;
+
EXPERIMENTAL {
global:
@@ -217,28 +208,31 @@ EXPERIMENTAL {
rte_eal_devargs_remove;
rte_eal_hotplug_add;
rte_eal_hotplug_remove;
- rte_service_disable_on_lcore;
+ rte_service_component_register;
+ rte_service_component_unregister;
+ rte_service_component_runstate_set;
rte_service_dump;
- rte_service_enable_on_lcore;
rte_service_get_by_id;
rte_service_get_by_name;
rte_service_get_count;
- rte_service_get_enabled_on_lcore;
- rte_service_is_running;
+ rte_service_get_name;
rte_service_lcore_add;
rte_service_lcore_count;
+ rte_service_lcore_count_services;
rte_service_lcore_del;
rte_service_lcore_list;
rte_service_lcore_reset_all;
rte_service_lcore_start;
rte_service_lcore_stop;
+ rte_service_map_lcore_get;
+ rte_service_map_lcore_set;
rte_service_probe_capability;
- rte_service_register;
rte_service_reset;
+ rte_service_run_iter_on_app_lcore;
+ rte_service_runstate_get;
+ rte_service_runstate_set;
+ rte_service_set_runstate_mapped_check;
rte_service_set_stats_enable;
- rte_service_start;
rte_service_start_with_defaults;
- rte_service_stop;
- rte_service_unregister;
-} DPDK_17.08;
+} DPDK_17.11;