diff options
author | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2017-05-16 14:51:32 +0200 |
---|---|---|
committer | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2017-05-16 16:20:45 +0200 |
commit | 7595afa4d30097c1177b69257118d8ad89a539be (patch) | |
tree | 4bfeadc905c977e45e54a90c42330553b8942e4e /lib/librte_eal | |
parent | ce3d555e43e3795b5d9507fcfc76b7a0a92fd0d6 (diff) |
Imported Upstream version 17.05
Change-Id: Id1e419c5a214e4a18739663b91f0f9a549f1fdc6
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Diffstat (limited to 'lib/librte_eal')
87 files changed, 3688 insertions, 2560 deletions
diff --git a/lib/librte_eal/Makefile b/lib/librte_eal/Makefile index cf11a099..5690bb49 100644 --- a/lib/librte_eal/Makefile +++ b/lib/librte_eal/Makefile @@ -33,6 +33,8 @@ include $(RTE_SDK)/mk/rte.vars.mk DIRS-y += common DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += linuxapp +DEPDIRS-linuxapp := common DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += bsdapp +DEPDIRS-bsdapp := common include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index a15b762b..a0f99502 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -48,7 +48,7 @@ LDLIBS += -lgcc_s EXPORT_MAP := rte_eal_version.map -LIBABIVER := 3 +LIBABIVER := 4 # specific to bsdapp exec-env SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c @@ -78,6 +78,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_cpuflags.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_bus.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c @@ -110,7 +111,4 @@ INC := rte_interrupts.h SYMLINK-$(CONFIG_RTE_EXEC_ENV_BSDAPP)-include/exec-env := \ $(addprefix include/exec-env/,$(INC)) -DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common -DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common/arch/$(ARCH_DIR) - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 35e3117a..05f0c1f9 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -56,6 +56,7 @@ #include <rte_launch.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> +#include <rte_errno.h> #include <rte_per_lcore.h> #include <rte_lcore.h> #include <rte_log.h> @@ -64,6 +65,7 @@ #include 
<rte_string_fns.h> #include <rte_cpuflags.h> #include <rte_interrupts.h> +#include <rte_bus.h> #include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> @@ -193,7 +195,7 @@ rte_eal_config_create(void) rte_panic("Cannot mmap memory for rte_config\n"); } memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); - rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; + rte_config.mem_config = rte_mem_cfg_addr; } /* attach to an existing shared memory config */ @@ -218,7 +220,7 @@ rte_eal_config_attach(void) if (rte_mem_cfg_addr == MAP_FAILED) rte_panic("Cannot mmap memory for rte_config\n"); - rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; + rte_config.mem_config = rte_mem_cfg_addr; } /* Detect if we are a primary or a secondary process */ @@ -321,8 +323,6 @@ eal_log_level_parse(int argc, char **argv) optind = 1; optreset = 1; - eal_reset_internal_config(&internal_config); - while ((opt = getopt_long(argc, argvopt, eal_short_options, eal_long_options, &option_index)) != EOF) { @@ -486,6 +486,12 @@ rte_eal_iopl_init(void) return 0; } +static void rte_eal_init_alert(const char *msg) +{ + fprintf(stderr, "EAL: FATAL: %s\n", msg); + RTE_LOG(ERR, EAL, "%s\n", msg); +} + /* Launch threads, called at application init(). 
*/ int rte_eal_init(int argc, char **argv) @@ -497,29 +503,47 @@ rte_eal_init(int argc, char **argv) char thread_name[RTE_MAX_THREAD_NAME_LEN]; /* checks if the machine is adequate */ - rte_cpu_check_supported(); + if (!rte_cpu_is_supported()) { + rte_eal_init_alert("unsupported cpu type."); + rte_errno = ENOTSUP; + return -1; + } - if (!rte_atomic32_test_and_set(&run_once)) + if (!rte_atomic32_test_and_set(&run_once)) { + rte_eal_init_alert("already called initialization."); + rte_errno = EALREADY; return -1; + } thread_id = pthread_self(); - eal_log_level_parse(argc, argv); + eal_reset_internal_config(&internal_config); /* set log level as early as possible */ - rte_set_log_level(internal_config.log_level); + eal_log_level_parse(argc, argv); - if (rte_eal_cpu_init() < 0) - rte_panic("Cannot detect lcores\n"); + if (rte_eal_cpu_init() < 0) { + rte_eal_init_alert("Cannot detect lcores."); + rte_errno = ENOTSUP; + return -1; + } fctret = eal_parse_args(argc, argv); - if (fctret < 0) - exit(1); + if (fctret < 0) { + rte_eal_init_alert("Invalid 'command line' arguments."); + rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } if (internal_config.no_hugetlbfs == 0 && internal_config.process_type != RTE_PROC_SECONDARY && - eal_hugepage_info_init() < 0) - rte_panic("Cannot get hugepage information\n"); + eal_hugepage_info_init() < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { if (internal_config.no_hugetlbfs) @@ -543,31 +567,45 @@ rte_eal_init(int argc, char **argv) rte_config_init(); - if (rte_eal_memory_init() < 0) - rte_panic("Cannot init memory\n"); - - if (rte_eal_memzone_init() < 0) - rte_panic("Cannot init memzone\n"); + if (rte_eal_memory_init() < 0) { + rte_eal_init_alert("Cannot init memory\n"); + rte_errno = ENOMEM; + return -1; + } - if (rte_eal_tailqs_init() < 0) - 
rte_panic("Cannot init tail queues for objects\n"); + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone\n"); + rte_errno = ENODEV; + return -1; + } - if (rte_eal_alarm_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_eal_tailqs_init() < 0) { + rte_eal_init_alert("Cannot init tail queues for objects\n"); + rte_errno = EFAULT; + return -1; + } - if (rte_eal_intr_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_eal_alarm_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + /* rte_eal_alarm_init sets rte_errno on failure. */ + return -1; + } - if (rte_eal_timer_init() < 0) - rte_panic("Cannot init HPET or TSC timers\n"); + if (rte_eal_intr_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + return -1; + } - if (rte_eal_pci_init() < 0) - rte_panic("Cannot init PCI\n"); + if (rte_eal_timer_init() < 0) { + rte_eal_init_alert("Cannot init HPET or TSC timers\n"); + rte_errno = ENOTSUP; + return -1; + } eal_check_mem_on_local_socket(); if (eal_plugins_init() < 0) - rte_panic("Cannot init plugins\n"); + rte_eal_init_alert("Cannot init plugins\n"); eal_thread_init_master(rte_config.master_lcore); @@ -577,8 +615,11 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, thread_id, cpuset, ret == 0 ? 
"" : "..."); - if (rte_eal_dev_init() < 0) - rte_panic("Cannot init pmd devices\n"); + if (rte_bus_scan()) { + rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_errno = ENODEV; + return -1; + } RTE_LCORE_FOREACH_SLAVE(i) { @@ -612,9 +653,12 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); - /* Probe & Initialize PCI devices */ - if (rte_eal_pci_probe()) - rte_panic("Cannot probe PCI\n"); + /* Probe all the buses and devices/drivers on them */ + if (rte_bus_probe()) { + rte_eal_init_alert("Cannot probe devices\n"); + rte_errno = ENOTSUP; + return -1; + } rte_eal_mcfg_complete(); diff --git a/lib/librte_eal/bsdapp/eal/eal_debug.c b/lib/librte_eal/bsdapp/eal/eal_debug.c index 5fbc17c5..e1c75548 100644 --- a/lib/librte_eal/bsdapp/eal/eal_debug.c +++ b/lib/librte_eal/bsdapp/eal/eal_debug.c @@ -31,7 +31,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifdef RTE_BACKTRACE #include <execinfo.h> +#endif #include <stdarg.h> #include <signal.h> #include <stdlib.h> @@ -47,6 +49,7 @@ /* dump the stack of the calling core */ void rte_dump_stack(void) { +#ifdef RTE_BACKTRACE void *func[BACKTRACE_SIZE]; char **symb = NULL; int size; @@ -64,6 +67,7 @@ void rte_dump_stack(void) } free(symb); +#endif /* RTE_BACKTRACE */ } /* not implemented in this environment */ diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c index 836e4836..ea2afff4 100644 --- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c +++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c @@ -36,29 +36,37 @@ #include "eal_private.h" int -rte_intr_callback_register(struct rte_intr_handle *intr_handle __rte_unused, - rte_intr_callback_fn cb __rte_unused, - void *cb_arg __rte_unused) +rte_intr_callback_register(const struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, + void *cb_arg) { + RTE_SET_USED(intr_handle); + RTE_SET_USED(cb); + RTE_SET_USED(cb_arg); 
+ return -ENOTSUP; } int -rte_intr_callback_unregister(struct rte_intr_handle *intr_handle __rte_unused, - rte_intr_callback_fn cb_fn __rte_unused, - void *cb_arg __rte_unused) +rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, + void *cb_arg) { + RTE_SET_USED(intr_handle); + RTE_SET_USED(cb); + RTE_SET_USED(cb_arg); + return -ENOTSUP; } int -rte_intr_enable(struct rte_intr_handle *intr_handle __rte_unused) +rte_intr_enable(const struct rte_intr_handle *intr_handle __rte_unused) { return -ENOTSUP; } int -rte_intr_disable(struct rte_intr_handle *intr_handle __rte_unused) +rte_intr_disable(const struct rte_intr_handle *intr_handle __rte_unused) { return -ENOTSUP; } diff --git a/lib/librte_eal/bsdapp/eal/eal_lcore.c b/lib/librte_eal/bsdapp/eal/eal_lcore.c index b8bfafde..bc584dd5 100644 --- a/lib/librte_eal/bsdapp/eal/eal_lcore.c +++ b/lib/librte_eal/bsdapp/eal/eal_lcore.c @@ -53,12 +53,14 @@ eal_cpu_core_id(__rte_unused unsigned lcore_id) static int eal_get_ncpus(void) { + static int ncpu = -1; int mib[2] = {CTL_HW, HW_NCPU}; - int ncpu; size_t len = sizeof(ncpu); - sysctl(mib, 2, &ncpu, &len, NULL, 0); - RTE_LOG(INFO, EAL, "Sysctl reports %d cpus\n", ncpu); + if (ncpu < 0) { + sysctl(mib, 2, &ncpu, &len, NULL, 0); + RTE_LOG(INFO, EAL, "Sysctl reports %d cpus\n", ncpu); + } return ncpu; } diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c index 8b3ed881..e321461d 100644 --- a/lib/librte_eal/bsdapp/eal/eal_pci.c +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c @@ -87,18 +87,11 @@ * enabling bus master. 
*/ -/* unbind kernel driver for this device */ -int -pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused) -{ - RTE_LOG(ERR, EAL, "RTE_PCI_DRV_FORCE_UNBIND flag is not implemented " - "for BSD\n"); - return -ENOTSUP; -} +extern struct rte_pci_bus rte_pci_bus; /* Map pci device */ int -rte_eal_pci_map_device(struct rte_pci_device *dev) +rte_pci_map_device(struct rte_pci_device *dev) { int ret = -1; @@ -120,7 +113,7 @@ rte_eal_pci_map_device(struct rte_pci_device *dev) /* Unmap pci device */ void -rte_eal_pci_unmap_device(struct rte_pci_device *dev) +rte_pci_unmap_device(struct rte_pci_device *dev) { /* try unmapping the NIC resources */ switch (dev->kdrv) { @@ -289,6 +282,9 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) /* FreeBSD has no NUMA support (yet) */ dev->device.numa_node = 0; + rte_pci_device_name(&dev->addr, dev->name, sizeof(dev->name)); + dev->device.name = dev->name; + /* FreeBSD has only one pass through driver */ dev->kdrv = RTE_KDRV_NIC_UIO; @@ -322,20 +318,19 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) } /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&pci_device_list)) { - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { + rte_pci_add_device(dev); } else { struct rte_pci_device *dev2 = NULL; int ret; - TAILQ_FOREACH(dev2, &pci_device_list, next) { + TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); if (ret > 0) continue; else if (ret < 0) { - TAILQ_INSERT_BEFORE(dev2, dev, next); - return 0; + rte_pci_insert_device(dev2, dev); } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; @@ -343,10 +338,10 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) dev->mem_resource, sizeof(dev->mem_resource)); free(dev); - return 0; } + return 0; } - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + rte_pci_add_device(dev); } return 0; @@ -361,7 +356,7 @@ skipdev: * list. 
Call pci_scan_one() for each pci entry found. */ int -rte_eal_pci_scan(void) +rte_pci_scan(void) { int fd; unsigned dev_count = 0; @@ -374,6 +369,10 @@ rte_eal_pci_scan(void) .matches = &matches[0], }; + /* for debug purposes, PCI can be disabled */ + if (internal_config.no_pci) + return 0; + fd = open("/dev/pci", O_RDONLY); if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); @@ -456,10 +455,11 @@ error: } /* Read PCI config space. */ -int rte_eal_pci_read_config(const struct rte_pci_device *dev, - void *buf, size_t len, off_t offset) +int rte_pci_read_config(const struct rte_pci_device *dev, + void *buf, size_t len, off_t offset) { int fd = -1; + int size; struct pci_io pi = { .pi_sel = { .pc_domain = dev->addr.domain, @@ -468,25 +468,28 @@ int rte_eal_pci_read_config(const struct rte_pci_device *dev, .pc_func = dev->addr.function, }, .pi_reg = offset, - .pi_width = len, }; - if (len == 3 || len > sizeof(pi.pi_data)) { - RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__); - goto error; - } - fd = open("/dev/pci", O_RDWR); if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); goto error; } - if (ioctl(fd, PCIOCREAD, &pi) < 0) - goto error; + while (len > 0) { + size = (len >= 4) ? 4 : ((len >= 2) ? 2 : 1); + pi.pi_width = size; + + if (ioctl(fd, PCIOCREAD, &pi) < 0) + goto error; + memcpy(buf, &pi.pi_data, size); + + buf = (char *)buf + size; + pi.pi_reg += size; + len -= size; + } close(fd); - memcpy(buf, &pi.pi_data, len); return 0; error: @@ -496,8 +499,8 @@ int rte_eal_pci_read_config(const struct rte_pci_device *dev, } /* Write PCI config space. 
*/ -int rte_eal_pci_write_config(const struct rte_pci_device *dev, - const void *buf, size_t len, off_t offset) +int rte_pci_write_config(const struct rte_pci_device *dev, + const void *buf, size_t len, off_t offset) { int fd = -1; @@ -539,8 +542,8 @@ int rte_eal_pci_write_config(const struct rte_pci_device *dev, } int -rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) +rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) { int ret; @@ -567,7 +570,7 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, static void pci_uio_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) + void *data, size_t len, off_t offset) { #if defined(RTE_ARCH_X86) uint8_t *d; @@ -595,8 +598,8 @@ pci_uio_ioport_read(struct rte_pci_ioport *p, } void -rte_eal_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) +rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { case RTE_KDRV_NIC_UIO: @@ -609,7 +612,7 @@ rte_eal_pci_ioport_read(struct rte_pci_ioport *p, static void pci_uio_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) + const void *data, size_t len, off_t offset) { #if defined(RTE_ARCH_X86) const uint8_t *s; @@ -619,13 +622,13 @@ pci_uio_ioport_write(struct rte_pci_ioport *p, for (s = data; len > 0; s += size, reg += size, len -= size) { if (len >= 4) { size = 4; - outl(*(const uint32_t *)s, reg); + outl(reg, *(const uint32_t *)s); } else if (len >= 2) { size = 2; - outw(*(const uint16_t *)s, reg); + outw(reg, *(const uint16_t *)s); } else { size = 1; - outb(*s, reg); + outb(reg, *s); } } #else @@ -637,8 +640,8 @@ pci_uio_ioport_write(struct rte_pci_ioport *p, } void -rte_eal_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) +rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) { switch 
(p->dev->kdrv) { case RTE_KDRV_NIC_UIO: @@ -650,7 +653,7 @@ rte_eal_pci_ioport_write(struct rte_pci_ioport *p, } int -rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) +rte_pci_ioport_unmap(struct rte_pci_ioport *p) { int ret; @@ -667,18 +670,3 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) return ret; } - -/* Init the PCI EAL subsystem */ -int -rte_eal_pci_init(void) -{ - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - if (rte_eal_pci_scan() < 0) { - RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); - return -1; - } - return 0; -} diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map index 2f81f7c0..2e48a736 100644 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -6,8 +6,6 @@ DPDK_2.0 { eal_parse_sysfs_value; eal_timer_source; lcore_config; - pci_device_list; - pci_driver_list; per_lcore__lcore_id; per_lcore__rte_errno; rte_calloc; @@ -22,12 +20,9 @@ DPDK_2.0 { rte_dump_tailq; rte_eal_alarm_cancel; rte_eal_alarm_set; - rte_eal_dev_init; rte_eal_devargs_add; rte_eal_devargs_dump; rte_eal_devargs_type_count; - rte_eal_driver_register; - rte_eal_driver_unregister; rte_eal_get_configuration; rte_eal_get_lcore_state; rte_eal_get_physmem_layout; @@ -40,18 +35,10 @@ DPDK_2.0 { rte_eal_mp_remote_launch; rte_eal_mp_wait_lcore; rte_eal_parse_devargs_str; - rte_eal_pci_dump; - rte_eal_pci_probe; - rte_eal_pci_probe_one; - rte_eal_pci_register; - rte_eal_pci_scan; - rte_eal_pci_unregister; rte_eal_process_type; rte_eal_remote_launch; rte_eal_tailq_lookup; rte_eal_tailq_register; - rte_eal_vdev_init; - rte_eal_vdev_uninit; rte_eal_wait_lcore; rte_exit; rte_free; @@ -66,11 +53,8 @@ DPDK_2.0 { rte_intr_disable; rte_intr_enable; rte_log; - rte_log_add_in_history; rte_log_cur_msg_loglevel; rte_log_cur_msg_logtype; - rte_log_dump_history; - rte_log_set_history; rte_logs; rte_malloc; rte_malloc_dump_stats; @@ 
-114,9 +98,6 @@ DPDK_2.0 { DPDK_2.1 { global: - rte_eal_pci_detach; - rte_eal_pci_read_config; - rte_eal_pci_write_config; rte_intr_allow_others; rte_intr_dp_is_en; rte_intr_efd_disable; @@ -142,12 +123,6 @@ DPDK_16.04 { global: rte_cpu_get_flag_name; - rte_eal_pci_ioport_map; - rte_eal_pci_ioport_read; - rte_eal_pci_ioport_unmap; - rte_eal_pci_ioport_write; - rte_eal_pci_map_device; - rte_eal_pci_unmap_device; rte_eal_primary_proc_alive; } DPDK_2.2; @@ -170,7 +145,51 @@ DPDK_16.11 { rte_delay_us_callback_register; rte_eal_dev_attach; rte_eal_dev_detach; - rte_eal_vdrv_register; - rte_eal_vdrv_unregister; } DPDK_16.07; + +DPDK_17.02 { + global: + + rte_bus_dump; + rte_bus_probe; + rte_bus_register; + rte_bus_scan; + rte_bus_unregister; + +} DPDK_16.11; + +DPDK_17.05 { + global: + + rte_cpu_is_supported; + rte_log_dump; + rte_log_register; + rte_log_get_global_level; + rte_log_set_global_level; + rte_log_set_level; + rte_log_set_level_regexp; + rte_pci_detach; + rte_pci_dump; + rte_pci_ioport_map; + rte_pci_ioport_read; + rte_pci_ioport_unmap; + rte_pci_ioport_write; + rte_pci_map_device; + rte_pci_probe; + rte_pci_probe_one; + rte_pci_read_config; + rte_pci_register; + rte_pci_scan; + rte_pci_unmap_device; + rte_pci_unregister; + rte_pci_write_config; + rte_vdev_init; + rte_vdev_register; + rte_vdev_uninit; + rte_vdev_unregister; + vfio_get_container_fd; + vfio_get_group_fd; + vfio_get_group_no; + +} DPDK_17.02; diff --git a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c index 99a4975c..4bd7545a 100644 --- a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c +++ b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c @@ -180,6 +180,10 @@ nic_uio_probe (device_t dev) unsigned int device = pci_get_slot(dev); unsigned int function = pci_get_function(dev); + char bdf_str[256]; + char *token, *remaining; + + /* First check if we found this on load */ for (i = 0; i < num_detached; i++) if (bus == pci_get_bus(detached_devices[i]) && device == 
pci_get_slot(detached_devices[i]) && @@ -188,6 +192,45 @@ nic_uio_probe (device_t dev) return BUS_PROBE_SPECIFIC; } + /* otherwise check if it's a new device and if it matches the BDF */ + memset(bdf_str, 0, sizeof(bdf_str)); + TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str)); + remaining = bdf_str; + while (1) { + if (remaining == NULL || remaining[0] == '\0') + break; + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + bus = strtol(token, NULL, 10); + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + device = strtol(token, NULL, 10); + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + function = strtol(token, NULL, 10); + + if (bus == pci_get_bus(dev) && + device == pci_get_slot(dev) && + function == pci_get_function(dev)) { + + if (num_detached < MAX_DETACHED_DEVICES) { + printf("%s: probed dev=%p\n", + __func__, dev); + detached_devices[num_detached++] = dev; + device_set_desc(dev, "DPDK PCI Device"); + return BUS_PROBE_SPECIFIC; + } else { + printf("%s: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n", + __func__, MAX_DETACHED_DEVICES, + dev); + break; + } + } + } + return ENXIO; } @@ -248,6 +291,7 @@ nic_uio_load(void) memset(bdf_str, 0, sizeof(bdf_str)); TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str)); remaining = bdf_str; + printf("nic_uio: hw.nic_uio.bdfs = '%s'\n", bdf_str); /* * Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f". 
* But the code below does not try differentiate between : and , diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index dfd64aa5..a5bd1089 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -38,16 +38,14 @@ INC += rte_per_lcore.h rte_random.h INC += rte_tailq.h rte_interrupts.h rte_alarm.h INC += rte_string_fns.h rte_version.h INC += rte_eal_memconfig.h rte_malloc_heap.h -INC += rte_hexdump.h rte_devargs.h rte_dev.h rte_vdev.h +INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_vdev.h INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h -ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y) -INC += rte_warnings.h -endif - GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h +GENERIC_INC += rte_vect.h rte_io.h + # defined in mk/arch/$(RTE_ARCH)/rte.vars.mk ARCH_DIR ?= $(RTE_ARCH) ARCH_INC := $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h)) diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c new file mode 100644 index 00000000..8f9baf8b --- /dev/null +++ b/lib/librte_eal/common/eal_common_bus.c @@ -0,0 +1,147 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 NXP + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of NXP nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <string.h> +#include <sys/queue.h> + +#include <rte_bus.h> + +#include "eal_private.h" + +struct rte_bus_list rte_bus_list = + TAILQ_HEAD_INITIALIZER(rte_bus_list); + +void +rte_bus_register(struct rte_bus *bus) +{ + RTE_VERIFY(bus); + RTE_VERIFY(bus->name && strlen(bus->name)); + /* A bus should mandatorily have the scan implemented */ + RTE_VERIFY(bus->scan); + RTE_VERIFY(bus->probe); + + TAILQ_INSERT_TAIL(&rte_bus_list, bus, next); + RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name); +} + +void +rte_bus_unregister(struct rte_bus *bus) +{ + TAILQ_REMOVE(&rte_bus_list, bus, next); + RTE_LOG(DEBUG, EAL, "Unregistered [%s] bus.\n", bus->name); +} + +/* Scan all the buses for registered devices */ +int +rte_bus_scan(void) +{ + int ret; + struct rte_bus *bus = NULL; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + ret = bus->scan(); + if (ret) { + RTE_LOG(ERR, EAL, "Scan for (%s) bus failed.\n", + bus->name); + return ret; + } + } + + return 0; +} + +/* 
Probe all devices of all buses */ +int +rte_bus_probe(void) +{ + int ret; + struct rte_bus *bus, *vbus = NULL; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + if (!strcmp(bus->name, "virtual")) { + vbus = bus; + continue; + } + + ret = bus->probe(); + if (ret) { + RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n", + bus->name); + return ret; + } + } + + if (vbus) { + ret = vbus->probe(); + if (ret) { + RTE_LOG(ERR, EAL, "Bus (%s) probe failed.\n", + vbus->name); + return ret; + } + } + + return 0; +} + +/* Dump information of a single bus */ +static int +bus_dump_one(FILE *f, struct rte_bus *bus) +{ + int ret; + + /* For now, dump only the bus name */ + ret = fprintf(f, " %s\n", bus->name); + + /* Error in case of inability in writing to stream */ + if (ret < 0) + return ret; + + return 0; +} + +void +rte_bus_dump(FILE *f) +{ + int ret; + struct rte_bus *bus; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { + ret = bus_dump_one(f, bus); + if (ret) { + RTE_LOG(ERR, EAL, "Unable to write to stream (%d)\n", + ret); + break; + } + } +} diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c index b5f76f7f..9a2d080a 100644 --- a/lib/librte_eal/common/eal_common_cpuflags.c +++ b/lib/librte_eal/common/eal_common_cpuflags.c @@ -43,6 +43,13 @@ void rte_cpu_check_supported(void) { + if (!rte_cpu_is_supported()) + exit(1); +} + +int +rte_cpu_is_supported(void) +{ /* This is generated at compile-time by the build system */ static const enum rte_cpu_flag_t compile_time_flags[] = { RTE_COMPILE_TIME_CPUFLAGS @@ -57,14 +64,16 @@ rte_cpu_check_supported(void) fprintf(stderr, "ERROR: CPU feature flag lookup failed with error %d\n", ret); - exit(1); + return 0; } if (!ret) { fprintf(stderr, "ERROR: This system does not support \"%s\".\n" "Please check that RTE_MACHINE is set correctly.\n", rte_cpu_get_flag_name(compile_time_flags[i])); - exit(1); + return 0; } } + + return 1; } diff --git a/lib/librte_eal/common/eal_common_dev.c 
b/lib/librte_eal/common/eal_common_dev.c index 4f3b4934..a400ddd0 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -45,65 +45,6 @@ #include "eal_private.h" -/** Global list of device drivers. */ -static struct rte_driver_list dev_driver_list = - TAILQ_HEAD_INITIALIZER(dev_driver_list); -/** Global list of device drivers. */ -static struct rte_device_list dev_device_list = - TAILQ_HEAD_INITIALIZER(dev_device_list); - -/* register a driver */ -void -rte_eal_driver_register(struct rte_driver *driver) -{ - TAILQ_INSERT_TAIL(&dev_driver_list, driver, next); -} - -/* unregister a driver */ -void -rte_eal_driver_unregister(struct rte_driver *driver) -{ - TAILQ_REMOVE(&dev_driver_list, driver, next); -} - -void rte_eal_device_insert(struct rte_device *dev) -{ - TAILQ_INSERT_TAIL(&dev_device_list, dev, next); -} - -void rte_eal_device_remove(struct rte_device *dev) -{ - TAILQ_REMOVE(&dev_device_list, dev, next); -} - -int -rte_eal_dev_init(void) -{ - struct rte_devargs *devargs; - - /* - * Note that the dev_driver_list is populated here - * from calls made to rte_eal_driver_register from constructor functions - * embedded into PMD modules via the RTE_PMD_REGISTER_VDEV macro - */ - - /* call the init function for each virtual device */ - TAILQ_FOREACH(devargs, &devargs_list, next) { - - if (devargs->type != RTE_DEVTYPE_VIRTUAL) - continue; - - if (rte_eal_vdev_init(devargs->virt.drv_name, - devargs->args)) { - RTE_LOG(ERR, EAL, "failed to initialize %s device\n", - devargs->virt.drv_name); - return -1; - } - } - - return 0; -} - int rte_eal_dev_attach(const char *name, const char *devargs) { struct rte_pci_addr addr; @@ -114,11 +55,11 @@ int rte_eal_dev_attach(const char *name, const char *devargs) } if (eal_parse_pci_DomBDF(name, &addr) == 0) { - if (rte_eal_pci_probe_one(&addr) < 0) + if (rte_pci_probe_one(&addr) < 0) goto err; } else { - if (rte_eal_vdev_init(name, devargs)) + if (rte_vdev_init(name, devargs)) goto err; } 
@@ -139,10 +80,10 @@ int rte_eal_dev_detach(const char *name) } if (eal_parse_pci_DomBDF(name, &addr) == 0) { - if (rte_eal_pci_detach(&addr) < 0) + if (rte_pci_detach(&addr) < 0) goto err; } else { - if (rte_eal_vdev_uninit(name)) + if (rte_vdev_uninit(name)) goto err; } return 0; diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c index 2cd41320..84fa0cb5 100644 --- a/lib/librte_eal/common/eal_common_lcore.c +++ b/lib/librte_eal/common/eal_common_lcore.c @@ -83,16 +83,17 @@ rte_eal_cpu_init(void) config->lcore_role[lcore_id] = ROLE_RTE; lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id); lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id); - if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) + if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) { #ifdef RTE_EAL_ALLOW_INV_SOCKET_ID lcore_config[lcore_id].socket_id = 0; #else - rte_panic("Socket ID (%u) is greater than " + RTE_LOG(ERR, EAL, "Socket ID (%u) is greater than " "RTE_MAX_NUMA_NODES (%d)\n", lcore_config[lcore_id].socket_id, RTE_MAX_NUMA_NODES); + return -1; #endif - + } RTE_LOG(DEBUG, EAL, "Detected lcore %u as " "core %u on socket %u\n", lcore_id, lcore_config[lcore_id].core_id, diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c index e45d3269..ddf65b7f 100644 --- a/lib/librte_eal/common/eal_common_log.c +++ b/lib/librte_eal/common/eal_common_log.c @@ -35,7 +35,11 @@ #include <stdint.h> #include <stdarg.h> #include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <regex.h> +#include <rte_eal.h> #include <rte_log.h> #include <rte_per_lcore.h> @@ -60,6 +64,11 @@ struct log_cur_msg { uint32_t logtype; /**< log type - see rte_log.h */ }; +struct rte_log_dynamic_type { + const char *name; + uint32_t loglevel; +}; + /* per core log */ static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg); @@ -75,35 +84,95 @@ rte_openlog_stream(FILE *f) /* Set global log level */ void 
-rte_set_log_level(uint32_t level) +rte_log_set_global_level(uint32_t level) { rte_logs.level = (uint32_t)level; } +/* Set global log level */ +/* replaced by rte_log_set_global_level */ +__rte_deprecated void +rte_set_log_level(uint32_t level) +{ + rte_log_set_global_level(level); +} + /* Get global log level */ uint32_t -rte_get_log_level(void) +rte_log_get_global_level(void) { return rte_logs.level; } +/* Get global log level */ +/* replaced by rte_log_get_global_level */ +uint32_t +rte_get_log_level(void) +{ + return rte_log_get_global_level(); +} + /* Set global log type */ -void +__rte_deprecated void rte_set_log_type(uint32_t type, int enable) { + if (type < RTE_LOGTYPE_FIRST_EXT_ID) { + if (enable) + rte_logs.type |= 1 << type; + else + rte_logs.type &= ~(1 << type); + } + if (enable) - rte_logs.type |= type; + rte_log_set_level(type, 0); else - rte_logs.type &= (~type); + rte_log_set_level(type, RTE_LOG_DEBUG); } /* Get global log type */ -uint32_t +__rte_deprecated uint32_t rte_get_log_type(void) { return rte_logs.type; } +int +rte_log_set_level(uint32_t type, uint32_t level) +{ + if (type >= rte_logs.dynamic_types_len) + return -1; + if (level > RTE_LOG_DEBUG) + return -1; + + rte_logs.dynamic_types[type].loglevel = level; + + return 0; +} + +/* set level */ +int +rte_log_set_level_regexp(const char *pattern, uint32_t level) +{ + regex_t r; + size_t i; + + if (level > RTE_LOG_DEBUG) + return -1; + + if (regcomp(&r, pattern, 0) != 0) + return -1; + + for (i = 0; i < rte_logs.dynamic_types_len; i++) { + if (rte_logs.dynamic_types[i].name == NULL) + continue; + if (regexec(&r, rte_logs.dynamic_types[i].name, 0, + NULL, 0) == 0) + rte_logs.dynamic_types[i].loglevel = level; + } + + return 0; +} + /* get the current loglevel for the message beeing processed */ int rte_log_cur_msg_loglevel(void) { @@ -116,6 +185,161 @@ int rte_log_cur_msg_logtype(void) return RTE_PER_LCORE(log_cur_msg).logtype; } +static int +rte_log_lookup(const char *name) +{ + size_t i; + + 
for (i = 0; i < rte_logs.dynamic_types_len; i++) { + if (rte_logs.dynamic_types[i].name == NULL) + continue; + if (strcmp(name, rte_logs.dynamic_types[i].name) == 0) + return i; + } + + return -1; +} + +/* register an extended log type, assuming table is large enough, and id + * is not yet registered. + */ +static int +__rte_log_register(const char *name, int id) +{ + char *dup_name = strdup(name); + + if (dup_name == NULL) + return -ENOMEM; + + rte_logs.dynamic_types[id].name = dup_name; + rte_logs.dynamic_types[id].loglevel = RTE_LOG_DEBUG; + + return id; +} + +/* register an extended log type */ +int +rte_log_register(const char *name) +{ + struct rte_log_dynamic_type *new_dynamic_types; + int id, ret; + + id = rte_log_lookup(name); + if (id >= 0) + return id; + + new_dynamic_types = realloc(rte_logs.dynamic_types, + sizeof(struct rte_log_dynamic_type) * + (rte_logs.dynamic_types_len + 1)); + if (new_dynamic_types == NULL) + return -ENOMEM; + rte_logs.dynamic_types = new_dynamic_types; + + ret = __rte_log_register(name, rte_logs.dynamic_types_len); + if (ret < 0) + return ret; + + rte_logs.dynamic_types_len++; + + return ret; +} + +struct logtype { + uint32_t log_id; + const char *logtype; +}; + +static const struct logtype logtype_strings[] = { + {RTE_LOGTYPE_EAL, "eal"}, + {RTE_LOGTYPE_MALLOC, "malloc"}, + {RTE_LOGTYPE_RING, "ring"}, + {RTE_LOGTYPE_MEMPOOL, "mempool"}, + {RTE_LOGTYPE_TIMER, "timer"}, + {RTE_LOGTYPE_PMD, "pmd"}, + {RTE_LOGTYPE_HASH, "hash"}, + {RTE_LOGTYPE_LPM, "lpm"}, + {RTE_LOGTYPE_KNI, "kni"}, + {RTE_LOGTYPE_ACL, "acl"}, + {RTE_LOGTYPE_POWER, "power"}, + {RTE_LOGTYPE_METER, "meter"}, + {RTE_LOGTYPE_SCHED, "sched"}, + {RTE_LOGTYPE_PORT, "port"}, + {RTE_LOGTYPE_TABLE, "table"}, + {RTE_LOGTYPE_PIPELINE, "pipeline"}, + {RTE_LOGTYPE_MBUF, "mbuf"}, + {RTE_LOGTYPE_CRYPTODEV, "cryptodev"}, + {RTE_LOGTYPE_EFD, "efd"}, + {RTE_LOGTYPE_EVENTDEV, "eventdev"}, + {RTE_LOGTYPE_USER1, "user1"}, + {RTE_LOGTYPE_USER2, "user2"}, + {RTE_LOGTYPE_USER3, "user3"}, 
+ {RTE_LOGTYPE_USER4, "user4"}, + {RTE_LOGTYPE_USER5, "user5"}, + {RTE_LOGTYPE_USER6, "user6"}, + {RTE_LOGTYPE_USER7, "user7"}, + {RTE_LOGTYPE_USER8, "user8"} +}; + +RTE_INIT(rte_log_init); +static void +rte_log_init(void) +{ + uint32_t i; + +#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG + rte_log_set_global_level(RTE_LOG_INFO); +#else + rte_log_set_global_level(RTE_LOG_LEVEL); +#endif + + rte_logs.dynamic_types = calloc(RTE_LOGTYPE_FIRST_EXT_ID, + sizeof(struct rte_log_dynamic_type)); + if (rte_logs.dynamic_types == NULL) + return; + + /* register legacy log types */ + for (i = 0; i < RTE_DIM(logtype_strings); i++) + __rte_log_register(logtype_strings[i].logtype, + logtype_strings[i].log_id); + + rte_logs.dynamic_types_len = RTE_LOGTYPE_FIRST_EXT_ID; +} + +static const char * +loglevel_to_string(uint32_t level) +{ + switch (level) { + case 0: return "disabled"; + case RTE_LOG_EMERG: return "emerg"; + case RTE_LOG_ALERT: return "alert"; + case RTE_LOG_CRIT: return "critical"; + case RTE_LOG_ERR: return "error"; + case RTE_LOG_WARNING: return "warning"; + case RTE_LOG_NOTICE: return "notice"; + case RTE_LOG_INFO: return "info"; + case RTE_LOG_DEBUG: return "debug"; + default: return "unknown"; + } +} + +/* dump global level and registered log types */ +void +rte_log_dump(FILE *f) +{ + size_t i; + + fprintf(f, "global log level is %s\n", + loglevel_to_string(rte_log_get_global_level())); + + for (i = 0; i < rte_logs.dynamic_types_len; i++) { + if (rte_logs.dynamic_types[i].name == NULL) + continue; + fprintf(f, "id %zu: %s, level is %s\n", + i, rte_logs.dynamic_types[i].name, + loglevel_to_string(rte_logs.dynamic_types[i].loglevel)); + } +} + /* * Generates a log message The message will be sent in the stream * defined by the previous call to rte_openlog_stream(). 
@@ -139,7 +363,11 @@ rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) } } - if ((level > rte_logs.level) || !(logtype & rte_logs.type)) + if (level > rte_logs.level) + return 0; + if (logtype >= rte_logs.dynamic_types_len) + return -1; + if (level > rte_logs.dynamic_types[logtype].loglevel) return 0; /* save loglevel and logtype in a global per-lcore variable */ @@ -176,7 +404,8 @@ eal_log_set_default(FILE *default_log) { default_log_stream = default_log; -#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG - RTE_LOG(NOTICE, EAL, "Debug logs available - lower performance\n"); +#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG + RTE_LOG(NOTICE, EAL, + "Debug dataplane logs available - lower performance\n"); #endif } diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 6ca8af17..f470195f 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -118,7 +118,7 @@ static const char *default_solib_dir = RTE_EAL_PMD_PATH; /* * Stringified version of solib path used by dpdk-pmdinfo.py * Note: PLEASE DO NOT ALTER THIS without making a corresponding - * change to tools/dpdk-pmdinfo.py + * change to usertools/dpdk-pmdinfo.py */ static const char dpdk_solib_path[] __attribute__((used)) = "DPDK_PLUGIN_PATH=" RTE_EAL_PMD_PATH; @@ -126,6 +126,7 @@ static const char dpdk_solib_path[] __attribute__((used)) = static int master_lcore_parsed; static int mem_parsed; +static int core_parsed; void eal_reset_internal_config(struct internal_config *internal_cfg) @@ -147,12 +148,6 @@ eal_reset_internal_config(struct internal_config *internal_cfg) internal_cfg->base_virtaddr = 0; internal_cfg->syslog_facility = LOG_DAEMON; - /* default value from build option */ -#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG - internal_cfg->log_level = RTE_LOG_INFO; -#else - internal_cfg->log_level = RTE_LOG_LEVEL; -#endif internal_cfg->xen_dom0_support = 0; @@ -738,25 +733,49 @@ eal_parse_syslog(const char *facility, 
struct internal_config *conf) } static int -eal_parse_log_level(const char *level, uint32_t *log_level) +eal_parse_log_level(const char *arg) { - char *end; + char *end, *str, *type, *level; unsigned long tmp; + str = strdup(arg); + if (str == NULL) + return -1; + + if (strchr(str, ',') == NULL) { + type = NULL; + level = str; + } else { + type = strsep(&str, ","); + level = strsep(&str, ","); + } + errno = 0; tmp = strtoul(level, &end, 0); /* check for errors */ if ((errno != 0) || (level[0] == '\0') || - end == NULL || (*end != '\0')) - return -1; + end == NULL || (*end != '\0')) + goto fail; /* log_level is a uint32_t */ if (tmp >= UINT32_MAX) - return -1; + goto fail; + + if (type == NULL) { + rte_log_set_global_level(tmp); + } else if (rte_log_set_level_regexp(type, tmp) < 0) { + printf("cannot set log level %s,%lu\n", + type, tmp); + goto fail; + } - *log_level = tmp; + free(str); return 0; + +fail: + free(str); + return -1; } static enum rte_proc_type_t @@ -797,6 +816,7 @@ eal_parse_common_option(int opt, const char *optarg, RTE_LOG(ERR, EAL, "invalid coremask\n"); return -1; } + core_parsed = 1; break; /* corelist */ case 'l': @@ -804,6 +824,7 @@ eal_parse_common_option(int opt, const char *optarg, RTE_LOG(ERR, EAL, "invalid core list\n"); return -1; } + core_parsed = 1; break; /* size of memory */ case 'm': @@ -895,15 +916,12 @@ eal_parse_common_option(int opt, const char *optarg, break; case OPT_LOG_LEVEL_NUM: { - uint32_t log; - - if (eal_parse_log_level(optarg, &log) < 0) { + if (eal_parse_log_level(optarg) < 0) { RTE_LOG(ERR, EAL, "invalid parameters for --" OPT_LOG_LEVEL "\n"); return -1; } - conf->log_level = log; break; } case OPT_LCORES_NUM: @@ -912,6 +930,7 @@ eal_parse_common_option(int opt, const char *optarg, OPT_LCORES "\n"); return -1; } + core_parsed = 1; break; /* don't know what to do, leave this to caller */ @@ -923,12 +942,38 @@ eal_parse_common_option(int opt, const char *optarg, return 0; } +static void +eal_auto_detect_cores(struct 
rte_config *cfg) +{ + unsigned int lcore_id; + unsigned int removed = 0; + rte_cpuset_t affinity_set; + pthread_t tid = pthread_self(); + + if (pthread_getaffinity_np(tid, sizeof(rte_cpuset_t), + &affinity_set) < 0) + CPU_ZERO(&affinity_set); + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (cfg->lcore_role[lcore_id] == ROLE_RTE && + !CPU_ISSET(lcore_id, &affinity_set)) { + cfg->lcore_role[lcore_id] = ROLE_OFF; + removed++; + } + } + + cfg->lcore_count -= removed; +} + int eal_adjust_config(struct internal_config *internal_cfg) { int i; struct rte_config *cfg = rte_eal_get_configuration(); + if (!core_parsed) + eal_auto_detect_cores(cfg); + if (internal_config.process_type == RTE_PROC_AUTO) internal_config.process_type = eal_proc_type_detect(); @@ -1027,7 +1072,9 @@ eal_common_usage(void) " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n" " --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n" " --"OPT_SYSLOG" Set syslog facility\n" - " --"OPT_LOG_LEVEL" Set default log level\n" + " --"OPT_LOG_LEVEL"=<int> Set global log level\n" + " --"OPT_LOG_LEVEL"=<type-regexp>,<int>\n" + " Set specific log level\n" " -v Display version information on startup\n" " -h, --help This help\n" "\nEAL options for DEBUG use only:\n" diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c index 6bff6752..b7499913 100644 --- a/lib/librte_eal/common/eal_common_pci.c +++ b/lib/librte_eal/common/eal_common_pci.c @@ -69,8 +69,10 @@ #include <sys/queue.h> #include <sys/mman.h> +#include <rte_errno.h> #include <rte_interrupts.h> #include <rte_log.h> +#include <rte_bus.h> #include <rte_pci.h> #include <rte_per_lcore.h> #include <rte_memory.h> @@ -82,10 +84,7 @@ #include "eal_private.h" -struct pci_driver_list pci_driver_list = - TAILQ_HEAD_INITIALIZER(pci_driver_list); -struct pci_device_list pci_device_list = - TAILQ_HEAD_INITIALIZER(pci_device_list); +extern struct rte_pci_bus rte_pci_bus; #define 
SYSFS_PCI_DEVICES "/sys/bus/pci/devices" @@ -153,170 +152,154 @@ pci_unmap_resource(void *requested_addr, size_t size) } /* - * If vendor/device ID match, call the probe() function of the - * driver. + * Match the PCI Driver and Device using the ID Table + * + * @param pci_drv + * PCI driver from which ID table would be extracted + * @param pci_dev + * PCI device to match against the driver + * @return + * 1 for successful match + * 0 for unsuccessful match */ static int -rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev) +rte_pci_match(const struct rte_pci_driver *pci_drv, + const struct rte_pci_device *pci_dev) { - int ret; const struct rte_pci_id *id_table; - for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) { - + for (id_table = pci_drv->id_table; id_table->vendor_id != 0; + id_table++) { /* check if device's identifiers match the driver's ones */ - if (id_table->vendor_id != dev->id.vendor_id && + if (id_table->vendor_id != pci_dev->id.vendor_id && id_table->vendor_id != PCI_ANY_ID) continue; - if (id_table->device_id != dev->id.device_id && + if (id_table->device_id != pci_dev->id.device_id && id_table->device_id != PCI_ANY_ID) continue; - if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && - id_table->subsystem_vendor_id != PCI_ANY_ID) + if (id_table->subsystem_vendor_id != + pci_dev->id.subsystem_vendor_id && + id_table->subsystem_vendor_id != PCI_ANY_ID) continue; - if (id_table->subsystem_device_id != dev->id.subsystem_device_id && - id_table->subsystem_device_id != PCI_ANY_ID) + if (id_table->subsystem_device_id != + pci_dev->id.subsystem_device_id && + id_table->subsystem_device_id != PCI_ANY_ID) continue; - if (id_table->class_id != dev->id.class_id && + if (id_table->class_id != pci_dev->id.class_id && id_table->class_id != RTE_CLASS_ANY_ID) continue; - struct rte_pci_addr *loc = &dev->addr; - - RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", - loc->domain, loc->bus, 
loc->devid, loc->function, - dev->device.numa_node); - - /* no initialization when blacklisted, return without error */ - if (dev->device.devargs != NULL && - dev->device.devargs->type == - RTE_DEVTYPE_BLACKLISTED_PCI) { - RTE_LOG(INFO, EAL, " Device is blacklisted, not initializing\n"); - return 1; - } - - RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->driver.name); - - if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { - /* map resources for devices that use igb_uio */ - ret = rte_eal_pci_map_device(dev); - if (ret != 0) - return ret; - } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND && - rte_eal_process_type() == RTE_PROC_PRIMARY) { - /* unbind current driver */ - if (pci_unbind_kernel_driver(dev) < 0) - return -1; - } - - /* reference driver structure */ - dev->driver = dr; - - /* call the driver probe() function */ - ret = dr->probe(dr, dev); - if (ret) - dev->driver = NULL; - - return ret; + return 1; } - /* return positive value if driver doesn't support this device */ - return 1; + + return 0; } /* - * If vendor/device ID match, call the remove() function of the + * If vendor/device ID match, call the probe() function of the * driver. 
*/ static int -rte_eal_pci_detach_dev(struct rte_pci_driver *dr, - struct rte_pci_device *dev) +rte_pci_probe_one_driver(struct rte_pci_driver *dr, + struct rte_pci_device *dev) { - const struct rte_pci_id *id_table; + int ret; + struct rte_pci_addr *loc; if ((dr == NULL) || (dev == NULL)) return -EINVAL; - for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) { + loc = &dev->addr; - /* check if device's identifiers match the driver's ones */ - if (id_table->vendor_id != dev->id.vendor_id && - id_table->vendor_id != PCI_ANY_ID) - continue; - if (id_table->device_id != dev->id.device_id && - id_table->device_id != PCI_ANY_ID) - continue; - if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && - id_table->subsystem_vendor_id != PCI_ANY_ID) - continue; - if (id_table->subsystem_device_id != dev->id.subsystem_device_id && - id_table->subsystem_device_id != PCI_ANY_ID) - continue; + /* The device is not blacklisted; Check if driver supports it */ + if (!rte_pci_match(dr, dev)) { + /* Match of device and driver failed */ + RTE_LOG(DEBUG, EAL, "Driver (%s) doesn't match the device\n", + dr->driver.name); + return 1; + } - struct rte_pci_addr *loc = &dev->addr; + RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, loc->function, + dev->device.numa_node); + + /* no initialization when blacklisted, return without error */ + if (dev->device.devargs != NULL && + dev->device.devargs->type == + RTE_DEVTYPE_BLACKLISTED_PCI) { + RTE_LOG(INFO, EAL, " Device is blacklisted, not" + " initializing\n"); + return 1; + } - RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", - loc->domain, loc->bus, loc->devid, - loc->function, dev->device.numa_node); + RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->driver.name); - RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, - dev->id.device_id, dr->driver.name); + if (dr->drv_flags & 
RTE_PCI_DRV_NEED_MAPPING) { + /* map resources for devices that use igb_uio */ + ret = rte_pci_map_device(dev); + if (ret != 0) + return ret; + } - if (dr->remove && (dr->remove(dev) < 0)) - return -1; /* negative value is an error */ + /* reference driver structure */ + dev->driver = dr; + dev->device.driver = &dr->driver; - /* clear driver structure */ + /* call the driver probe() function */ + ret = dr->probe(dr, dev); + if (ret) { dev->driver = NULL; - if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) - /* unmap resources for devices that use igb_uio */ - rte_eal_pci_unmap_device(dev); - - return 0; + rte_pci_unmap_device(dev); } - /* return positive value if driver doesn't support this device */ - return 1; + return ret; } /* - * If vendor/device ID match, call the probe() function of all - * registered driver for the given device. Return -1 if initialization - * failed, return 1 if no driver is found for this device. + * If vendor/device ID match, call the remove() function of the + * driver. 
*/ static int -pci_probe_all_drivers(struct rte_pci_device *dev) +rte_pci_detach_dev(struct rte_pci_device *dev) { - struct rte_pci_driver *dr = NULL; - int rc = 0; + struct rte_pci_addr *loc; + struct rte_pci_driver *dr; if (dev == NULL) - return -1; + return -EINVAL; - /* Check if a driver is already loaded */ - if (dev->driver != NULL) - return 0; + dr = dev->driver; + loc = &dev->addr; - TAILQ_FOREACH(dr, &pci_driver_list, next) { - rc = rte_eal_pci_probe_one_driver(dr, dev); - if (rc < 0) - /* negative value is an error */ - return -1; - if (rc > 0) - /* positive value means driver doesn't support it */ - continue; - return 0; - } - return 1; + RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, + loc->function, dev->device.numa_node); + + RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->driver.name); + + if (dr->remove && (dr->remove(dev) < 0)) + return -1; /* negative value is an error */ + + /* clear driver structure */ + dev->driver = NULL; + + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) + /* unmap resources for devices that use igb_uio */ + rte_pci_unmap_device(dev); + + return 0; } /* - * If vendor/device ID match, call the remove() function of all + * If vendor/device ID match, call the probe() function of all * registered driver for the given device. Return -1 if initialization * failed, return 1 if no driver is found for this device. 
*/ static int -pci_detach_all_drivers(struct rte_pci_device *dev) +pci_probe_all_drivers(struct rte_pci_device *dev) { struct rte_pci_driver *dr = NULL; int rc = 0; @@ -324,8 +307,12 @@ pci_detach_all_drivers(struct rte_pci_device *dev) if (dev == NULL) return -1; - TAILQ_FOREACH(dr, &pci_driver_list, next) { - rc = rte_eal_pci_detach_dev(dr, dev); + /* Check if a driver is already loaded */ + if (dev->driver != NULL) + return 0; + + FOREACH_DRIVER_ON_PCIBUS(dr) { + rc = rte_pci_probe_one_driver(dr, dev); if (rc < 0) /* negative value is an error */ return -1; @@ -342,9 +329,10 @@ pci_detach_all_drivers(struct rte_pci_device *dev) * the driver of the devive. */ int -rte_eal_pci_probe_one(const struct rte_pci_addr *addr) +rte_pci_probe_one(const struct rte_pci_addr *addr) { struct rte_pci_device *dev = NULL; + int ret = 0; if (addr == NULL) @@ -356,7 +344,7 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr) if (pci_update_device(addr) < 0) goto err_return; - TAILQ_FOREACH(dev, &pci_device_list, next) { + FOREACH_DEVICE_ON_PCIBUS(dev) { if (rte_eal_compare_pci_addr(&dev->addr, addr)) continue; @@ -378,7 +366,7 @@ err_return: * Detach device specified by its pci address. */ int -rte_eal_pci_detach(const struct rte_pci_addr *addr) +rte_pci_detach(const struct rte_pci_addr *addr) { struct rte_pci_device *dev = NULL; int ret = 0; @@ -386,15 +374,19 @@ rte_eal_pci_detach(const struct rte_pci_addr *addr) if (addr == NULL) return -1; - TAILQ_FOREACH(dev, &pci_device_list, next) { + FOREACH_DEVICE_ON_PCIBUS(dev) { if (rte_eal_compare_pci_addr(&dev->addr, addr)) continue; - ret = pci_detach_all_drivers(dev); + ret = rte_pci_detach_dev(dev); if (ret < 0) + /* negative value is an error */ goto err_return; + if (ret > 0) + /* positive value means driver doesn't support it */ + continue; - TAILQ_REMOVE(&pci_device_list, dev, next); + rte_pci_remove_device(dev); free(dev); return 0; } @@ -413,9 +405,10 @@ err_return: * for discovered devices. 
*/ int -rte_eal_pci_probe(void) +rte_pci_probe(void) { struct rte_pci_device *dev = NULL; + size_t probed = 0, failed = 0; struct rte_devargs *devargs; int probe_all = 0; int ret = 0; @@ -423,7 +416,8 @@ rte_eal_pci_probe(void) if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) == 0) probe_all = 1; - TAILQ_FOREACH(dev, &pci_device_list, next) { + FOREACH_DEVICE_ON_PCIBUS(dev) { + probed++; /* set devargs in PCI structure */ devargs = pci_devargs_lookup(dev); @@ -436,13 +430,17 @@ rte_eal_pci_probe(void) else if (devargs != NULL && devargs->type == RTE_DEVTYPE_WHITELISTED_PCI) ret = pci_probe_all_drivers(dev); - if (ret < 0) - rte_exit(EXIT_FAILURE, "Requested device " PCI_PRI_FMT + if (ret < 0) { + RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT " cannot be used\n", dev->addr.domain, dev->addr.bus, dev->addr.devid, dev->addr.function); + rte_errno = errno; + failed++; + ret = 0; + } } - return 0; + return (probed && probed == failed) ? -1 : 0; } /* dump one device */ @@ -467,27 +465,60 @@ pci_dump_one_device(FILE *f, struct rte_pci_device *dev) /* dump devices on the bus */ void -rte_eal_pci_dump(FILE *f) +rte_pci_dump(FILE *f) { struct rte_pci_device *dev = NULL; - TAILQ_FOREACH(dev, &pci_device_list, next) { + FOREACH_DEVICE_ON_PCIBUS(dev) { pci_dump_one_device(f, dev); } } /* register a driver */ void -rte_eal_pci_register(struct rte_pci_driver *driver) +rte_pci_register(struct rte_pci_driver *driver) { - TAILQ_INSERT_TAIL(&pci_driver_list, driver, next); - rte_eal_driver_register(&driver->driver); + TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next); + driver->bus = &rte_pci_bus; } /* unregister a driver */ void -rte_eal_pci_unregister(struct rte_pci_driver *driver) +rte_pci_unregister(struct rte_pci_driver *driver) +{ + TAILQ_REMOVE(&rte_pci_bus.driver_list, driver, next); + driver->bus = NULL; +} + +/* Add a device to PCI bus */ +void +rte_pci_add_device(struct rte_pci_device *pci_dev) +{ + TAILQ_INSERT_TAIL(&rte_pci_bus.device_list, 
pci_dev, next); +} + +/* Insert a device into a predefined position in PCI bus */ +void +rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, + struct rte_pci_device *new_pci_dev) { - rte_eal_driver_unregister(&driver->driver); - TAILQ_REMOVE(&pci_driver_list, driver, next); + TAILQ_INSERT_BEFORE(exist_pci_dev, new_pci_dev, next); } + +/* Remove a device from PCI bus */ +void +rte_pci_remove_device(struct rte_pci_device *pci_dev) +{ + TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next); +} + +struct rte_pci_bus rte_pci_bus = { + .bus = { + .scan = rte_pci_scan, + .probe = rte_pci_probe, + }, + .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), + .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), +}; + +RTE_REGISTER_BUS(PCI_BUS_NAME, rte_pci_bus.bus); diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c index bb08ec8b..4f698288 100644 --- a/lib/librte_eal/common/eal_common_tailqs.c +++ b/lib/librte_eal/common/eal_common_tailqs.c @@ -188,8 +188,7 @@ rte_eal_tailqs_init(void) if (t->head == NULL) { RTE_LOG(ERR, EAL, "Cannot initialize tailq: %s\n", t->name); - /* no need to TAILQ_REMOVE, we are going to panic in - * rte_eal_init() */ + /* TAILQ_REMOVE not needed, error is already fatal */ goto fail; } } diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c index 7d6e54f4..0037a641 100644 --- a/lib/librte_eal/common/eal_common_vdev.c +++ b/lib/librte_eal/common/eal_common_vdev.c @@ -37,35 +37,84 @@ #include <stdint.h> #include <sys/queue.h> +#include <rte_eal.h> +#include <rte_bus.h> #include <rte_vdev.h> #include <rte_common.h> +#include <rte_devargs.h> +#include <rte_memory.h> +/** Double linked list of virtual device drivers. 
*/ +TAILQ_HEAD(vdev_device_list, rte_vdev_device); + +static struct vdev_device_list vdev_device_list = + TAILQ_HEAD_INITIALIZER(vdev_device_list); struct vdev_driver_list vdev_driver_list = TAILQ_HEAD_INITIALIZER(vdev_driver_list); +static void rte_vdev_bus_register(void); + /* register a driver */ void -rte_eal_vdrv_register(struct rte_vdev_driver *driver) +rte_vdev_register(struct rte_vdev_driver *driver) { + rte_vdev_bus_register(); + TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next); - rte_eal_driver_register(&driver->driver); } /* unregister a driver */ void -rte_eal_vdrv_unregister(struct rte_vdev_driver *driver) +rte_vdev_unregister(struct rte_vdev_driver *driver) { - rte_eal_driver_unregister(&driver->driver); TAILQ_REMOVE(&vdev_driver_list, driver, next); } -int -rte_eal_vdev_init(const char *name, const char *args) +/* + * Parse "driver" devargs without adding a dependency on rte_kvargs.h + */ +static char *parse_driver_arg(const char *args) +{ + const char *c; + char *str; + + if (!args || args[0] == '\0') + return NULL; + + c = args; + + do { + if (strncmp(c, "driver=", 7) == 0) { + c += 7; + break; + } + + c = strchr(c, ','); + if (c) + c++; + } while (c); + + if (c) + str = strdup(c); + else + str = NULL; + + return str; +} + +static int +vdev_probe_all_drivers(struct rte_vdev_device *dev) { + const char *name; + char *drv_name; struct rte_vdev_driver *driver; + int ret = 1; - if (name == NULL) - return -EINVAL; + drv_name = parse_driver_arg(rte_vdev_device_args(dev)); + name = drv_name ? drv_name : rte_vdev_device_name(dev); + + RTE_LOG(DEBUG, EAL, "Search driver %s to probe device %s\n", name, + rte_vdev_device_name(dev)); TAILQ_FOREACH(driver, &vdev_driver_list, next) { /* @@ -75,50 +124,235 @@ rte_eal_vdev_init(const char *name, const char *args) * So use strncmp to compare. 
*/ if (!strncmp(driver->driver.name, name, - strlen(driver->driver.name))) - return driver->probe(name, args); + strlen(driver->driver.name))) { + dev->device.driver = &driver->driver; + ret = driver->probe(dev); + if (ret) + dev->device.driver = NULL; + goto out; + } } /* Give new names precedence over aliases. */ TAILQ_FOREACH(driver, &vdev_driver_list, next) { if (driver->driver.alias && !strncmp(driver->driver.alias, name, - strlen(driver->driver.alias))) - return driver->probe(name, args); + strlen(driver->driver.alias))) { + dev->device.driver = &driver->driver; + ret = driver->probe(dev); + if (ret) + dev->device.driver = NULL; + break; + } + } + +out: + free(drv_name); + return ret; +} + +static struct rte_vdev_device * +find_vdev(const char *name) +{ + struct rte_vdev_device *dev; + + if (!name) + return NULL; + + TAILQ_FOREACH(dev, &vdev_device_list, next) { + const char *devname = rte_vdev_device_name(dev); + if (!strncmp(devname, name, strlen(name))) + return dev; } - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); - return -EINVAL; + return NULL; +} + +static struct rte_devargs * +alloc_devargs(const char *name, const char *args) +{ + struct rte_devargs *devargs; + int ret; + + devargs = calloc(1, sizeof(*devargs)); + if (!devargs) + return NULL; + + devargs->type = RTE_DEVTYPE_VIRTUAL; + if (args) + devargs->args = strdup(args); + + ret = snprintf(devargs->virt.drv_name, + sizeof(devargs->virt.drv_name), "%s", name); + if (ret < 0 || ret >= (int)sizeof(devargs->virt.drv_name)) { + free(devargs->args); + free(devargs); + return NULL; + } + + return devargs; } int -rte_eal_vdev_uninit(const char *name) +rte_vdev_init(const char *name, const char *args) { - struct rte_vdev_driver *driver; + struct rte_vdev_device *dev; + struct rte_devargs *devargs; + int ret; if (name == NULL) return -EINVAL; - TAILQ_FOREACH(driver, &vdev_driver_list, next) { - /* - * search a driver prefix in virtual device name. 
- * For example, if the driver is pcap PMD, driver->name - * will be "net_pcap", but "name" will be "net_pcapN". - * So use strncmp to compare. - */ - if (!strncmp(driver->driver.name, name, - strlen(driver->driver.name))) - return driver->remove(name); + dev = find_vdev(name); + if (dev) + return -EEXIST; + + devargs = alloc_devargs(name, args); + if (!devargs) + return -ENOMEM; + + dev = calloc(1, sizeof(*dev)); + if (!dev) { + ret = -ENOMEM; + goto fail; } - /* Give new names precedence over aliases. */ - TAILQ_FOREACH(driver, &vdev_driver_list, next) { - if (driver->driver.alias && - !strncmp(driver->driver.alias, name, - strlen(driver->driver.alias))) - return driver->remove(name); + dev->device.devargs = devargs; + dev->device.numa_node = SOCKET_ID_ANY; + dev->device.name = devargs->virt.drv_name; + + ret = vdev_probe_all_drivers(dev); + if (ret) { + if (ret > 0) + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + goto fail; } - RTE_LOG(ERR, EAL, "no driver found for %s\n", name); - return -EINVAL; + TAILQ_INSERT_TAIL(&devargs_list, devargs, next); + + TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); + return 0; + +fail: + free(devargs->args); + free(devargs); + free(dev); + return ret; +} + +static int +vdev_remove_driver(struct rte_vdev_device *dev) +{ + const char *name = rte_vdev_device_name(dev); + const struct rte_vdev_driver *driver; + + if (!dev->device.driver) { + RTE_LOG(DEBUG, EAL, "no driver attach to device %s\n", name); + return 1; + } + + driver = container_of(dev->device.driver, const struct rte_vdev_driver, + driver); + return driver->remove(dev); +} + +int +rte_vdev_uninit(const char *name) +{ + struct rte_vdev_device *dev; + struct rte_devargs *devargs; + int ret; + + if (name == NULL) + return -EINVAL; + + dev = find_vdev(name); + if (!dev) + return -ENOENT; + + devargs = dev->device.devargs; + + ret = vdev_remove_driver(dev); + if (ret) + return ret; + + TAILQ_REMOVE(&vdev_device_list, dev, next); + + TAILQ_REMOVE(&devargs_list, 
devargs, next); + + free(devargs->args); + free(devargs); + free(dev); + return 0; +} + +static int +vdev_scan(void) +{ + struct rte_vdev_device *dev; + struct rte_devargs *devargs; + + /* for virtual devices we scan the devargs_list populated via cmdline */ + + TAILQ_FOREACH(devargs, &devargs_list, next) { + + if (devargs->type != RTE_DEVTYPE_VIRTUAL) + continue; + + dev = find_vdev(devargs->virt.drv_name); + if (dev) + continue; + + dev = calloc(1, sizeof(*dev)); + if (!dev) + return -1; + + dev->device.devargs = devargs; + dev->device.numa_node = SOCKET_ID_ANY; + dev->device.name = devargs->virt.drv_name; + + TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); + } + + return 0; +} + +static int +vdev_probe(void) +{ + struct rte_vdev_device *dev; + + /* call the init function for each virtual device */ + TAILQ_FOREACH(dev, &vdev_device_list, next) { + + if (dev->device.driver) + continue; + + if (vdev_probe_all_drivers(dev)) { + RTE_LOG(ERR, EAL, "failed to initialize %s device\n", + rte_vdev_device_name(dev)); + return -1; + } + } + + return 0; +} + +static struct rte_bus rte_vdev_bus = { + .scan = vdev_scan, + .probe = vdev_probe, +}; + +RTE_INIT(rte_vdev_bus_register); + +static void rte_vdev_bus_register(void) +{ + static int registered; + + if (registered) + return; + + registered = 1; + rte_vdev_bus.name = RTE_STR(virtual); + rte_bus_register(&rte_vdev_bus); } diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 5f1367eb..7b7e8c88 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -78,7 +78,6 @@ struct internal_config { volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */ uintptr_t base_virtaddr; /**< base address to try and reserve memory from */ volatile int syslog_facility; /**< facility passed to openlog() */ - volatile uint32_t log_level; /**< default log level */ /** default interrupt mode for VFIO */ volatile enum 
rte_intr_mode vfio_intr_mode; const char *hugefile_prefix; /**< the base filename of hugetlbfs files */ diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 9e7d8f6b..6cacce07 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -34,6 +34,7 @@ #ifndef _EAL_PRIVATE_H_ #define _EAL_PRIVATE_H_ +#include <stdbool.h> #include <stdio.h> #include <rte_pci.h> @@ -108,18 +109,43 @@ int rte_eal_timer_init(void); */ int rte_eal_log_init(const char *id, int facility); +struct rte_pci_driver; +struct rte_pci_device; + /** - * Init the PCI infrastructure - * - * This function is private to EAL. + * Add a PCI device to the PCI Bus (append to PCI Device list). This function + * also updates the bus references of the PCI Device (and the generic device + * object embedded within. * - * @return - * 0 on success, negative on error + * @param pci_dev + * PCI device to add + * @return void */ -int rte_eal_pci_init(void); +void rte_pci_add_device(struct rte_pci_device *pci_dev); -struct rte_pci_driver; -struct rte_pci_device; +/** + * Insert a PCI device in the PCI Bus at a particular location in the device + * list. It also updates the PCI Bus reference of the new devices to be + * inserted. + * + * @param exist_pci_dev + * Existing PCI device in PCI Bus + * @param new_pci_dev + * PCI device to be added before exist_pci_dev + * @return void + */ +void rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, + struct rte_pci_device *new_pci_dev); + +/** + * Remove a PCI device from the PCI Bus. This sets to NULL the bus references + * in the PCI device object as well as the generic device object. + * + * @param pci_device + * PCI device to be removed from PCI Bus + * @return void + */ +void rte_pci_remove_device(struct rte_pci_device *pci_device); /** * Update a pci device object by asking the kernel for the latest information. 
@@ -301,4 +327,15 @@ int rte_eal_hugepage_init(void); */ int rte_eal_hugepage_attach(void); +/** + * Returns true if the system is able to obtain + * physical addresses. Return false if using DMA + * addresses through an IOMMU. + * + * Drivers based on uio will not load unless physical + * addresses are obtainable. It is only possible to get + * physical addresses when running as a privileged user. + */ +bool rte_eal_using_phys_addrs(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h index 454a12b0..f3f3b6e3 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h @@ -39,10 +39,4 @@ #include <rte_atomic_32.h> #endif -#define rte_smp_mb() rte_mb() - -#define rte_smp_wmb() rte_wmb() - -#define rte_smp_rmb() rte_rmb() - #endif /* _RTE_ATOMIC_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h index 9ae1e78b..14c04864 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h @@ -67,6 +67,18 @@ extern "C" { */ #define rte_rmb() __sync_synchronize() +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_wmb() + +#define rte_smp_rmb() rte_rmb() + +#define rte_io_mb() rte_mb() + +#define rte_io_wmb() rte_wmb() + +#define rte_io_rmb() rte_rmb() + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h index 671caa76..dc3a0f3b 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h @@ -43,43 +43,26 @@ extern "C" { #include "generic/rte_atomic.h" -#define dmb(opt) do { asm volatile("dmb " #opt : : : "memory"); } while (0) +#define dsb(opt) { asm volatile("dsb " #opt : : : "memory"); } 
+#define dmb(opt) { asm volatile("dmb " #opt : : : "memory"); } -/** - * General memory barrier. - * - * Guarantees that the LOAD and STORE operations generated before the - * barrier occur before the LOAD and STORE operations generated after. - * This function is architecture dependent. - */ -static inline void rte_mb(void) -{ - dmb(ish); -} +#define rte_mb() dsb(sy) -/** - * Write memory barrier. - * - * Guarantees that the STORE operations generated before the barrier - * occur before the STORE operations generated after. - * This function is architecture dependent. - */ -static inline void rte_wmb(void) -{ - dmb(ishst); -} +#define rte_wmb() dsb(st) -/** - * Read memory barrier. - * - * Guarantees that the LOAD operations generated before the barrier - * occur before the LOAD operations generated after. - * This function is architecture dependent. - */ -static inline void rte_rmb(void) -{ - dmb(ishld); -} +#define rte_rmb() dsb(ld) + +#define rte_smp_mb() dmb(ish) + +#define rte_smp_wmb() dmb(ishst) + +#define rte_smp_rmb() dmb(ishld) + +#define rte_io_mb() rte_mb() + +#define rte_io_wmb() rte_wmb() + +#define rte_io_rmb() rte_rmb() #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h b/lib/librte_eal/common/include/arch/arm/rte_io.h index 1849b520..9593b424 100644 --- a/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h +++ b/lib/librte_eal/common/include/arch/arm/rte_io.h @@ -1,7 +1,8 @@ /* * BSD LICENSE * - * Copyright (C) EZchip Semiconductor Ltd. 2015. + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. 
- * * Neither the name of EZchip Semiconductor nor the names of its + * * Neither the name of Cavium networks nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -28,26 +29,23 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + */ -#ifndef _RTE_CPUFLAGS_TILE_H_ -#define _RTE_CPUFLAGS_TILE_H_ +#ifndef _RTE_IO_ARM_H_ +#define _RTE_IO_ARM_H_ #ifdef __cplusplus extern "C" { #endif -/** - * Enumeration of all CPU features supported - */ -enum rte_cpu_flag_t { - RTE_CPUFLAG_NUMFLAGS /**< This should always be the last! */ -}; - -#include "generic/rte_cpuflags.h" +#ifdef RTE_ARCH_64 +#include "rte_io_64.h" +#else +#include "generic/rte_io.h" +#endif #ifdef __cplusplus } #endif -#endif /* _RTE_CPUFLAGS_TILE_H_ */ +#endif /* _RTE_IO_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_io_64.h b/lib/librte_eal/common/include/arch/arm/rte_io_64.h new file mode 100644 index 00000000..0402125b --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_io_64.h @@ -0,0 +1,199 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_IO_ARM64_H_ +#define _RTE_IO_ARM64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +#define RTE_OVERRIDE_IO_H + +#include "generic/rte_io.h" +#include "rte_atomic_64.h" + +static inline uint8_t __attribute__((always_inline)) +rte_read8_relaxed(const volatile void *addr) +{ + uint8_t val; + + asm volatile( + "ldrb %w[val], [%x[addr]]" + : [val] "=r" (val) + : [addr] "r" (addr)); + return val; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16_relaxed(const volatile void *addr) +{ + uint16_t val; + + asm volatile( + "ldrh %w[val], [%x[addr]]" + : [val] "=r" (val) + : [addr] "r" (addr)); + return val; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32_relaxed(const volatile void *addr) +{ + uint32_t val; + + asm volatile( + "ldr %w[val], [%x[addr]]" + : [val] "=r" (val) + : [addr] "r" (addr)); + return val; +} + +static inline uint64_t __attribute__((always_inline)) +rte_read64_relaxed(const volatile 
void *addr) +{ + uint64_t val; + + asm volatile( + "ldr %x[val], [%x[addr]]" + : [val] "=r" (val) + : [addr] "r" (addr)); + return val; +} + +static inline void __attribute__((always_inline)) +rte_write8_relaxed(uint8_t val, volatile void *addr) +{ + asm volatile( + "strb %w[val], [%x[addr]]" + : + : [val] "r" (val), [addr] "r" (addr)); +} + +static inline void __attribute__((always_inline)) +rte_write16_relaxed(uint16_t val, volatile void *addr) +{ + asm volatile( + "strh %w[val], [%x[addr]]" + : + : [val] "r" (val), [addr] "r" (addr)); +} + +static inline void __attribute__((always_inline)) +rte_write32_relaxed(uint32_t val, volatile void *addr) +{ + asm volatile( + "str %w[val], [%x[addr]]" + : + : [val] "r" (val), [addr] "r" (addr)); +} + +static inline void __attribute__((always_inline)) +rte_write64_relaxed(uint64_t val, volatile void *addr) +{ + asm volatile( + "str %x[val], [%x[addr]]" + : + : [val] "r" (val), [addr] "r" (addr)); +} + +static inline uint8_t __attribute__((always_inline)) +rte_read8(const volatile void *addr) +{ + uint8_t val; + val = rte_read8_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16(const volatile void *addr) +{ + uint16_t val; + val = rte_read16_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32(const volatile void *addr) +{ + uint32_t val; + val = rte_read32_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint64_t __attribute__((always_inline)) +rte_read64(const volatile void *addr) +{ + uint64_t val; + val = rte_read64_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline void __attribute__((always_inline)) +rte_write8(uint8_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write8_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write16(uint16_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write16_relaxed(value, addr); +} + 
+static inline void __attribute__((always_inline)) +rte_write32(uint32_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write32_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write64(uint64_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write64_relaxed(value, addr); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_IO_ARM64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h index b86c2cf5..4107c998 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h @@ -34,6 +34,7 @@ #define _RTE_VECT_ARM_H_ #include <stdint.h> +#include "generic/rte_vect.h" #include "arm_neon.h" #ifdef __cplusplus diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h index fb4fccb4..150810cd 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h @@ -87,6 +87,12 @@ extern "C" { #define rte_smp_rmb() rte_rmb() +#define rte_io_mb() rte_mb() + +#define rte_io_wmb() rte_wmb() + +#define rte_io_rmb() rte_rmb() + /*------------------------- 16 bit atomic operations -------------------------*/ /* To be compatible with Power7, use GCC built-in functions for 16 bit * operations */ diff --git a/lib/librte_eal/common/arch/tile/rte_cpuflags.c b/lib/librte_eal/common/include/arch/ppc_64/rte_io.h index a2b6c51a..be192da7 100644 --- a/lib/librte_eal/common/arch/tile/rte_cpuflags.c +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_io.h @@ -1,7 +1,8 @@ /* * BSD LICENSE * - * Copyright (C) EZchip Semiconductor Ltd. 2015. + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its + * * Neither the name of Cavium networks nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -28,20 +29,19 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + */ -#include "rte_cpuflags.h" +#ifndef _RTE_IO_PPC_64_H_ +#define _RTE_IO_PPC_64_H_ -#include <errno.h> +#ifdef __cplusplus +extern "C" { +#endif -const struct feature_entry rte_cpu_feature_table[] = { -}; +#include "generic/rte_io.h" -/* - * Checks if a particular flag is available on current machine. 
- */ -int -rte_cpu_get_flag_enabled(__attribute__((unused)) enum rte_cpu_flag_t feature) -{ - return -ENOENT; +#ifdef __cplusplus } +#endif + +#endif /* _RTE_IO_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h index 05209e52..99586e58 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h @@ -34,6 +34,7 @@ #define _RTE_VECT_PPC_64_H_ #include <altivec.h> +#include "generic/rte_vect.h" #ifdef __cplusplus extern "C" { diff --git a/lib/librte_eal/common/include/arch/tile/rte_atomic.h b/lib/librte_eal/common/include/arch/tile/rte_atomic.h deleted file mode 100644 index 28825ff6..00000000 --- a/lib/librte_eal/common/include/arch/tile/rte_atomic.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * BSD LICENSE - * - * Copyright (C) EZchip Semiconductor Ltd. 2015. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef _RTE_ATOMIC_TILE_H_ -#define _RTE_ATOMIC_TILE_H_ - -#ifndef RTE_FORCE_INTRINSICS -# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#include "generic/rte_atomic.h" - -/** - * General memory barrier. - * - * Guarantees that the LOAD and STORE operations generated before the - * barrier occur before the LOAD and STORE operations generated after. - * This function is architecture dependent. - */ -static inline void rte_mb(void) -{ - __sync_synchronize(); -} - -/** - * Write memory barrier. - * - * Guarantees that the STORE operations generated before the barrier - * occur before the STORE operations generated after. - * This function is architecture dependent. - */ -static inline void rte_wmb(void) -{ - __sync_synchronize(); -} - -/** - * Read memory barrier. - * - * Guarantees that the LOAD operations generated before the barrier - * occur before the LOAD operations generated after. - * This function is architecture dependent. 
- */ -static inline void rte_rmb(void) -{ - __sync_synchronize(); -} - -#define rte_smp_mb() rte_mb() - -#define rte_smp_wmb() rte_compiler_barrier() - -#define rte_smp_rmb() rte_compiler_barrier() - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_ATOMIC_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_byteorder.h b/lib/librte_eal/common/include/arch/tile/rte_byteorder.h deleted file mode 100644 index 7239e437..00000000 --- a/lib/librte_eal/common/include/arch/tile/rte_byteorder.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * BSD LICENSE - * - * Copyright (C) EZchip Semiconductor Ltd. 2015. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef _RTE_BYTEORDER_TILE_H_ -#define _RTE_BYTEORDER_TILE_H_ - -#ifndef RTE_FORCE_INTRINSICS -# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#include "generic/rte_byteorder.h" - -#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) -#define rte_bswap16(x) rte_constant_bswap16(x) -#endif - -#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN - -#define rte_cpu_to_le_16(x) (x) -#define rte_cpu_to_le_32(x) (x) -#define rte_cpu_to_le_64(x) (x) - -#define rte_cpu_to_be_16(x) rte_bswap16(x) -#define rte_cpu_to_be_32(x) rte_bswap32(x) -#define rte_cpu_to_be_64(x) rte_bswap64(x) - -#define rte_le_to_cpu_16(x) (x) -#define rte_le_to_cpu_32(x) (x) -#define rte_le_to_cpu_64(x) (x) - -#define rte_be_to_cpu_16(x) rte_bswap16(x) -#define rte_be_to_cpu_32(x) rte_bswap32(x) -#define rte_be_to_cpu_64(x) rte_bswap64(x) - -#else /* RTE_BIG_ENDIAN */ - -#define rte_cpu_to_le_16(x) rte_bswap16(x) -#define rte_cpu_to_le_32(x) rte_bswap32(x) -#define rte_cpu_to_le_64(x) rte_bswap64(x) - -#define rte_cpu_to_be_16(x) (x) -#define rte_cpu_to_be_32(x) (x) -#define rte_cpu_to_be_64(x) (x) - -#define rte_le_to_cpu_16(x) rte_bswap16(x) -#define rte_le_to_cpu_32(x) rte_bswap32(x) -#define rte_le_to_cpu_64(x) rte_bswap64(x) - -#define rte_be_to_cpu_16(x) (x) -#define rte_be_to_cpu_32(x) (x) -#define rte_be_to_cpu_64(x) (x) -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* 
_RTE_BYTEORDER_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h deleted file mode 100644 index e606957c..00000000 --- a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * BSD LICENSE - * - * Copyright (C) EZchip Semiconductor Ltd. 2015. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef _RTE_MEMCPY_TILE_H_ -#define _RTE_MEMCPY_TILE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include <stdint.h> -#include <string.h> - -#include "generic/rte_memcpy.h" - -static inline void -rte_mov16(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 16); -} - -static inline void -rte_mov32(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 32); -} - -static inline void -rte_mov48(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 48); -} - -static inline void -rte_mov64(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 64); -} - -static inline void -rte_mov128(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 128); -} - -static inline void -rte_mov256(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 256); -} - -#define rte_memcpy(d, s, n) memcpy((d), (s), (n)) - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_MEMCPY_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h deleted file mode 100644 index 7a1bb93e..00000000 --- a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * BSD LICENSE - * - * Copyright (C) EZchip Semiconductor Ltd. 2015. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef _RTE_PREFETCH_TILE_H_ -#define _RTE_PREFETCH_TILE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "generic/rte_prefetch.h" - -static inline void rte_prefetch0(const volatile void *p) -{ - __builtin_prefetch((const void *)(uintptr_t)p, 0, 3); -} - -static inline void rte_prefetch1(const volatile void *p) -{ - __builtin_prefetch((const void *)(uintptr_t)p, 0, 2); -} - -static inline void rte_prefetch2(const volatile void *p) -{ - __builtin_prefetch((const void *)(uintptr_t)p, 0, 1); -} - -static inline void rte_prefetch_non_temporal(const volatile void *p) -{ - /* non-temporal version not available, fallback to rte_prefetch0 */ - rte_prefetch0(p); -} - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_PREFETCH_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_rwlock.h b/lib/librte_eal/common/include/arch/tile/rte_rwlock.h deleted file mode 100644 index 8f67a190..00000000 --- a/lib/librte_eal/common/include/arch/tile/rte_rwlock.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * BSD LICENSE - * - * Copyright (C) EZchip Semiconductor Ltd. 2015. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifndef _RTE_RWLOCK_TILE_H_ -#define _RTE_RWLOCK_TILE_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "generic/rte_rwlock.h" - -static inline void -rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) -{ - rte_rwlock_read_lock(rwl); -} - -static inline void -rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) -{ - rte_rwlock_read_unlock(rwl); -} - -static inline void -rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) -{ - rte_rwlock_write_lock(rwl); -} - -static inline void -rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) -{ - rte_rwlock_write_unlock(rwl); -} - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_RWLOCK_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_spinlock.h b/lib/librte_eal/common/include/arch/tile/rte_spinlock.h deleted file mode 100644 index e91f99ee..00000000 --- a/lib/librte_eal/common/include/arch/tile/rte_spinlock.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * BSD LICENSE - * - * Copyright (C) EZchip Semiconductor Ltd. 2015. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef _RTE_SPINLOCK_TILE_H_ -#define _RTE_SPINLOCK_TILE_H_ - -#ifndef RTE_FORCE_INTRINSICS -# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#include <rte_common.h> -#include "generic/rte_spinlock.h" - -static inline int rte_tm_supported(void) -{ - return 0; -} - -static inline void -rte_spinlock_lock_tm(rte_spinlock_t *sl) -{ - rte_spinlock_lock(sl); /* fall-back */ -} - -static inline int -rte_spinlock_trylock_tm(rte_spinlock_t *sl) -{ - return rte_spinlock_trylock(sl); -} - -static inline void -rte_spinlock_unlock_tm(rte_spinlock_t *sl) -{ - rte_spinlock_unlock(sl); -} - -static inline void -rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) -{ - rte_spinlock_recursive_lock(slr); /* fall-back */ -} - -static inline void -rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) -{ - rte_spinlock_recursive_unlock(slr); -} - -static inline int -rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) -{ - return rte_spinlock_recursive_trylock(slr); -} - -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_SPINLOCK_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h index 00b1cdf5..4eac6663 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h @@ -61,6 +61,12 @@ extern "C" { #define 
rte_smp_rmb() rte_compiler_barrier() +#define rte_io_mb() rte_mb() + +#define rte_io_wmb() rte_compiler_barrier() + +#define rte_io_rmb() rte_compiler_barrier() + /*------------------------- 16 bit atomic operations -------------------------*/ #ifndef RTE_FORCE_INTRINSICS diff --git a/lib/librte_eal/common/include/arch/tile/rte_cycles.h b/lib/librte_eal/common/include/arch/x86/rte_io.h index 0b2200a3..c8d14043 100644 --- a/lib/librte_eal/common/include/arch/tile/rte_cycles.h +++ b/lib/librte_eal/common/include/arch/x86/rte_io.h @@ -1,7 +1,8 @@ /* * BSD LICENSE * - * Copyright (C) EZchip Semiconductor Ltd. 2015. + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -13,7 +14,7 @@ * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. - * * Neither the name of EZchip Semiconductor nor the names of its + * * Neither the name of Cavium networks nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * @@ -28,43 +29,19 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + */ -#ifndef _RTE_CYCLES_TILE_H_ -#define _RTE_CYCLES_TILE_H_ +#ifndef _RTE_IO_X86_H_ +#define _RTE_IO_X86_H_ #ifdef __cplusplus extern "C" { #endif -#include <arch/cycle.h> - -#include "generic/rte_cycles.h" - -/** - * Read the time base register. - * - * @return - * The time base for this lcore. 
- */ -static inline uint64_t -rte_rdtsc(void) -{ - return get_cycle_count(); -} - -static inline uint64_t -rte_rdtsc_precise(void) -{ - rte_mb(); - return rte_rdtsc(); -} - -static inline uint64_t -rte_get_tsc_cycles(void) { return rte_rdtsc(); } +#include "generic/rte_io.h" #ifdef __cplusplus } #endif -#endif /* _RTE_CYCLES_TILE_H_ */ +#endif /* _RTE_IO_X86_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h index b3bfc235..b9785e85 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -69,6 +69,8 @@ rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline)); #ifdef RTE_MACHINE_CPUFLAG_AVX512F +#define ALIGNMENT_MASK 0x3F + /** * AVX512 implementation below */ @@ -189,7 +191,7 @@ rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n) } static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { uintptr_t dstu = (uintptr_t)dst; uintptr_t srcu = (uintptr_t)src; @@ -308,6 +310,8 @@ COPY_BLOCK_128_BACK63: #elif defined RTE_MACHINE_CPUFLAG_AVX2 +#define ALIGNMENT_MASK 0x1F + /** * AVX2 implementation below */ @@ -387,7 +391,7 @@ rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n) } static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { uintptr_t dstu = (uintptr_t)dst; uintptr_t srcu = (uintptr_t)src; @@ -499,6 +503,8 @@ COPY_BLOCK_128_BACK31: #else /* RTE_MACHINE_CPUFLAG */ +#define ALIGNMENT_MASK 0x0F + /** * SSE & AVX implementation below */ @@ -677,7 +683,7 @@ __extension__ ({ \ }) static inline void * -rte_memcpy(void *dst, const void *src, size_t n) +rte_memcpy_generic(void *dst, const void *src, size_t n) { __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; uintptr_t dstu = (uintptr_t)dst; @@ -821,6 +827,75 @@ 
COPY_BLOCK_64_BACK15: #endif /* RTE_MACHINE_CPUFLAG */ +static inline void * +rte_memcpy_aligned(void *dst, const void *src, size_t n) +{ + void *ret = dst; + + /* Copy size <= 16 bytes */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dst = *(const uint8_t *)src; + src = (const uint8_t *)src + 1; + dst = (uint8_t *)dst + 1; + } + if (n & 0x02) { + *(uint16_t *)dst = *(const uint16_t *)src; + src = (const uint16_t *)src + 1; + dst = (uint16_t *)dst + 1; + } + if (n & 0x04) { + *(uint32_t *)dst = *(const uint32_t *)src; + src = (const uint32_t *)src + 1; + dst = (uint32_t *)dst + 1; + } + if (n & 0x08) + *(uint64_t *)dst = *(const uint64_t *)src; + + return ret; + } + + /* Copy 16 <= size <= 32 bytes */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + + return ret; + } + + /* Copy 32 < size <= 64 bytes */ + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); + + return ret; + } + + /* Copy 64 bytes blocks */ + for (; n >= 64; n -= 64) { + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; + } + + /* Copy whatever left */ + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + + return ret; +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + if (!(((uintptr_t)dst | (uintptr_t)src) & ALIGNMENT_MASK)) + return rte_memcpy_aligned(dst, src, n); + else + return rte_memcpy_generic(dst, src, n); +} + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h index 77f2e253..1b4b85dd 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_vect.h +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h @@ -31,8 +31,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef _RTE_VECT_H_ -#define _RTE_VECT_H_ +#ifndef _RTE_VECT_X86_H_ +#define _RTE_VECT_X86_H_ /** * @file @@ -41,6 +41,7 @@ */ #include <stdint.h> +#include "generic/rte_vect.h" #if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) @@ -133,4 +134,4 @@ __extension__ ({ \ } #endif -#endif /* _RTE_VECT_H_ */ +#endif /* _RTE_VECT_X86_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h index 43a704ec..7b81705b 100644 --- a/lib/librte_eal/common/include/generic/rte_atomic.h +++ b/lib/librte_eal/common/include/generic/rte_atomic.h @@ -100,6 +100,33 @@ static inline void rte_smp_wmb(void); */ static inline void rte_smp_rmb(void); +/** + * General memory barrier for I/O device + * + * Guarantees that the LOAD and STORE operations that precede the + * rte_io_mb() call are visible to I/O device or CPU before the + * LOAD and STORE operations that follow it. + */ +static inline void rte_io_mb(void); + +/** + * Write memory barrier for I/O device + * + * Guarantees that the STORE operations that precede the + * rte_io_wmb() call are visible to I/O device before the STORE + * operations that follow it. + */ +static inline void rte_io_wmb(void); + +/** + * Read memory barrier for IO device + * + * Guarantees that the LOAD operations on I/O device that precede the + * rte_io_rmb() call are visible to CPU before the LOAD + * operations that follow it. + */ +static inline void rte_io_rmb(void); + #endif /* __DOXYGEN__ */ /** diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h index 71321f32..c1c5551f 100644 --- a/lib/librte_eal/common/include/generic/rte_cpuflags.h +++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h @@ -39,6 +39,7 @@ * Architecture specific API to determine available CPU features at runtime. 
*/ +#include "rte_common.h" #include <errno.h> /** @@ -79,7 +80,17 @@ rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature); * that were specified at compile time. It is called automatically within the * EAL, so does not need to be used by applications. */ +__rte_deprecated void rte_cpu_check_supported(void); +/** + * This function checks that the currently used CPU supports the CPU features + * that were specified at compile time. It is called automatically within the + * EAL, so does not need to be used by applications. This version returns a + * result so that decisions may be made (for instance, graceful shutdowns). + */ +int +rte_cpu_is_supported(void); + #endif /* _RTE_CPUFLAGS_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h index 00103ca9..0e645c2c 100644 --- a/lib/librte_eal/common/include/generic/rte_cycles.h +++ b/lib/librte_eal/common/include/generic/rte_cycles.h @@ -150,15 +150,17 @@ int rte_eal_hpet_init(int make_default); static inline uint64_t rte_get_timer_cycles(void) { +#ifdef RTE_LIBEAL_USE_HPET switch(eal_timer_source) { case EAL_TIMER_TSC: +#endif return rte_get_tsc_cycles(); - case EAL_TIMER_HPET: #ifdef RTE_LIBEAL_USE_HPET + case EAL_TIMER_HPET: return rte_get_hpet_cycles(); -#endif default: rte_panic("Invalid timer source specified\n"); } +#endif } /** @@ -170,15 +172,17 @@ rte_get_timer_cycles(void) static inline uint64_t rte_get_timer_hz(void) { +#ifdef RTE_LIBEAL_USE_HPET switch(eal_timer_source) { case EAL_TIMER_TSC: +#endif return rte_get_tsc_hz(); - case EAL_TIMER_HPET: #ifdef RTE_LIBEAL_USE_HPET + case EAL_TIMER_HPET: return rte_get_hpet_hz(); -#endif default: rte_panic("Invalid timer source specified\n"); } +#endif } /** * Wait at least us microseconds. 
diff --git a/lib/librte_eal/common/include/generic/rte_io.h b/lib/librte_eal/common/include/generic/rte_io.h new file mode 100644 index 00000000..d82ee695 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_io.h @@ -0,0 +1,381 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2016 Cavium networks. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _RTE_IO_H_ +#define _RTE_IO_H_ + +#include <rte_atomic.h> + +/** + * @file + * I/O device memory operations + * + * This file defines the generic API for I/O device memory read/write operations + */ + +#include <stdint.h> +#include <rte_common.h> +#include <rte_atomic.h> + +#ifdef __DOXYGEN__ + +/** + * Read a 8-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint8_t +rte_read8_relaxed(const volatile void *addr); + +/** + * Read a 16-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint16_t +rte_read16_relaxed(const volatile void *addr); + +/** + * Read a 32-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint32_t +rte_read32_relaxed(const volatile void *addr); + +/** + * Read a 64-bit value from I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. 
+ * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint64_t +rte_read64_relaxed(const volatile void *addr); + +/** + * Write a 8-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ + +static inline void +rte_write8_relaxed(uint8_t value, volatile void *addr); + +/** + * Write a 16-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write16_relaxed(uint16_t value, volatile void *addr); + +/** + * Write a 32-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write32_relaxed(uint32_t value, volatile void *addr); + +/** + * Write a 64-bit value to I/O device memory address *addr*. + * + * The relaxed version does not have additional I/O memory barrier, useful in + * accessing the device registers of integrated controllers which implicitly + * strongly ordered with respect to memory access. 
+ * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write64_relaxed(uint64_t value, volatile void *addr); + +/** + * Read a 8-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint8_t +rte_read8(const volatile void *addr); + +/** + * Read a 16-bit value from I/O device memory address *addr*. + * + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint16_t +rte_read16(const volatile void *addr); + +/** + * Read a 32-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint32_t +rte_read32(const volatile void *addr); + +/** + * Read a 64-bit value from I/O device memory address *addr*. + * + * @param addr + * I/O memory address to read the value from + * @return + * read value + */ +static inline uint64_t +rte_read64(const volatile void *addr); + +/** + * Write a 8-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ + +static inline void +rte_write8(uint8_t value, volatile void *addr); + +/** + * Write a 16-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write16(uint16_t value, volatile void *addr); + +/** + * Write a 32-bit value to I/O device memory address *addr*. + * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write32(uint32_t value, volatile void *addr); + +/** + * Write a 64-bit value to I/O device memory address *addr*. 
+ * + * @param value + * Value to write + * @param addr + * I/O memory address to write the value to + */ +static inline void +rte_write64(uint64_t value, volatile void *addr); + +#endif /* __DOXYGEN__ */ + +#ifndef RTE_OVERRIDE_IO_H + +static inline uint8_t __attribute__((always_inline)) +rte_read8_relaxed(const volatile void *addr) +{ + return *(const volatile uint8_t *)addr; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16_relaxed(const volatile void *addr) +{ + return *(const volatile uint16_t *)addr; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32_relaxed(const volatile void *addr) +{ + return *(const volatile uint32_t *)addr; +} + +static inline uint64_t __attribute__((always_inline)) +rte_read64_relaxed(const volatile void *addr) +{ + return *(const volatile uint64_t *)addr; +} + +static inline void __attribute__((always_inline)) +rte_write8_relaxed(uint8_t value, volatile void *addr) +{ + *(volatile uint8_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write16_relaxed(uint16_t value, volatile void *addr) +{ + *(volatile uint16_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write32_relaxed(uint32_t value, volatile void *addr) +{ + *(volatile uint32_t *)addr = value; +} + +static inline void __attribute__((always_inline)) +rte_write64_relaxed(uint64_t value, volatile void *addr) +{ + *(volatile uint64_t *)addr = value; +} + +static inline uint8_t __attribute__((always_inline)) +rte_read8(const volatile void *addr) +{ + uint8_t val; + val = rte_read8_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint16_t __attribute__((always_inline)) +rte_read16(const volatile void *addr) +{ + uint16_t val; + val = rte_read16_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline uint32_t __attribute__((always_inline)) +rte_read32(const volatile void *addr) +{ + uint32_t val; + val = rte_read32_relaxed(addr); + rte_io_rmb(); + return val; 
+} + +static inline uint64_t __attribute__((always_inline)) +rte_read64(const volatile void *addr) +{ + uint64_t val; + val = rte_read64_relaxed(addr); + rte_io_rmb(); + return val; +} + +static inline void __attribute__((always_inline)) +rte_write8(uint8_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write8_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write16(uint16_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write16_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write32(uint32_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write32_relaxed(value, addr); +} + +static inline void __attribute__((always_inline)) +rte_write64(uint64_t value, volatile void *addr) +{ + rte_io_wmb(); + rte_write64_relaxed(value, addr); +} + +#endif /* RTE_OVERRIDE_IO_H */ + +#endif /* _RTE_IO_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_vect.h b/lib/librte_eal/common/include/generic/rte_vect.h new file mode 100644 index 00000000..600ee9f3 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_vect.h @@ -0,0 +1,214 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_VECT_H_ +#define _RTE_VECT_H_ + +/** + * @file + * SIMD vector types + * + * This file defines types to use vector instructions with generic C code. + */ + +#include <stdint.h> + +/* Unsigned vector types */ + +/** + * 64 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v64u8_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint8_t rte_v64u8_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with unsigned 16 bits elements. + * + * a = (rte_v64u16_t){ a0, a1, a2, a3 } + */ +typedef uint16_t rte_v64u16_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with unsigned 32 bits elements. + * + * a = (rte_v64u32_t){ a0, a1 } + */ +typedef uint32_t rte_v64u32_t __attribute__((vector_size(8), aligned(8))); + +/** + * 128 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v128u8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef uint8_t rte_v128u8_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 16 bits elements. 
+ * + * a = (rte_v128u16_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint16_t rte_v128u16_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 32 bits elements. + * + * a = (rte_v128u32_t){ a0, a1, a2, a3 } + */ +typedef uint32_t rte_v128u32_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with unsigned 64 bits elements. + * + * a = (rte_v128u64_t){ a0, a1 } + */ +typedef uint64_t rte_v128u64_t __attribute__((vector_size(16), aligned(16))); + +/** + * 256 bits vector size to use with unsigned 8 bits elements. + * + * a = (rte_v256u8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15, + * a16, a17, a18, a19, a20, a21, a22, a23, + * a24, a25, a26, a27, a28, a29, a30, a31 } + */ +typedef uint8_t rte_v256u8_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 16 bits elements. + * + * a = (rte_v256u16_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef uint16_t rte_v256u16_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 32 bits elements. + * + * a = (rte_v256u32_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef uint32_t rte_v256u32_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with unsigned 64 bits elements. + * + * a = (rte_v256u64_t){ a0, a1, a2, a3 } + */ +typedef uint64_t rte_v256u64_t __attribute__((vector_size(32), aligned(32))); + + +/* Signed vector types */ + +/** + * 64 bits vector size to use with 8 bits elements. + * + * a = (rte_v64s8_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int8_t rte_v64s8_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with 16 bits elements.
+ * + * a = (rte_v64s16_t){ a0, a1, a2, a3 } + */ +typedef int16_t rte_v64s16_t __attribute__((vector_size(8), aligned(8))); + +/** + * 64 bits vector size to use with 32 bits elements. + * + * a = (rte_v64s32_t){ a0, a1 } + */ +typedef int32_t rte_v64s32_t __attribute__((vector_size(8), aligned(8))); + +/** + * 128 bits vector size to use with 8 bits elements. + * + * a = (rte_v128s8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef int8_t rte_v128s8_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 16 bits elements. + * + * a = (rte_v128s16_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int16_t rte_v128s16_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 32 bits elements. + * + * a = (rte_v128s32_t){ a0, a1, a2, a3 } + */ +typedef int32_t rte_v128s32_t __attribute__((vector_size(16), aligned(16))); + +/** + * 128 bits vector size to use with 64 bits elements. + * + * a = (rte_v128s64_t){ a0, a1 } + */ +typedef int64_t rte_v128s64_t __attribute__((vector_size(16), aligned(16))); + +/** + * 256 bits vector size to use with 8 bits elements. + * + * a = (rte_v256s8_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15, + * a16, a17, a18, a19, a20, a21, a22, a23, + * a24, a25, a26, a27, a28, a29, a30, a31 } + */ +typedef int8_t rte_v256s8_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 16 bits elements. + * + * a = (rte_v256s16_t){ a00, a01, a02, a03, a04, a05, a06, a07, + * a08, a09, a10, a11, a12, a13, a14, a15 } + */ +typedef int16_t rte_v256s16_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 32 bits elements.
+ * + * a = (rte_v256s32_t){ a0, a1, a2, a3, a4, a5, a6, a7 } + */ +typedef int32_t rte_v256s32_t __attribute__((vector_size(32), aligned(32))); + +/** + * 256 bits vector size to use with 64 bits elements. + * + * a = (rte_v256s64_t){ a0, a1, a2, a3 } + */ +typedef int64_t rte_v256s64_t __attribute__((vector_size(32), aligned(32))); + +#endif /* _RTE_VECT_H_ */ diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h new file mode 100644 index 00000000..7c369692 --- /dev/null +++ b/lib/librte_eal/common/include/rte_bus.h @@ -0,0 +1,158 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 NXP + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of NXP nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BUS_H_ +#define _RTE_BUS_H_ + +/** + * @file + * + * DPDK device bus interface + * + * This file exposes API and interfaces for bus abstraction + * over the devices and drivers in EAL. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <sys/queue.h> + +#include <rte_log.h> +#include <rte_dev.h> + +/** Double linked list of buses */ +TAILQ_HEAD(rte_bus_list, rte_bus); + +/** + * Bus specific scan for devices attached on the bus. + * For each bus object, the scan would be responsible for finding devices and + * adding them to its private device list. + * + * A bus should mandatorily implement this method. + * + * @return + * 0 for successful scan + * <0 for unsuccessful scan with error value + */ +typedef int (*rte_bus_scan_t)(void); + +/** + * Implementation specific probe function which is responsible for linking + * devices on that bus with applicable drivers. + * + * This is called while iterating over each registered bus. + * + * @return + * 0 for successful probe + * !0 for any error while probing + */ +typedef int (*rte_bus_probe_t)(void); + +/** + * A structure describing a generic bus. + */ +struct rte_bus { + TAILQ_ENTRY(rte_bus) next; /**< Next bus object in linked list */ + const char *name; /**< Name of the bus */ + rte_bus_scan_t scan; /**< Scan for devices attached to bus */ + rte_bus_probe_t probe; /**< Probe devices on bus */ +}; + +/** + * Register a Bus handler.
+ * + * @param bus + * A pointer to a rte_bus structure describing the bus + * to be registered. + */ +void rte_bus_register(struct rte_bus *bus); + +/** + * Unregister a Bus handler. + * + * @param bus + * A pointer to a rte_bus structure describing the bus + * to be unregistered. + */ +void rte_bus_unregister(struct rte_bus *bus); + +/** + * Scan all the buses. + * + * @return + * 0 in case of success in scanning all buses + * !0 in case of failure to scan + */ +int rte_bus_scan(void); + +/** + * For each device on the buses, perform a driver 'match' and call the + * driver-specific probe for device initialization. + * + * @return + * 0 for successful match/probe + * !0 otherwise + */ +int rte_bus_probe(void); + +/** + * Dump information of all the buses registered with EAL. + * + * @param f + * A valid and open output stream handle + * + * @return + * 0 in case of success + * !0 in case there is error in opening the output stream + */ +void rte_bus_dump(FILE *f); + +/** + * Helper for Bus registration. + * The constructor has higher priority than PMD constructors. + */ +#define RTE_REGISTER_BUS(nm, bus) \ +static void __attribute__((constructor(101), used)) businitfn_ ##nm(void) \ +{\ + (bus).name = RTE_STR(nm);\ + rte_bus_register(&bus); \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BUS_H */ diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index db5ac91c..e057f6e2 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -331,6 +331,29 @@ rte_bsf32(uint32_t v) #define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) #endif +/** + * Return pointer to the wrapping struct instance. + * + * Example: + * + * struct wrapper { + * ... + * struct child c; + * ... 
+ * }; + * + * struct child *x = obtain(...); + * struct wrapper *w = container_of(x, struct wrapper, c); + */ +#ifndef container_of +#define container_of(ptr, type, member) __extension__ ({ \ + const typeof(((type *)0)->member) *_ptr = (ptr); \ + __attribute__((unused)) type *_target_ptr = \ + (type *)(ptr); \ + (type *)(((uintptr_t)_ptr) - offsetof(type, member)); \ + }) +#endif + #define _RTE_STR(x) #x /** Take a macro value and get a string version of it */ #define RTE_STR(x) _RTE_STR(x) diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index 8840380d..de20c063 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -49,6 +49,7 @@ extern "C" { #include <stdio.h> #include <sys/queue.h> +#include <rte_config.h> #include <rte_log.h> __attribute__((format(printf, 2, 0))) @@ -70,6 +71,19 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer); } +/* + * Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the + * RTE_*_RET() macros defined below is compiled in debug mode. + */ +#if defined(RTE_LIBRTE_ETHDEV_DEBUG) || \ + defined(RTE_LIBRTE_CRYPTODEV_DEBUG) || \ + defined(RTE_LIBRTE_EVENTDEV_DEBUG) +#define RTE_PMD_DEBUG_TRACE(...) \ + rte_pmd_debug_trace(__func__, __VA_ARGS__) +#else +#define RTE_PMD_DEBUG_TRACE(...) (void)0 +#endif + /* Macros for checking for restricting functions to primary instance only */ #define RTE_PROC_PRIMARY_OR_ERR_RET(retval) do { \ if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \ @@ -109,40 +123,6 @@ struct rte_mem_resource { void *addr; /**< Virtual address, NULL when not mapped. */ }; -/** Double linked list of device drivers. */ -TAILQ_HEAD(rte_driver_list, rte_driver); -/** Double linked list of devices. */ -TAILQ_HEAD(rte_device_list, rte_device); - -/* Forward declaration */ -struct rte_driver; - -/** - * A structure describing a generic device. 
- */ -struct rte_device { - TAILQ_ENTRY(rte_device) next; /**< Next device */ - struct rte_driver *driver; /**< Associated driver */ - int numa_node; /**< NUMA node connection */ - struct rte_devargs *devargs; /**< Device user arguments */ -}; - -/** - * Insert a device detected by a bus scanning. - * - * @param dev - * A pointer to a rte_device structure describing the detected device. - */ -void rte_eal_device_insert(struct rte_device *dev); - -/** - * Remove a device (e.g. when being unplugged). - * - * @param dev - * A pointer to a rte_device structure describing the device to be removed. - */ -void rte_eal_device_remove(struct rte_device *dev); - /** * A structure describing a device driver. */ @@ -153,27 +133,15 @@ struct rte_driver { }; /** - * Register a device driver. - * - * @param driver - * A pointer to a rte_dev structure describing the driver - * to be registered. - */ -void rte_eal_driver_register(struct rte_driver *driver); - -/** - * Unregister a device driver. - * - * @param driver - * A pointer to a rte_dev structure describing the driver - * to be unregistered. - */ -void rte_eal_driver_unregister(struct rte_driver *driver); - -/** - * Initalize all the registered drivers in this process + * A structure describing a generic device. */ -int rte_eal_dev_init(void); +struct rte_device { + TAILQ_ENTRY(rte_device) next; /**< Next device */ + const char *name; /**< Device name */ + const struct rte_driver *driver;/**< Associated driver */ + int numa_node; /**< NUMA node connection */ + struct rte_devargs *devargs; /**< Device user arguments */ +}; /** * Initialize a driver specified by name. @@ -185,7 +153,7 @@ int rte_eal_dev_init(void); * @return * 0 on success, negative on error */ -int rte_eal_vdev_init(const char *name, const char *args); +int rte_vdev_init(const char *name, const char *args); /** * Uninitalize a driver specified by name. 
@@ -195,7 +163,7 @@ int rte_eal_vdev_init(const char *name, const char *args); * @return * 0 on success, negative on error */ -int rte_eal_vdev_uninit(const char *name); +int rte_vdev_uninit(const char *name); /** * Attach a device to a registered driver. @@ -239,6 +207,31 @@ RTE_STR(table) static const char DRV_EXP_TAG(name, param_string_export)[] \ __attribute__((used)) = str +/** + * Advertise the list of kernel modules required to run this driver + * + * This string lists the kernel modules required for the devices + * associated to a PMD. The format of each line of the string is: + * "<device-pattern> <kmod-expression>". + * + * The possible formats for the device pattern are: + * "*" all devices supported by this driver + * "pci:*" all PCI devices supported by this driver + * "pci:v8086:d*:sv*:sd*" all PCI devices supported by this driver + * whose vendor id is 0x8086. + * + * The format of the kernel modules list is a parenthesed expression + * containing logical-and (&) and logical-or (|). + * + * The device pattern and the kmod expression are separated by a space. + * + * Example: + * - "* igb_uio | uio_pci_generic | vfio" + */ +#define RTE_PMD_REGISTER_KMOD_DEP(name, str) \ +static const char DRV_EXP_TAG(name, kmod_dep_export)[] \ +__attribute__((used)) = str + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index d150b9dd..abf020bf 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -146,15 +146,45 @@ int rte_eal_iopl_init(void); * This behavior may change in the future. * * @param argc - * The argc argument that was given to the main() function. + * A non-negative value. If it is greater than 0, the array members + * for argv[0] through argv[argc] (non-inclusive) shall contain pointers + * to strings. * @param argv - * The argv argument that was given to the main() function. + * An array of strings. 
The contents of the array, as well as the strings + * which are pointed to by the array, may be modified by this function. * @return * - On success, the number of parsed arguments, which is greater or * equal to zero. After the call to rte_eal_init(), - * all arguments argv[x] with x < ret may be modified and should - * not be accessed by the application. - * - On failure, a negative error value. + * all arguments argv[x] with x < ret may have been modified by this + * function call and should not be further interpreted by the + * application. The EAL does not take any ownership of the memory used + * for either the argv array, or its members. + * - On failure, -1 and rte_errno is set to a value indicating the cause + * for failure. In some instances, the application will need to be + * restarted as part of clearing the issue. + * + * Error codes returned via rte_errno: + * EACCES indicates a permissions issue. + * + * EAGAIN indicates either a bus or system resource was not available, + * setup may be attempted again. + * + * EALREADY indicates that the rte_eal_init function has already been + * called, and cannot be called again. + * + * EFAULT indicates the tailq configuration name was not found in + * memory configuration. + * + * EINVAL indicates invalid parameters were passed as argv/argc. + * + * ENOMEM indicates failure likely caused by an out-of-memory condition. + * + * ENODEV indicates memory setup issues. + * + * ENOTSUP indicates that the EAL cannot initialize on this system. + * + * EPROTO indicates that the PCI bus is either not present, or is not + * readable by the eal. 
*/ int rte_eal_init(int argc, char **argv); diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h index fd3c6eff..5d06ed79 100644 --- a/lib/librte_eal/common/include/rte_interrupts.h +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -51,8 +51,7 @@ extern "C" { struct rte_intr_handle; /** Function to be registered for the specific interrupt */ -typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle, - void *cb_arg); +typedef void (*rte_intr_callback_fn)(void *cb_arg); #include <exec-env/rte_interrupts.h> @@ -70,7 +69,7 @@ typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle, * - On success, zero. * - On failure, a negative value. */ -int rte_intr_callback_register(struct rte_intr_handle *intr_handle, +int rte_intr_callback_register(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg); /** @@ -88,7 +87,7 @@ int rte_intr_callback_register(struct rte_intr_handle *intr_handle, * - On success, return the number of callback entities removed. * - On failure, a negative value. */ -int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, +int rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg); /** @@ -101,7 +100,7 @@ int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, * - On success, zero. * - On failure, a negative value. */ -int rte_intr_enable(struct rte_intr_handle *intr_handle); +int rte_intr_enable(const struct rte_intr_handle *intr_handle); /** * It disables the interrupt for the specified handle. @@ -113,7 +112,7 @@ int rte_intr_enable(struct rte_intr_handle *intr_handle); * - On success, zero. * - On failure, a negative value. 
*/ -int rte_intr_disable(struct rte_intr_handle *intr_handle); +int rte_intr_disable(const struct rte_intr_handle *intr_handle); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h index 29f7d192..34191385 100644 --- a/lib/librte_eal/common/include/rte_log.h +++ b/lib/librte_eal/common/include/rte_log.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,45 +50,56 @@ extern "C" { #include <stdio.h> #include <stdarg.h> +#include <rte_common.h> + +struct rte_log_dynamic_type; + /** The rte_log structure. */ struct rte_logs { uint32_t type; /**< Bitfield with enabled logs. */ uint32_t level; /**< Log level. */ FILE *file; /**< Output file set by rte_openlog_stream, or NULL. */ + size_t dynamic_types_len; + struct rte_log_dynamic_type *dynamic_types; }; /** Global log informations */ extern struct rte_logs rte_logs; /* SDK log type */ -#define RTE_LOGTYPE_EAL 0x00000001 /**< Log related to eal. */ -#define RTE_LOGTYPE_MALLOC 0x00000002 /**< Log related to malloc. */ -#define RTE_LOGTYPE_RING 0x00000004 /**< Log related to ring. */ -#define RTE_LOGTYPE_MEMPOOL 0x00000008 /**< Log related to mempool. */ -#define RTE_LOGTYPE_TIMER 0x00000010 /**< Log related to timers. */ -#define RTE_LOGTYPE_PMD 0x00000020 /**< Log related to poll mode driver. */ -#define RTE_LOGTYPE_HASH 0x00000040 /**< Log related to hash table. */ -#define RTE_LOGTYPE_LPM 0x00000080 /**< Log related to LPM. */ -#define RTE_LOGTYPE_KNI 0x00000100 /**< Log related to KNI. */ -#define RTE_LOGTYPE_ACL 0x00000200 /**< Log related to ACL. */ -#define RTE_LOGTYPE_POWER 0x00000400 /**< Log related to power. */ -#define RTE_LOGTYPE_METER 0x00000800 /**< Log related to QoS meter. 
*/ -#define RTE_LOGTYPE_SCHED 0x00001000 /**< Log related to QoS port scheduler. */ -#define RTE_LOGTYPE_PORT 0x00002000 /**< Log related to port. */ -#define RTE_LOGTYPE_TABLE 0x00004000 /**< Log related to table. */ -#define RTE_LOGTYPE_PIPELINE 0x00008000 /**< Log related to pipeline. */ -#define RTE_LOGTYPE_MBUF 0x00010000 /**< Log related to mbuf. */ -#define RTE_LOGTYPE_CRYPTODEV 0x00020000 /**< Log related to cryptodev. */ +#define RTE_LOGTYPE_EAL 0 /**< Log related to eal. */ +#define RTE_LOGTYPE_MALLOC 1 /**< Log related to malloc. */ +#define RTE_LOGTYPE_RING 2 /**< Log related to ring. */ +#define RTE_LOGTYPE_MEMPOOL 3 /**< Log related to mempool. */ +#define RTE_LOGTYPE_TIMER 4 /**< Log related to timers. */ +#define RTE_LOGTYPE_PMD 5 /**< Log related to poll mode driver. */ +#define RTE_LOGTYPE_HASH 6 /**< Log related to hash table. */ +#define RTE_LOGTYPE_LPM 7 /**< Log related to LPM. */ +#define RTE_LOGTYPE_KNI 8 /**< Log related to KNI. */ +#define RTE_LOGTYPE_ACL 9 /**< Log related to ACL. */ +#define RTE_LOGTYPE_POWER 10 /**< Log related to power. */ +#define RTE_LOGTYPE_METER 11 /**< Log related to QoS meter. */ +#define RTE_LOGTYPE_SCHED 12 /**< Log related to QoS port scheduler. */ +#define RTE_LOGTYPE_PORT 13 /**< Log related to port. */ +#define RTE_LOGTYPE_TABLE 14 /**< Log related to table. */ +#define RTE_LOGTYPE_PIPELINE 15 /**< Log related to pipeline. */ +#define RTE_LOGTYPE_MBUF 16 /**< Log related to mbuf. */ +#define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */ +#define RTE_LOGTYPE_EFD 18 /**< Log related to EFD. */ +#define RTE_LOGTYPE_EVENTDEV 19 /**< Log related to eventdev. */ /* these log types can be used in an application */ -#define RTE_LOGTYPE_USER1 0x01000000 /**< User-defined log type 1. */ -#define RTE_LOGTYPE_USER2 0x02000000 /**< User-defined log type 2. */ -#define RTE_LOGTYPE_USER3 0x04000000 /**< User-defined log type 3. */ -#define RTE_LOGTYPE_USER4 0x08000000 /**< User-defined log type 4. 
*/ -#define RTE_LOGTYPE_USER5 0x10000000 /**< User-defined log type 5. */ -#define RTE_LOGTYPE_USER6 0x20000000 /**< User-defined log type 6. */ -#define RTE_LOGTYPE_USER7 0x40000000 /**< User-defined log type 7. */ -#define RTE_LOGTYPE_USER8 0x80000000 /**< User-defined log type 8. */ +#define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */ +#define RTE_LOGTYPE_USER2 25 /**< User-defined log type 2. */ +#define RTE_LOGTYPE_USER3 26 /**< User-defined log type 3. */ +#define RTE_LOGTYPE_USER4 27 /**< User-defined log type 4. */ +#define RTE_LOGTYPE_USER5 28 /**< User-defined log type 5. */ +#define RTE_LOGTYPE_USER6 29 /**< User-defined log type 6. */ +#define RTE_LOGTYPE_USER7 30 /**< User-defined log type 7. */ +#define RTE_LOGTYPE_USER8 31 /**< User-defined log type 8. */ + +/** First identifier for extended logs */ +#define RTE_LOGTYPE_FIRST_EXT_ID 32 /* Can't use 0, as it gives compiler warnings */ #define RTE_LOG_EMERG 1U /**< System is unusable. */ @@ -118,18 +129,32 @@ int rte_openlog_stream(FILE *f); /** * Set the global log level. * - * After this call, all logs that are lower or equal than level and - * lower or equal than the RTE_LOG_LEVEL configuration option will be - * displayed. + * After this call, logs with a level lower or equal than the level + * passed as argument will be displayed. * * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). */ +void rte_log_set_global_level(uint32_t level); + +/** + * Deprecated, replaced by rte_log_set_global_level(). + */ +__rte_deprecated void rte_set_log_level(uint32_t level); /** * Get the global log level. + * + * @return + * The current global log level. + */ +uint32_t rte_log_get_global_level(void); + +/** + * Deprecated, replaced by rte_log_get_global_level(). */ +__rte_deprecated uint32_t rte_get_log_level(void); /** @@ -140,14 +165,40 @@ uint32_t rte_get_log_level(void); * @param enable * True for enable; false for disable. 
*/ +__rte_deprecated void rte_set_log_type(uint32_t type, int enable); /** * Get the global log type. */ +__rte_deprecated uint32_t rte_get_log_type(void); /** + * Set the log level for a given type. + * + * @param pattern + * The regexp identifying the log type. + * @param level + * The level to be set. + * @return + * 0 on success, a negative value if level is invalid. + */ +int rte_log_set_level_regexp(const char *pattern, uint32_t level); + +/** + * Set the log level for a given type. + * + * @param logtype + * The log type identifier. + * @param level + * The level to be set. + * @return + * 0 on success, a negative value if logtype or level is invalid. + */ +int rte_log_set_level(uint32_t logtype, uint32_t level); + +/** * Get the current loglevel for the message being processed. * * Before calling the user-defined stream for logging, the log @@ -176,6 +227,30 @@ int rte_log_cur_msg_loglevel(void); int rte_log_cur_msg_logtype(void); /** + * Register a dynamic log type + * + * If a log is already registered with the same type, the returned value + * is the same than the previous one. + * + * @param name + * The string identifying the log type. + * @return + * - >0: success, the returned value is the log type identifier. + * - (-ENONEM): cannot allocate memory. + */ +int rte_log_register(const char *name); + +/** + * Dump log information. + * + * Dump the global level and the registered log types. + * + * @param f + * The output stream where the dump should be sent. + */ +void rte_log_dump(FILE *f); + +/** * Generates a log message. * * The message will be sent in the stream defined by the previous call @@ -184,9 +259,8 @@ int rte_log_cur_msg_logtype(void); * The level argument determines if the log should be displayed or * not, depending on the global rte_logs variable. * - * The preferred alternative is the RTE_LOG() function because debug logs may - * be removed at compilation time if optimization is enabled. 
Moreover, - * logs are automatically prefixed by type when using the macro. + * The preferred alternative is the RTE_LOG() because it adds the + * level and type in the logged string. * * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). @@ -217,8 +291,8 @@ int rte_log(uint32_t level, uint32_t logtype, const char *format, ...) * not, depending on the global rte_logs variable. A trailing * newline may be added if needed. * - * The preferred alternative is the RTE_LOG() because debug logs may be - * removed at compilation time. + * The preferred alternative is the RTE_LOG() because it adds the + * level and type in the logged string. * * @param level * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). @@ -239,15 +313,8 @@ int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) /** * Generates a log message. * - * The RTE_LOG() is equivalent to rte_log() with two differences: - - * - RTE_LOG() can be used to remove debug logs at compilation time, - * depending on RTE_LOG_LEVEL configuration option, and compilation - * optimization level. If optimization is enabled, the tests - * involving constants only are pre-computed. If compilation is done - * with -O0, these tests will be done at run time. - * - The log level and log type names are smaller, for example: - * RTE_LOG(INFO, EAL, "this is a %s", "log"); + * The RTE_LOG() is a helper that prefixes the string with the log level + * and type, and call rte_log(). * * @param l * Log level. A value between EMERG (1) and DEBUG (8). The short name is @@ -263,7 +330,31 @@ int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) * - Negative on error. */ #define RTE_LOG(l, t, ...) \ - (void)((RTE_LOG_ ## l <= RTE_LOG_LEVEL) ? \ + rte_log(RTE_LOG_ ## l, \ + RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) + +/** + * Generates a log message for data path. 
+ * + * Similar to RTE_LOG(), except that it is removed at compilation time + * if the RTE_LOG_DP_LEVEL configuration option is lower than the log + * level argument. + * + * @param l + * Log level. A value between EMERG (1) and DEBUG (8). The short name is + * expanded by the macro, so it cannot be an integer value. + * @param t + * The log type, for example, EAL. The short name is expanded by the + * macro, so it cannot be an integer value. + * @param ... + * The fmt string, as in printf(3), followed by the variable arguments + * required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +#define RTE_LOG_DP(l, t, ...) \ + (void)((RTE_LOG_ ## l <= RTE_LOG_DP_LEVEL) ? \ rte_log(RTE_LOG_ ## l, \ RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) : \ 0) diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h index 9ce88472..ab64c63c 100644 --- a/lib/librte_eal/common/include/rte_pci.h +++ b/lib/librte_eal/common/include/rte_pci.h @@ -85,12 +85,7 @@ extern "C" { #include <rte_debug.h> #include <rte_interrupts.h> #include <rte_dev.h> - -TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */ -TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */ - -extern struct pci_driver_list pci_driver_list; /**< Global list of PCI drivers. */ -extern struct pci_device_list pci_device_list; /**< Global list of PCI devices. */ +#include <rte_bus.h> /** Pathname of PCI devices directory. */ const char *pci_get_sysfs_path(void); @@ -111,6 +106,25 @@ const char *pci_get_sysfs_path(void); /** Maximum number of PCI resources. 
*/ #define PCI_MAX_RESOURCE 6 +/** Name of PCI Bus */ +#define PCI_BUS_NAME "PCI" + +/* Forward declarations */ +struct rte_pci_device; +struct rte_pci_driver; + +/** List of PCI devices */ +TAILQ_HEAD(rte_pci_device_list, rte_pci_device); +/** List of PCI drivers */ +TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver); + +/* PCI Bus iterators */ +#define FOREACH_DEVICE_ON_PCIBUS(p) \ + TAILQ_FOREACH(p, &(rte_pci_bus.device_list), next) + +#define FOREACH_DRIVER_ON_PCIBUS(p) \ + TAILQ_FOREACH(p, &(rte_pci_bus.driver_list), next) + /** * A structure describing an ID for a PCI driver. Each driver provides a * table of these IDs for each device that it supports. @@ -158,8 +172,15 @@ struct rte_pci_device { struct rte_pci_driver *driver; /**< Associated driver */ uint16_t max_vfs; /**< sriov enable if not zero */ enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ + char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */ }; +/** + * @internal + * Helper macro for drivers that need to convert to struct rte_pci_device. + */ +#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device) + /** Any PCI device identifier (vendor, device, ...) */ #define PCI_ANY_ID (0xffff) #define RTE_CLASS_ANY_ID (0xffffff) @@ -182,8 +203,6 @@ struct rte_pci_device { .subsystem_device_id = PCI_ANY_ID #endif -struct rte_pci_driver; - /** * Initialisation function for the driver called during PCI probing. */ @@ -200,20 +219,28 @@ typedef int (pci_remove_t)(struct rte_pci_device *); struct rte_pci_driver { TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ struct rte_driver driver; /**< Inherit core driver. */ + struct rte_pci_bus *bus; /**< PCI bus reference. */ pci_probe_t *probe; /**< Device Probe function. */ pci_remove_t *remove; /**< Device Remove function. */ const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ uint32_t drv_flags; /**< Flags contolling handling of device. 
*/ }; +/** + * Structure describing the PCI bus + */ +struct rte_pci_bus { + struct rte_bus bus; /**< Inherit the generic class */ + struct rte_pci_device_list device_list; /**< List of PCI devices */ + struct rte_pci_driver_list driver_list; /**< List of PCI drivers */ +}; + /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ #define RTE_PCI_DRV_NEED_MAPPING 0x0001 -/** Device needs to be unbound even if no module is provided */ -#define RTE_PCI_DRV_FORCE_UNBIND 0x0004 /** Device driver supports link state interrupt */ #define RTE_PCI_DRV_INTR_LSC 0x0008 -/** Device driver supports detaching capability */ -#define RTE_PCI_DRV_DETACHABLE 0x0010 +/** Device driver supports device removal interrupt */ +#define RTE_PCI_DRV_INTR_RMV 0x0010 /** * A structure describing a PCI mapping. @@ -315,8 +342,8 @@ eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) * The output buffer size */ static inline void -rte_eal_pci_device_name(const struct rte_pci_addr *addr, - char *output, size_t size) +rte_pci_device_name(const struct rte_pci_addr *addr, + char *output, size_t size) { RTE_VERIFY(size >= PCI_PRI_STR_SIZE); RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT, @@ -366,20 +393,17 @@ rte_eal_compare_pci_addr(const struct rte_pci_addr *addr, * @return * 0 on success, negative on error */ -int rte_eal_pci_scan(void); +int rte_pci_scan(void); /** - * Probe the PCI bus for registered drivers. - * - * Scan the content of the PCI bus, and call the probe() function for - * all registered drivers that have a matching entry in its id_table - * for discovered devices. + * Probe the PCI bus * * @return * - 0 on success. - * - Negative on error. + * - !0 on error. */ -int rte_eal_pci_probe(void); +int +rte_pci_probe(void); /** * Map the PCI device resources in user space virtual memory address @@ -396,7 +420,7 @@ int rte_eal_pci_probe(void); * 0 on success, negative on error and positive if no driver * is found for the device. 
*/ -int rte_eal_pci_map_device(struct rte_pci_device *dev); +int rte_pci_map_device(struct rte_pci_device *dev); /** * Unmap this device @@ -405,7 +429,7 @@ int rte_eal_pci_map_device(struct rte_pci_device *dev); * A pointer to a rte_pci_device structure describing the device * to use */ -void rte_eal_pci_unmap_device(struct rte_pci_device *dev); +void rte_pci_unmap_device(struct rte_pci_device *dev); /** * @internal @@ -452,7 +476,7 @@ void pci_unmap_resource(void *requested_addr, size_t size); * - 0 on success. * - Negative on error. */ -int rte_eal_pci_probe_one(const struct rte_pci_addr *addr); +int rte_pci_probe_one(const struct rte_pci_addr *addr); /** * Close the single PCI device. @@ -467,7 +491,7 @@ int rte_eal_pci_probe_one(const struct rte_pci_addr *addr); * - 0 on success. * - Negative on error. */ -int rte_eal_pci_detach(const struct rte_pci_addr *addr); +int rte_pci_detach(const struct rte_pci_addr *addr); /** * Dump the content of the PCI bus. @@ -475,7 +499,7 @@ int rte_eal_pci_detach(const struct rte_pci_addr *addr); * @param f * A pointer to a file for output */ -void rte_eal_pci_dump(FILE *f); +void rte_pci_dump(FILE *f); /** * Register a PCI driver. @@ -484,7 +508,7 @@ void rte_eal_pci_dump(FILE *f); * A pointer to a rte_pci_driver structure describing the driver * to be registered. */ -void rte_eal_pci_register(struct rte_pci_driver *driver); +void rte_pci_register(struct rte_pci_driver *driver); /** Helper for PCI device registration from driver (eth, crypto) instance */ #define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ @@ -492,7 +516,7 @@ RTE_INIT(pciinitfn_ ##nm); \ static void pciinitfn_ ##nm(void) \ {\ (pci_drv).driver.name = RTE_STR(nm);\ - rte_eal_pci_register(&pci_drv); \ + rte_pci_register(&pci_drv); \ } \ RTE_PMD_EXPORT_NAME(nm, __COUNTER__) @@ -503,7 +527,7 @@ RTE_PMD_EXPORT_NAME(nm, __COUNTER__) * A pointer to a rte_pci_driver structure describing the driver * to be unregistered. 
*/ -void rte_eal_pci_unregister(struct rte_pci_driver *driver); +void rte_pci_unregister(struct rte_pci_driver *driver); /** * Read PCI config space. @@ -518,8 +542,8 @@ void rte_eal_pci_unregister(struct rte_pci_driver *driver); * @param offset * The offset into PCI config space */ -int rte_eal_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset); +int rte_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset); /** * Write PCI config space. @@ -534,8 +558,8 @@ int rte_eal_pci_read_config(const struct rte_pci_device *device, * @param offset * The offset into PCI config space */ -int rte_eal_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset); +int rte_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset); /** * A structure used to access io resources for a pci device. @@ -563,8 +587,8 @@ struct rte_pci_ioport { * @return * 0 on success, negative on error. */ -int rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p); +int rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); /** * Release any resources used in a rte_pci_ioport object. @@ -574,7 +598,7 @@ int rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, * @return * 0 on success, negative on error. */ -int rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p); +int rte_pci_ioport_unmap(struct rte_pci_ioport *p); /** * Read from a io pci resource. @@ -588,8 +612,8 @@ int rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p); * @param offset * The offset into the pci io resource. */ -void rte_eal_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset); +void rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); /** * Write to a io pci resource. 
@@ -603,8 +627,8 @@ void rte_eal_pci_ioport_read(struct rte_pci_ioport *p, * @param offset * The offset into the pci io resource. */ -void rte_eal_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset); +void rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h index 784e837d..e6b678ea 100644 --- a/lib/librte_eal/common/include/rte_vdev.h +++ b/lib/librte_eal/common/include/rte_vdev.h @@ -39,6 +39,28 @@ extern "C" { #include <sys/queue.h> #include <rte_dev.h> +#include <rte_devargs.h> + +struct rte_vdev_device { + TAILQ_ENTRY(rte_vdev_device) next; /**< Next attached vdev */ + struct rte_device device; /**< Inherit core device */ +}; + +static inline const char * +rte_vdev_device_name(const struct rte_vdev_device *dev) +{ + if (dev && dev->device.devargs) + return dev->device.devargs->virt.drv_name; + return NULL; +} + +static inline const char * +rte_vdev_device_args(const struct rte_vdev_device *dev) +{ + if (dev && dev->device.devargs) + return dev->device.devargs->args; + return ""; +} /** Double linked list of virtual device drivers. */ TAILQ_HEAD(vdev_driver_list, rte_vdev_driver); @@ -46,12 +68,12 @@ TAILQ_HEAD(vdev_driver_list, rte_vdev_driver); /** * Probe function called for each virtual device driver once. */ -typedef int (rte_vdev_probe_t)(const char *name, const char *args); +typedef int (rte_vdev_probe_t)(struct rte_vdev_device *dev); /** * Remove function called for each virtual device driver once. */ -typedef int (rte_vdev_remove_t)(const char *name); +typedef int (rte_vdev_remove_t)(struct rte_vdev_device *dev); /** * A virtual device driver abstraction. @@ -70,7 +92,7 @@ struct rte_vdev_driver { * A pointer to a rte_vdev_driver structure describing the driver * to be registered. 
*/ -void rte_eal_vdrv_register(struct rte_vdev_driver *driver); +void rte_vdev_register(struct rte_vdev_driver *driver); /** * Unregister a virtual device driver. @@ -79,7 +101,7 @@ void rte_eal_vdrv_register(struct rte_vdev_driver *driver); * A pointer to a rte_vdev_driver structure describing the driver * to be unregistered. */ -void rte_eal_vdrv_unregister(struct rte_vdev_driver *driver); +void rte_vdev_unregister(struct rte_vdev_driver *driver); #define RTE_PMD_REGISTER_VDEV(nm, vdrv)\ RTE_INIT(vdrvinitfn_ ##vdrv);\ @@ -88,7 +110,7 @@ static void vdrvinitfn_ ##vdrv(void)\ {\ (vdrv).driver.name = RTE_STR(nm);\ (vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\ - rte_eal_vdrv_register(&vdrv);\ + rte_vdev_register(&vdrv);\ } \ RTE_PMD_EXPORT_NAME(nm, __COUNTER__) diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 0de35fb7..07a085eb 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -56,17 +56,17 @@ extern "C" { /** * Major version/year number i.e. the yy in yy.mm.z */ -#define RTE_VER_YEAR 16 +#define RTE_VER_YEAR 17 /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 11 +#define RTE_VER_MONTH 5 /** * Patch level number i.e. the z in yy.mm.z */ -#define RTE_VER_MINOR 1 +#define RTE_VER_MINOR 0 /** * Extra string to be appended to version number diff --git a/lib/librte_eal/common/include/rte_warnings.h b/lib/librte_eal/common/include/rte_warnings.h deleted file mode 100644 index 54b545c9..00000000 --- a/lib/librte_eal/common/include/rte_warnings.h +++ /dev/null @@ -1,84 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * @file - * Definitions of warnings for use of various insecure functions - */ - -#ifndef _RTE_WARNINGS_H_ -#define _RTE_WARNINGS_H_ - -#ifdef RTE_INSECURE_FUNCTION_WARNING - -/* we need to include all used standard header files so that they appear - * _before_ we poison the function names. 
- */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> -#include <errno.h> -#ifdef RTE_EXEC_ENV_LINUXAPP -#include <dirent.h> -#endif - -/* the following function are deemed not fully secure for use e.g. they - * do not always null-terminate arguments */ -#pragma GCC poison sprintf strtok snprintf vsnprintf -#pragma GCC poison strlen strcpy strcat -#pragma GCC poison sscanf - -/* other unsafe functions may be implemented as macros so just undef them */ -#ifdef strsep -#undef strsep -#else -#pragma GCC poison strsep -#endif - -#ifdef strncpy -#undef strncpy -#else -#pragma GCC poison strncpy -#endif - -#ifdef strncat -#undef strncat -#else -#pragma GCC poison strncat -#endif - -#endif - -#endif /* RTE_WARNINGS_H */ diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile index 20d2a916..4794696b 100644 --- a/lib/librte_eal/linuxapp/Makefile +++ b/lib/librte_eal/linuxapp/Makefile @@ -34,6 +34,8 @@ include $(RTE_SDK)/mk/rte.vars.mk DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio DIRS-$(CONFIG_RTE_KNI_KMOD) += kni +DEPDIRS-kni := eal DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0 +DEPDIRS-xen_dom0 := eal include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 4e206f09..640afd08 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -37,7 +37,7 @@ ARCH_DIR ?= $(RTE_ARCH) EXPORT_MAP := rte_eal_version.map VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) -LIBABIVER := 3 +LIBABIVER := 4 VPATH += $(RTE_SDK)/lib/librte_eal/common @@ -87,6 +87,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_cpuflags.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_bus.c 
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c @@ -130,7 +131,4 @@ INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ $(addprefix include/exec-env/,$(INC)) -DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common -DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common/arch/$(ARCH_DIR) - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 2075282e..7c78f2dc 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -61,6 +61,7 @@ #include <rte_launch.h> #include <rte_eal.h> #include <rte_eal_memconfig.h> +#include <rte_errno.h> #include <rte_per_lcore.h> #include <rte_lcore.h> #include <rte_log.h> @@ -69,6 +70,7 @@ #include <rte_string_fns.h> #include <rte_cpuflags.h> #include <rte_interrupts.h> +#include <rte_bus.h> #include <rte_pci.h> #include <rte_dev.h> #include <rte_devargs.h> @@ -210,7 +212,7 @@ rte_eal_config_create(void) rte_panic("Cannot mmap memory for rte_config\n"); } memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); - rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; + rte_config.mem_config = rte_mem_cfg_addr; /* store address of the config in the config itself so that secondary * processes could later map the config into this exact location */ @@ -490,8 +492,6 @@ eal_log_level_parse(int argc, char **argv) argvopt = argv; optind = 1; - eal_reset_internal_config(&internal_config); - while ((opt = getopt_long(argc, argvopt, eal_short_options, eal_long_options, &option_index)) != EOF) { @@ -739,6 +739,12 @@ static int rte_eal_vfio_setup(void) } #endif +static void rte_eal_init_alert(const char *msg) +{ + fprintf(stderr, "EAL: FATAL: %s\n", msg); + RTE_LOG(ERR, EAL, "%s\n", msg); +} + /* Launch threads, 
called at application init(). */ int rte_eal_init(int argc, char **argv) @@ -751,33 +757,51 @@ rte_eal_init(int argc, char **argv) char thread_name[RTE_MAX_THREAD_NAME_LEN]; /* checks if the machine is adequate */ - rte_cpu_check_supported(); + if (!rte_cpu_is_supported()) { + rte_eal_init_alert("unsupported cpu type."); + rte_errno = ENOTSUP; + return -1; + } - if (!rte_atomic32_test_and_set(&run_once)) + if (!rte_atomic32_test_and_set(&run_once)) { + rte_eal_init_alert("already called initialization."); + rte_errno = EALREADY; return -1; + } logid = strrchr(argv[0], '/'); logid = strdup(logid ? logid + 1: argv[0]); thread_id = pthread_self(); - eal_log_level_parse(argc, argv); + eal_reset_internal_config(&internal_config); /* set log level as early as possible */ - rte_set_log_level(internal_config.log_level); + eal_log_level_parse(argc, argv); - if (rte_eal_cpu_init() < 0) - rte_panic("Cannot detect lcores\n"); + if (rte_eal_cpu_init() < 0) { + rte_eal_init_alert("Cannot detect lcores."); + rte_errno = ENOTSUP; + return -1; + } fctret = eal_parse_args(argc, argv); - if (fctret < 0) - exit(1); + if (fctret < 0) { + rte_eal_init_alert("Invalid 'command line' arguments."); + rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } if (internal_config.no_hugetlbfs == 0 && internal_config.process_type != RTE_PROC_SECONDARY && internal_config.xen_dom0_support == 0 && - eal_hugepage_info_init() < 0) - rte_panic("Cannot get hugepage information\n"); + eal_hugepage_info_init() < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { if (internal_config.no_hugetlbfs) @@ -799,39 +823,59 @@ rte_eal_init(int argc, char **argv) rte_config_init(); - if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) - rte_panic("Cannot init logs\n"); - - if (rte_eal_pci_init() < 0) - rte_panic("Cannot init PCI\n"); 
+ if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) { + rte_eal_init_alert("Cannot init logging."); + rte_errno = ENOMEM; + rte_atomic32_clear(&run_once); + return -1; + } #ifdef VFIO_PRESENT - if (rte_eal_vfio_setup() < 0) - rte_panic("Cannot init VFIO\n"); + if (rte_eal_vfio_setup() < 0) { + rte_eal_init_alert("Cannot init VFIO\n"); + rte_errno = EAGAIN; + rte_atomic32_clear(&run_once); + return -1; + } #endif - if (rte_eal_memory_init() < 0) - rte_panic("Cannot init memory\n"); + if (rte_eal_memory_init() < 0) { + rte_eal_init_alert("Cannot init memory\n"); + rte_errno = ENOMEM; + return -1; + } /* the directories are locked during eal_hugepage_info_init */ eal_hugedirs_unlock(); - if (rte_eal_memzone_init() < 0) - rte_panic("Cannot init memzone\n"); + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone\n"); + rte_errno = ENODEV; + return -1; + } - if (rte_eal_tailqs_init() < 0) - rte_panic("Cannot init tail queues for objects\n"); + if (rte_eal_tailqs_init() < 0) { + rte_eal_init_alert("Cannot init tail queues for objects\n"); + rte_errno = EFAULT; + return -1; + } - if (rte_eal_alarm_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_eal_alarm_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + /* rte_eal_alarm_init sets rte_errno on failure. */ + return -1; + } - if (rte_eal_timer_init() < 0) - rte_panic("Cannot init HPET or TSC timers\n"); + if (rte_eal_timer_init() < 0) { + rte_eal_init_alert("Cannot init HPET or TSC timers\n"); + rte_errno = ENOTSUP; + return -1; + } eal_check_mem_on_local_socket(); if (eal_plugins_init() < 0) - rte_panic("Cannot init plugins\n"); + rte_eal_init_alert("Cannot init plugins\n"); eal_thread_init_master(rte_config.master_lcore); @@ -841,11 +885,16 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, (int)thread_id, cpuset, ret == 0 ? 
"" : "..."); - if (rte_eal_dev_init() < 0) - rte_panic("Cannot init pmd devices\n"); + if (rte_eal_intr_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + return -1; + } - if (rte_eal_intr_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_bus_scan()) { + rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_errno = ENODEV; + return -1; + } RTE_LCORE_FOREACH_SLAVE(i) { @@ -883,9 +932,12 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); - /* Probe & Initialize PCI devices */ - if (rte_eal_pci_probe()) - rte_panic("Cannot probe PCI\n"); + /* Probe all the buses and devices/drivers on them */ + if (rte_bus_probe()) { + rte_eal_init_alert("Cannot probe devices\n"); + rte_errno = ENOTSUP; + return -1; + } rte_eal_mcfg_complete(); diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c index 8b042abc..fbae4613 100644 --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c @@ -83,7 +83,7 @@ static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER; static struct rte_intr_handle intr_handle = {.fd = -1 }; static int handler_registered = 0; -static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg); +static void eal_alarm_callback(void *arg); int rte_eal_alarm_init(void) @@ -102,8 +102,7 @@ error: } static void -eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused, - void *arg __rte_unused) +eal_alarm_callback(void *arg __rte_unused) { struct timespec now; struct alarm_entry *ap; diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c index 5fbc17c5..e1c75548 100644 --- a/lib/librte_eal/linuxapp/eal/eal_debug.c +++ b/lib/librte_eal/linuxapp/eal/eal_debug.c @@ -31,7 +31,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#ifdef RTE_BACKTRACE #include <execinfo.h> +#endif #include <stdarg.h> #include <signal.h> #include <stdlib.h> @@ -47,6 +49,7 @@ /* dump the stack of the calling core */ void rte_dump_stack(void) { +#ifdef RTE_BACKTRACE void *func[BACKTRACE_SIZE]; char **symb = NULL; int size; @@ -64,6 +67,7 @@ void rte_dump_stack(void) } free(symb); +#endif /* RTE_BACKTRACE */ } /* not implemented in this environment */ diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 18858e2d..7a21e8f6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -283,9 +283,12 @@ eal_hugepage_info_init(void) struct dirent *dirent; dir = opendir(sys_dir_path); - if (dir == NULL) - rte_panic("Cannot open directory %s to read system hugepage " - "info\n", sys_dir_path); + if (dir == NULL) { + RTE_LOG(ERR, EAL, + "Cannot open directory %s to read system hugepage info\n", + sys_dir_path); + return -1; + } for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { struct hugepage_info *hpi; diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 47a3b20a..2e3bd12a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -46,6 +46,7 @@ #include <sys/ioctl.h> #include <sys/eventfd.h> #include <assert.h> +#include <stdbool.h> #include <rte_common.h> #include <rte_interrupts.h> @@ -136,7 +137,7 @@ static pthread_t intr_thread; /* enable legacy (INTx) interrupts */ static int -vfio_enable_intx(struct rte_intr_handle *intr_handle) { +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -183,7 +184,7 @@ vfio_enable_intx(struct rte_intr_handle *intr_handle) { /* disable legacy (INTx) interrupts */ static int -vfio_disable_intx(struct rte_intr_handle *intr_handle) { 
+vfio_disable_intx(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -194,14 +195,14 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) { irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; irq_set->count = 1; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; irq_set->start = 0; ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { - RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } @@ -226,7 +227,7 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) { /* enable MSI interrupts */ static int -vfio_enable_msi(struct rte_intr_handle *intr_handle) { +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { int len, ret; char irq_set_buf[IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; @@ -255,7 +256,7 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) { /* disable MSI interrupts */ static int -vfio_disable_msi(struct rte_intr_handle *intr_handle) { +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -280,7 +281,7 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) { /* enable MSI-X interrupts */ static int -vfio_enable_msix(struct rte_intr_handle *intr_handle) { +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { int len, ret; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; @@ -290,12 +291,10 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) { irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; - if (!intr_handle->max_intr) - intr_handle->max_intr = 1; - else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID) - intr_handle->max_intr = 
RTE_MAX_RXTX_INTR_VEC_ID + 1; - - irq_set->count = intr_handle->max_intr; + /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ + irq_set->count = intr_handle->max_intr ? + (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? + RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; irq_set->start = 0; @@ -318,7 +317,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) { /* disable MSI-X interrupts */ static int -vfio_disable_msix(struct rte_intr_handle *intr_handle) { +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; int len, ret; @@ -343,7 +342,7 @@ vfio_disable_msix(struct rte_intr_handle *intr_handle) { #endif static int -uio_intx_intr_disable(struct rte_intr_handle *intr_handle) +uio_intx_intr_disable(const struct rte_intr_handle *intr_handle) { unsigned char command_high; @@ -367,7 +366,7 @@ uio_intx_intr_disable(struct rte_intr_handle *intr_handle) } static int -uio_intx_intr_enable(struct rte_intr_handle *intr_handle) +uio_intx_intr_enable(const struct rte_intr_handle *intr_handle) { unsigned char command_high; @@ -391,7 +390,7 @@ uio_intx_intr_enable(struct rte_intr_handle *intr_handle) } static int -uio_intr_disable(struct rte_intr_handle *intr_handle) +uio_intr_disable(const struct rte_intr_handle *intr_handle) { const int value = 0; @@ -405,7 +404,7 @@ uio_intr_disable(struct rte_intr_handle *intr_handle) } static int -uio_intr_enable(struct rte_intr_handle *intr_handle) +uio_intr_enable(const struct rte_intr_handle *intr_handle) { const int value = 1; @@ -419,7 +418,7 @@ uio_intr_enable(struct rte_intr_handle *intr_handle) } int -rte_intr_callback_register(struct rte_intr_handle *intr_handle, +rte_intr_callback_register(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg) { int ret, wake_thread; @@ -491,7 +490,7 
@@ rte_intr_callback_register(struct rte_intr_handle *intr_handle, } int -rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, +rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb_fn, void *cb_arg) { int ret; @@ -555,8 +554,11 @@ rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, } int -rte_intr_enable(struct rte_intr_handle *intr_handle) +rte_intr_enable(const struct rte_intr_handle *intr_handle) { + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; @@ -599,8 +601,11 @@ rte_intr_enable(struct rte_intr_handle *intr_handle) } int -rte_intr_disable(struct rte_intr_handle *intr_handle) +rte_intr_disable(const struct rte_intr_handle *intr_handle) { + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; @@ -645,6 +650,7 @@ rte_intr_disable(struct rte_intr_handle *intr_handle) static int eal_intr_process_interrupts(struct epoll_event *events, int nfds) { + bool call = false; int n, bytes_read; struct rte_intr_source *src; struct rte_intr_callback *cb; @@ -693,13 +699,18 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) bytes_read = sizeof(buf.vfio_intr_count); break; #endif + case RTE_INTR_HANDLE_VDEV: case RTE_INTR_HANDLE_EXT: + bytes_read = 0; + call = true; + break; + default: bytes_read = 1; break; } - if (src->intr_handle.type != RTE_INTR_HANDLE_EXT) { + if (bytes_read > 0) { /** * read out to clear the ready-to-be-read flag * for epoll_wait. @@ -716,12 +727,14 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) } else if (bytes_read == 0) RTE_LOG(ERR, EAL, "Read nothing from file " "descriptor %d\n", events[n].data.fd); + else + call = true; } /* grab a lock, again to call callbacks and update status. 
*/ rte_spinlock_lock(&intr_lock); - if (bytes_read > 0) { + if (call) { /* Finally, call all callbacks. */ TAILQ_FOREACH(cb, &src->callbacks, next) { @@ -731,8 +744,7 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) rte_spinlock_unlock(&intr_lock); /* call the actual callback */ - active_cb.cb_fn(&src->intr_handle, - active_cb.cb_arg); + active_cb.cb_fn(active_cb.cb_arg); /*get the lock back. */ rte_spinlock_lock(&intr_lock); @@ -832,7 +844,7 @@ eal_intr_thread_main(__rte_unused void *arg) TAILQ_FOREACH(src, &intr_sources, next) { if (src->callbacks.tqh_first == NULL) continue; /* skip those with no callbacks */ - ev.events = EPOLLIN | EPOLLPRI; + ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP; ev.data.fd = src->intr_handle.fd; /** @@ -872,13 +884,16 @@ rte_eal_intr_init(void) * create a pipe which will be waited by epoll and notified to * rebuild the wait list of epoll. */ - if (pipe(intr_pipe.pipefd) < 0) + if (pipe(intr_pipe.pipefd) < 0) { + rte_errno = errno; return -1; + } /* create the host thread to wait/handle the interrupt */ ret = pthread_create(&intr_thread, NULL, eal_intr_thread_main, NULL); if (ret != 0) { + rte_errno = ret; RTE_LOG(ERR, EAL, "Failed to create thread for interrupt handling\n"); } else { @@ -913,6 +928,14 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) bytes_read = sizeof(buf.vfio_intr_count); break; #endif + case RTE_INTR_HANDLE_VDEV: + /* for vdev, fd points to: + * a. eventfd which does not need to read out; + * b. datapath fd which needs PMD to read out. 
+ */ + return; + case RTE_INTR_HANDLE_EXT: + return; default: bytes_read = 1; RTE_LOG(INFO, EAL, "unexpected intr type\n"); @@ -1141,6 +1164,24 @@ rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd, return rc; } +void +rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle) +{ + uint32_t i; + struct rte_epoll_event *rev; + + for (i = 0; i < intr_handle->nb_efd; i++) { + rev = &intr_handle->elist[i]; + if (rev->status == RTE_EPOLL_INVALID) + continue; + if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { + /* force free if the entry valid */ + eal_epoll_data_safe_free(rev); + rev->status = RTE_EPOLL_INVALID; + } + } +} + int rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) { @@ -1157,12 +1198,14 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) RTE_LOG(ERR, EAL, "can't setup eventfd, error %i (%s)\n", errno, strerror(errno)); - return -1; + return -errno; } intr_handle->efds[i] = fd; } intr_handle->nb_efd = n; intr_handle->max_intr = NB_OTHER_INTR + n; + } else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) { + /* do nothing, and let vdev driver to initialize this struct */ } else { intr_handle->efds[0] = intr_handle->fd; intr_handle->nb_efd = RTE_MIN(nb_efd, 1U); @@ -1176,19 +1219,8 @@ void rte_intr_efd_disable(struct rte_intr_handle *intr_handle) { uint32_t i; - struct rte_epoll_event *rev; - - for (i = 0; i < intr_handle->nb_efd; i++) { - rev = &intr_handle->elist[i]; - if (rev->status == RTE_EPOLL_INVALID) - continue; - if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { - /* force free if the entry valid */ - eal_epoll_data_safe_free(rev); - rev->status = RTE_EPOLL_INVALID; - } - } + rte_intr_free_epoll_fd(intr_handle); if (intr_handle->max_intr > intr_handle->nb_efd) { for (i = 0; i < intr_handle->nb_efd; i++) close(intr_handle->efds[i]); @@ -1218,5 +1250,8 @@ rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) return 
1; + if (intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 1; + return 0; } diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index a956bb22..ebe06833 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -64,6 +64,7 @@ #define _FILE_OFFSET_BITS 64 #include <errno.h> #include <stdarg.h> +#include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <stdint.h> @@ -122,26 +123,28 @@ int rte_xen_dom0_supported(void) static uint64_t baseaddr_offset; -static unsigned proc_pagemap_readable; +static bool phys_addrs_available = true; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" static void -test_proc_pagemap_readable(void) +test_phys_addrs_available(void) { - int fd = open("/proc/self/pagemap", O_RDONLY); + uint64_t tmp; + phys_addr_t physaddr; - if (fd < 0) { + /* For dom0, phys addresses can always be available */ + if (rte_xen_dom0_supported()) + return; + + physaddr = rte_mem_virt2phy(&tmp); + if (physaddr == RTE_BAD_PHYS_ADDR) { RTE_LOG(ERR, EAL, - "Cannot open /proc/self/pagemap: %s. " - "virt2phys address translation will not work\n", + "Cannot obtain physical addresses: %s. " + "Only vfio will function.\n", strerror(errno)); - return; + phys_addrs_available = false; } - - /* Is readable */ - close(fd); - proc_pagemap_readable = 1; } /* Lock page in physical memory and prevent from swapping. 
*/ @@ -190,7 +193,7 @@ rte_mem_virt2phy(const void *virtaddr) } /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ - if (!proc_pagemap_readable) + if (!phys_addrs_available) return RTE_BAD_PHYS_ADDR; /* standard page size */ @@ -229,6 +232,9 @@ rte_mem_virt2phy(const void *virtaddr) * the pfn (page frame number) are bits 0-54 (see * pagemap.txt in linux Documentation) */ + if ((page & 0x7fffffffffffffULL) == 0) + return RTE_BAD_PHYS_ADDR; + physaddr = ((page & 0x7fffffffffffffULL) * page_size) + ((unsigned long)virtaddr % page_size); @@ -242,7 +248,7 @@ rte_mem_virt2phy(const void *virtaddr) static int find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) { - unsigned i; + unsigned int i; phys_addr_t addr; for (i = 0; i < hpi->num_pages[0]; i++) { @@ -255,6 +261,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) } /* + * For each hugepage in hugepg_tbl, fill the physaddr value sequentially. + */ +static int +set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + unsigned int i; + static phys_addr_t addr; + + for (i = 0; i < hpi->num_pages[0]; i++) { + hugepg_tbl[i].physaddr = addr; + addr += hugepg_tbl[i].size; + } + return 0; +} + +/* * Check whether address-space layout randomization is enabled in * the kernel. 
This is important for multi-process as it can prevent * two processes mapping data to the same virtual address @@ -313,7 +335,13 @@ get_virtual_area(size_t *size, size_t hugepage_sz) } do { addr = mmap(addr, - (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0); + (*size) + hugepage_sz, PROT_READ, +#ifdef RTE_ARCH_PPC_64 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, +#else + MAP_PRIVATE, +#endif + fd, 0); if (addr == MAP_FAILED) *size -= hugepage_sz; } while (addr == MAP_FAILED && *size > 0); @@ -592,12 +620,12 @@ static int cmp_physaddr(const void *a, const void *b) { #ifndef RTE_ARCH_PPC_64 - const struct hugepage_file *p1 = (const struct hugepage_file *)a; - const struct hugepage_file *p2 = (const struct hugepage_file *)b; + const struct hugepage_file *p1 = a; + const struct hugepage_file *p2 = b; #else /* PowerPC needs memory sorted in reverse order from x86 */ - const struct hugepage_file *p1 = (const struct hugepage_file *)b; - const struct hugepage_file *p2 = (const struct hugepage_file *)a; + const struct hugepage_file *p1 = b; + const struct hugepage_file *p2 = a; #endif if (p1->physaddr < p2->physaddr) return -1; @@ -951,7 +979,7 @@ rte_eal_hugepage_init(void) int nr_hugefiles, nr_hugepages = 0; void *addr; - test_proc_pagemap_readable(); + test_phys_addrs_available(); memset(used_hp, 0, sizeof(used_hp)); @@ -1043,11 +1071,22 @@ rte_eal_hugepage_init(void) continue; } - /* find physical addresses and sockets for each hugepage */ - if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; + if (phys_addrs_available) { + /* find physical addresses for each hugepage */ + if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { + RTE_LOG(DEBUG, EAL, "Failed to find phys addr " + "for %u MB pages\n", + (unsigned int)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + } else { + /* set physical addresses for each hugepage */ + if 
(set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { + RTE_LOG(DEBUG, EAL, "Failed to set phys addr " + "for %u MB pages\n", + (unsigned int)(hpi->hugepage_sz / 0x100000)); + goto fail; + } } if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){ @@ -1289,7 +1328,7 @@ rte_eal_hugepage_attach(void) "into secondary processes\n"); } - test_proc_pagemap_readable(); + test_phys_addrs_available(); if (internal_config.xen_dom0_support) { #ifdef RTE_LIBRTE_XEN_DOM0 @@ -1330,7 +1369,13 @@ rte_eal_hugepage_attach(void) * use mmap to get identical addresses as the primary process. */ base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, - PROT_READ, MAP_PRIVATE, fd_zero, 0); + PROT_READ, +#ifdef RTE_ARCH_PPC_64 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, +#else + MAP_PRIVATE, +#endif + fd_zero, 0); if (base_addr == MAP_FAILED || base_addr != mcfg->memseg[s].addr) { max_seg = s; @@ -1426,3 +1471,9 @@ error: close(fd_hugepage); return -1; } + +bool +rte_eal_using_phys_addrs(void) +{ + return phys_addrs_available; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 876ba381..595622b2 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -35,6 +35,7 @@ #include <dirent.h> #include <rte_log.h> +#include <rte_bus.h> #include <rte_pci.h> #include <rte_eal_memconfig.h> #include <rte_malloc.h> @@ -54,44 +55,7 @@ * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). 
*/ -/* unbind kernel driver for this device */ -int -pci_unbind_kernel_driver(struct rte_pci_device *dev) -{ - int n; - FILE *f; - char filename[PATH_MAX]; - char buf[BUFSIZ]; - struct rte_pci_addr *loc = &dev->addr; - - /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ - snprintf(filename, sizeof(filename), - "%s/" PCI_PRI_FMT "/driver/unbind", pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - - f = fopen(filename, "w"); - if (f == NULL) /* device was not bound */ - return 0; - - n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n", - loc->domain, loc->bus, loc->devid, loc->function); - if ((n < 0) || (n >= (int)sizeof(buf))) { - RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__); - goto error; - } - if (fwrite(buf, n, 1, f) == 0) { - RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__, - filename); - goto error; - } - - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} +extern struct rte_pci_bus rte_pci_bus; static int pci_get_kernel_driver_by_path(const char *filename, char *dri_name) @@ -124,7 +88,7 @@ pci_get_kernel_driver_by_path(const char *filename, char *dri_name) /* Map pci device */ int -rte_eal_pci_map_device(struct rte_pci_device *dev) +rte_pci_map_device(struct rte_pci_device *dev) { int ret = -1; @@ -138,8 +102,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev) break; case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: - /* map resources for devices that use uio */ - ret = pci_uio_map_resource(dev); + if (rte_eal_using_phys_addrs()) { + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + } break; default: RTE_LOG(DEBUG, EAL, @@ -153,12 +119,15 @@ rte_eal_pci_map_device(struct rte_pci_device *dev) /* Unmap pci device */ void -rte_eal_pci_unmap_device(struct rte_pci_device *dev) +rte_pci_unmap_device(struct rte_pci_device *dev) { /* try unmapping the NIC resources using VFIO if it exists */ switch (dev->kdrv) { case RTE_KDRV_VFIO: - RTE_LOG(ERR, EAL, "Hotplug doesn't 
support vfio yet\n"); +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) + pci_vfio_unmap_resource(dev); +#endif break; case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: @@ -267,8 +236,7 @@ error: /* Scan one pci sysfs entry, and fill the devices list from it. */ static int -pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, - uint8_t devid, uint8_t function) +pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) { char filename[PATH_MAX]; unsigned long tmp; @@ -281,10 +249,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, return -1; memset(dev, 0, sizeof(*dev)); - dev->addr.domain = domain; - dev->addr.bus = bus; - dev->addr.devid = devid; - dev->addr.function = function; + dev->addr = *addr; /* get vendor id */ snprintf(filename, sizeof(filename), "%s/vendor", dirname); @@ -359,6 +324,9 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dev->device.numa_node = tmp; } + rte_pci_device_name(addr, dev->name, sizeof(dev->name)); + dev->device.name = dev->name; + /* parse resources */ snprintf(filename, sizeof(filename), "%s/resource", dirname); if (pci_parse_sysfs_resource(filename, dev) < 0) { @@ -389,21 +357,19 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dev->kdrv = RTE_KDRV_NONE; /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&pci_device_list)) { - rte_eal_device_insert(&dev->device); - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { + rte_pci_add_device(dev); } else { struct rte_pci_device *dev2; int ret; - TAILQ_FOREACH(dev2, &pci_device_list, next) { + TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); if (ret > 0) continue; if (ret < 0) { - TAILQ_INSERT_BEFORE(dev2, dev, next); - rte_eal_device_insert(&dev->device); + rte_pci_insert_device(dev2, dev); } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; @@ -413,8 +379,8 @@ 
pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, } return 0; } - rte_eal_device_insert(&dev->device); - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + + rte_pci_add_device(dev); } return 0; @@ -429,16 +395,14 @@ pci_update_device(const struct rte_pci_addr *addr) pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, addr->function); - return pci_scan_one(filename, addr->domain, addr->bus, addr->devid, - addr->function); + return pci_scan_one(filename, addr); } /* * split up a pci address into its constituent parts. */ static int -parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, - uint8_t *bus, uint8_t *devid, uint8_t *function) +parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) { /* first split on ':' */ union splitaddr { @@ -466,10 +430,10 @@ parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, /* now convert to int values */ errno = 0; - *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); - *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); - *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); - *function = (uint8_t)strtoul(splitaddr.function, NULL, 10); + addr->domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); + addr->bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); + addr->devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); + addr->function = (uint8_t)strtoul(splitaddr.function, NULL, 10); if (errno != 0) goto error; @@ -485,13 +449,16 @@ error: * list */ int -rte_eal_pci_scan(void) +rte_pci_scan(void) { struct dirent *e; DIR *dir; char dirname[PATH_MAX]; - uint16_t domain; - uint8_t bus, devid, function; + struct rte_pci_addr addr; + + /* for debug purposes, PCI can be disabled */ + if (internal_config.no_pci) + return 0; dir = opendir(pci_get_sysfs_path()); if (dir == NULL) { @@ -504,13 +471,13 @@ rte_eal_pci_scan(void) if (e->d_name[0] == '.') continue; - if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain, - &bus, &devid, &function) != 0) + if 
(parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0) continue; snprintf(dirname, sizeof(dirname), "%s/%s", pci_get_sysfs_path(), e->d_name); - if (pci_scan_one(dirname, domain, bus, devid, function) < 0) + + if (pci_scan_one(dirname, &addr) < 0) goto error; } closedir(dir); @@ -522,8 +489,8 @@ error: } /* Read PCI config space. */ -int rte_eal_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset) +int rte_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset) { const struct rte_intr_handle *intr_handle = &device->intr_handle; @@ -547,8 +514,8 @@ int rte_eal_pci_read_config(const struct rte_pci_device *device, } /* Write PCI config space. */ -int rte_eal_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset) +int rte_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset) { const struct rte_intr_handle *intr_handle = &device->intr_handle; @@ -574,7 +541,7 @@ int rte_eal_pci_write_config(const struct rte_pci_device *device, #if defined(RTE_ARCH_X86) static int pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, - struct rte_pci_ioport *p) + struct rte_pci_ioport *p) { uint16_t start, end; FILE *fp; @@ -632,8 +599,8 @@ pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, #endif int -rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) +rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) { int ret = -1; @@ -670,8 +637,8 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, } void -rte_eal_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) +rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { #ifdef VFIO_PRESENT @@ -696,8 +663,8 @@ rte_eal_pci_ioport_read(struct rte_pci_ioport *p, } void 
-rte_eal_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) +rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { #ifdef VFIO_PRESENT @@ -722,7 +689,7 @@ rte_eal_pci_ioport_write(struct rte_pci_ioport *p, } int -rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) +rte_pci_ioport_unmap(struct rte_pci_ioport *p) { int ret = -1; @@ -754,19 +721,3 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) return ret; } - -/* Init the PCI EAL subsystem */ -int -rte_eal_pci_init(void) -{ - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - if (rte_eal_pci_scan() < 0) { - RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); - return -1; - } - - return 0; -} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h index 6a960d1b..ae2980d6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h +++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h @@ -88,8 +88,9 @@ void pci_vfio_ioport_write(struct rte_pci_ioport *p, const void *data, size_t len, off_t offset); int pci_vfio_ioport_unmap(struct rte_pci_ioport *p); -/* map VFIO resource prototype */ +/* map/unmap VFIO resource prototype */ int pci_vfio_map_resource(struct rte_pci_device *dev); +int pci_vfio_unmap_resource(struct rte_pci_device *dev); #endif diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index 3e4ffb57..fa10329f 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -38,6 +38,7 @@ #include <inttypes.h> #include <sys/stat.h> #include <sys/mman.h> +#include <sys/sysmacros.h> #include <linux/pci_regs.h> #if defined(RTE_ARCH_X86) @@ -230,7 +231,7 @@ pci_uio_free_resource(struct rte_pci_device *dev, close(dev->intr_handle.uio_cfg_fd); dev->intr_handle.uio_cfg_fd = -1; } - if (dev->intr_handle.fd) { + if (dev->intr_handle.fd >= 0) { 
close(dev->intr_handle.fd); dev->intr_handle.fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c index 5f478c59..2be13195 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -38,6 +38,7 @@ #include <sys/socket.h> #include <sys/ioctl.h> #include <sys/mman.h> +#include <stdbool.h> #include <rte_log.h> #include <rte_pci.h> @@ -172,7 +173,7 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, /* set PCI bus mastering */ static int -pci_vfio_set_bus_master(int dev_fd) +pci_vfio_set_bus_master(int dev_fd, bool op) { uint16_t reg; int ret; @@ -185,8 +186,11 @@ pci_vfio_set_bus_master(int dev_fd) return -1; } - /* set the master bit */ - reg |= PCI_COMMAND_MASTER; + if (op) + /* set the master bit */ + reg |= PCI_COMMAND_MASTER; + else + reg &= ~(PCI_COMMAND_MASTER); ret = pwrite64(dev_fd, &reg, sizeof(reg), VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + @@ -355,7 +359,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev) } else { /* if we're in a secondary process, just find our tailq entry */ TAILQ_FOREACH(vfio_res, vfio_res_list, next) { - if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) + if (rte_eal_compare_pci_addr(&vfio_res->pci_addr, + &dev->addr)) continue; break; } @@ -517,7 +522,7 @@ pci_vfio_map_resource(struct rte_pci_device *dev) } /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd)) { + if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr); close(vfio_dev_fd); rte_free(vfio_res); @@ -535,6 +540,79 @@ pci_vfio_map_resource(struct rte_pci_device *dev) } int +pci_vfio_unmap_resource(struct rte_pci_device *dev) +{ + char pci_addr[PATH_MAX] = {0}; + struct rte_pci_addr *loc = &dev->addr; + int i, ret; + struct mapped_pci_resource *vfio_res = NULL; + struct 
mapped_pci_res_list *vfio_res_list; + + struct pci_map *maps; + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + + if (close(dev->intr_handle.fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n", + pci_addr); + return -1; + } + + if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) { + RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n", + pci_addr); + return -1; + } + + ret = vfio_release_device(pci_get_sysfs_path(), pci_addr, + dev->intr_handle.vfio_dev_fd); + if (ret < 0) { + RTE_LOG(ERR, EAL, + "%s(): cannot release device\n", __func__); + return ret; + } + + vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + /* Get vfio_res */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) + continue; + break; + } + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", + pci_addr); + return -1; + } + + /* unmap BARs */ + maps = vfio_res->maps; + + RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n", + pci_addr); + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + + /* + * We do not need to be aware of MSI-X table BAR mappings as + * when mapping. 
Just using current maps array is enough + */ + if (maps[i].addr) { + RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n", + pci_addr, maps[i].addr); + pci_unmap_resource(maps[i].addr, maps[i].size); + } + } + + TAILQ_REMOVE(vfio_res_list, vfio_res, next); + + return 0; +} + +int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) { diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 702f7a2e..53ac725d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -50,12 +50,15 @@ static struct vfio_config vfio_cfg; static int vfio_type1_dma_map(int); +static int vfio_spapr_dma_map(int); static int vfio_noiommu_dma_map(int); /* IOMMU types we support */ static const struct vfio_iommu_type iommu_types[] = { /* x86 IOMMU, otherwise known as type 1 */ { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map}, + /* ppc64 IOMMU, otherwise known as spapr */ + { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map}, /* IOMMU-less mode */ { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map}, }; @@ -65,13 +68,32 @@ vfio_get_group_fd(int iommu_group_no) { int i; int vfio_group_fd; + int group_idx = -1; char filename[PATH_MAX]; /* check if we already have the group descriptor open */ - for (i = 0; i < vfio_cfg.vfio_group_idx; i++) + for (i = 0; i < VFIO_MAX_GROUPS; i++) if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no) return vfio_cfg.vfio_groups[i].fd; + /* Lets see first if there is room for a new group */ + if (vfio_cfg.vfio_active_groups == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + return -1; + } + + /* Now lets get an index for the new group */ + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg.vfio_groups[i].group_no == -1) { + group_idx = i; + break; + } + + /* This should not happen */ + if (group_idx == -1) { + RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); + return -1; + } /* if primary, try to 
open the group */ if (internal_config.process_type == RTE_PROC_PRIMARY) { /* try regular group format */ @@ -101,14 +123,9 @@ vfio_get_group_fd(int iommu_group_no) /* noiommu group found */ } - /* if the fd is valid, create a new group for it */ - if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) { - RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); - close(vfio_group_fd); - return -1; - } - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; + vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no; + vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd; + vfio_cfg.vfio_active_groups++; return vfio_group_fd; } /* if we're in a secondary process, request group fd from the primary @@ -155,14 +172,115 @@ vfio_get_group_fd(int iommu_group_no) return -1; } + +static int +get_vfio_group_idx(int vfio_group_fd) +{ + int i; + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd) + return i; + return -1; +} + +static void +vfio_group_device_get(int vfio_group_fd) +{ + int i; + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0 || i > VFIO_MAX_GROUPS) + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + else + vfio_cfg.vfio_groups[i].devices++; +} + static void -clear_current_group(void) +vfio_group_device_put(int vfio_group_fd) { - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1; + int i; + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0 || i > VFIO_MAX_GROUPS) + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + else + vfio_cfg.vfio_groups[i].devices--; +} + +static int +vfio_group_device_count(int vfio_group_fd) +{ + int i; + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0 || i > VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + return -1; + } + + return vfio_cfg.vfio_groups[i].devices; +} + +int +clear_group(int 
vfio_group_fd) +{ + int i; + int socket_fd, ret; + + if (internal_config.process_type == RTE_PROC_PRIMARY) { + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0) + return -1; + vfio_cfg.vfio_groups[i].group_no = -1; + vfio_cfg.vfio_groups[i].fd = -1; + vfio_cfg.vfio_groups[i].devices = 0; + vfio_cfg.vfio_active_groups--; + return 0; + } + + /* This is just for SECONDARY processes */ + socket_fd = vfio_mp_sync_connect_to_primary(); + + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); + return -1; + } + + if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) { + RTE_LOG(ERR, EAL, " cannot request container fd!\n"); + close(socket_fd); + return -1; + } + + if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) { + RTE_LOG(ERR, EAL, " cannot send group fd!\n"); + close(socket_fd); + return -1; + } + + ret = vfio_mp_sync_receive_request(socket_fd); + switch (ret) { + case SOCKET_NO_FD: + RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n"); + close(socket_fd); + break; + case SOCKET_OK: + close(socket_fd); + return 0; + case SOCKET_ERR: + RTE_LOG(ERR, EAL, " Socket error\n"); + close(socket_fd); + break; + default: + RTE_LOG(ERR, EAL, " UNKNOWN reply, %d\n", ret); + close(socket_fd); + } + return -1; } -int vfio_setup_device(const char *sysfs_base, const char *dev_addr, +int +vfio_setup_device(const char *sysfs_base, const char *dev_addr, int *vfio_dev_fd, struct vfio_device_info *device_info) { struct vfio_group_status group_status = { @@ -189,18 +307,10 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr, if (vfio_group_fd < 0) return -1; - /* store group fd */ - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; - /* if group_fd == 0, that means the device isn't managed by VFIO */ if (vfio_group_fd == 0) { - RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + RTE_LOG(WARNING, EAL, " %s not managed by 
VFIO driver, skipping\n", dev_addr); - /* we store 0 as group fd to distinguish between existing but - * unbound VFIO groups, and groups that don't exist at all. - */ - vfio_cfg.vfio_group_idx++; return 1; } @@ -215,12 +325,12 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr, RTE_LOG(ERR, EAL, " %s cannot get group status, " "error %i (%s)\n", dev_addr, errno, strerror(errno)); close(vfio_group_fd); - clear_current_group(); + clear_group(vfio_group_fd); return -1; } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", dev_addr); close(vfio_group_fd); - clear_current_group(); + clear_group(vfio_group_fd); return -1; } @@ -234,60 +344,131 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr, RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, " "error %i (%s)\n", dev_addr, errno, strerror(errno)); close(vfio_group_fd); - clear_current_group(); + clear_group(vfio_group_fd); return -1; } + /* - * at this point we know that this group has been successfully - * initialized, so we increment vfio_group_idx to indicate that we can - * add new groups. + * pick an IOMMU type and set up DMA mappings for container + * + * needs to be done only once, only when first group is + * assigned to a container and only in primary process. + * Note this can happen several times with the hotplug + * functionality. 
*/ - vfio_cfg.vfio_group_idx++; - } - - /* - * pick an IOMMU type and set up DMA mappings for container - * - * needs to be done only once, only when at least one group is assigned to - * a container and only in primary process - */ - if (internal_config.process_type == RTE_PROC_PRIMARY && - vfio_cfg.vfio_container_has_dma == 0) { - /* select an IOMMU type which we will be using */ - const struct vfio_iommu_type *t = + if (internal_config.process_type == RTE_PROC_PRIMARY && + vfio_cfg.vfio_active_groups == 1) { + /* select an IOMMU type which we will be using */ + const struct vfio_iommu_type *t = vfio_set_iommu_type(vfio_cfg.vfio_container_fd); - if (!t) { - RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", dev_addr); - return -1; - } - ret = t->dma_map_func(vfio_cfg.vfio_container_fd); - if (ret) { - RTE_LOG(ERR, EAL, " %s DMA remapping failed, " - "error %i (%s)\n", dev_addr, errno, strerror(errno)); - return -1; + if (!t) { + RTE_LOG(ERR, EAL, + " %s failed to select IOMMU type\n", + dev_addr); + close(vfio_group_fd); + clear_group(vfio_group_fd); + return -1; + } + ret = t->dma_map_func(vfio_cfg.vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, + " %s DMA remapping failed, error %i (%s)\n", + dev_addr, errno, strerror(errno)); + close(vfio_group_fd); + clear_group(vfio_group_fd); + return -1; + } } - vfio_cfg.vfio_container_has_dma = 1; } /* get a file descriptor for the device */ *vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr); if (*vfio_dev_fd < 0) { - /* if we cannot get a device fd, this simply means that this - * particular port is not bound to VFIO - */ - RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + /* if we cannot get a device fd, this implies a problem with + * the VFIO group or the container not having IOMMU configured. 
+ */ + + RTE_LOG(WARNING, EAL, "Getting a vfio_dev_fd for %s failed\n", dev_addr); - return 1; + close(vfio_group_fd); + clear_group(vfio_group_fd); + return -1; } /* test and setup the device */ ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info); if (ret) { RTE_LOG(ERR, EAL, " %s cannot get device info, " - "error %i (%s)\n", dev_addr, errno, strerror(errno)); + "error %i (%s)\n", dev_addr, errno, + strerror(errno)); close(*vfio_dev_fd); + close(vfio_group_fd); + clear_group(vfio_group_fd); return -1; } + vfio_group_device_get(vfio_group_fd); + + return 0; +} + +int +vfio_release_device(const char *sysfs_base, const char *dev_addr, + int vfio_dev_fd) +{ + struct vfio_group_status group_status = { + .argsz = sizeof(group_status) + }; + int vfio_group_fd; + int iommu_group_no; + int ret; + + /* get group number */ + ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no); + if (ret <= 0) { + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver\n", + dev_addr); + /* This is an error at this point. */ + return -1; + } + + /* get the actual group fd */ + vfio_group_fd = vfio_get_group_fd(iommu_group_no); + if (vfio_group_fd <= 0) { + RTE_LOG(INFO, EAL, "vfio_get_group_fd failed for %s\n", + dev_addr); + return -1; + } + + /* At this point we got an active group. Closing it will make the + * container detachment. If this is the last active group, VFIO kernel + * code will unset the container and the IOMMU mappings. + */ + + /* Closing a device */ + if (close(vfio_dev_fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n", + dev_addr); + return -1; + } + + /* An VFIO group can have several devices attached. Just when there is + * no devices remaining should the group be closed. 
+ */ + vfio_group_device_put(vfio_group_fd); + if (!vfio_group_device_count(vfio_group_fd)) { + + if (close(vfio_group_fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n", + dev_addr); + return -1; + } + + if (clear_group(vfio_group_fd) < 0) { + RTE_LOG(INFO, EAL, "Error when clearing group for %s\n", + dev_addr); + return -1; + } + } return 0; } @@ -302,6 +483,7 @@ vfio_enable(const char *modname) for (i = 0; i < VFIO_MAX_GROUPS; i++) { vfio_cfg.vfio_groups[i].fd = -1; vfio_cfg.vfio_groups[i].group_no = -1; + vfio_cfg.vfio_groups[i].devices = 0; } /* inform the user that we are probing for VFIO */ @@ -531,7 +713,8 @@ vfio_type1_dma_map(int vfio_container_fd) if (ret) { RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " - "error %i (%s)\n", errno, strerror(errno)); + "error %i (%s)\n", errno, + strerror(errno)); return -1; } } @@ -540,6 +723,93 @@ vfio_type1_dma_map(int vfio_container_fd) } static int +vfio_spapr_dma_map(int vfio_container_fd) +{ + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + int i, ret; + + struct vfio_iommu_spapr_register_memory reg = { + .argsz = sizeof(reg), + .flags = 0 + }; + struct vfio_iommu_spapr_tce_info info = { + .argsz = sizeof(info), + }; + struct vfio_iommu_spapr_tce_create create = { + .argsz = sizeof(create), + }; + struct vfio_iommu_spapr_tce_remove remove = { + .argsz = sizeof(remove), + }; + + /* query spapr iommu info */ + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + if (ret) { + RTE_LOG(ERR, EAL, " cannot get iommu info, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* remove default DMA of 32 bit window */ + remove.start_addr = info.dma32_window_start; + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); + if (ret) { + RTE_LOG(ERR, EAL, " cannot remove default DMA window, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* calculate window size based on number of hugepages configured */ + 
create.window_size = rte_eal_get_physmem_size(); + create.page_shift = __builtin_ctzll(ms->hugepage_sz); + create.levels = 2; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create); + if (ret) { + RTE_LOG(ERR, EAL, " cannot create new DMA window, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + struct vfio_iommu_type1_dma_map dma_map; + + if (ms[i].addr == NULL) + break; + + reg.vaddr = (uintptr_t) ms[i].addr; + reg.size = ms[i].len; + ret = ioctl(vfio_container_fd, + VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg); + if (ret) { + RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + memset(&dma_map, 0, sizeof(dma_map)); + dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); + dma_map.vaddr = ms[i].addr_64; + dma_map.size = ms[i].len; + dma_map.iova = ms[i].phys_addr; + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | + VFIO_DMA_MAP_FLAG_WRITE; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); + + if (ret) { + RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + } + + return 0; +} + +static int vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) { /* No-IOMMU mode does not need DMA mapping */ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 29f7f3ec..5ff63e5d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -54,6 +54,62 @@ #define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU +#ifndef VFIO_SPAPR_TCE_v2_IOMMU +#define RTE_VFIO_SPAPR 7 +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + +struct vfio_iommu_spapr_register_memory { + uint32_t argsz; 
+ uint32_t flags; + uint64_t vaddr; + uint64_t size; +}; + +struct vfio_iommu_spapr_tce_create { + uint32_t argsz; + uint32_t flags; + /* in */ + uint32_t page_shift; + uint32_t __resv1; + uint64_t window_size; + uint32_t levels; + uint32_t __resv2; + /* out */ + uint64_t start_addr; +}; + +struct vfio_iommu_spapr_tce_remove { + uint32_t argsz; + uint32_t flags; + /* in */ + uint64_t start_addr; +}; + +struct vfio_iommu_spapr_tce_ddw_info { + uint64_t pgsizes; + uint32_t max_dynamic_windows_supported; + uint32_t levels; +}; + +/* SPAPR_v2 is not present, but SPAPR might be */ +#ifndef VFIO_SPAPR_TCE_IOMMU +#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + +struct vfio_iommu_spapr_tce_info { + uint32_t argsz; + uint32_t flags; + uint32_t dma32_window_start; + uint32_t dma32_window_size; + struct vfio_iommu_spapr_tce_ddw_info ddw; +}; +#endif /* VFIO_SPAPR_TCE_IOMMU */ + +#else /* VFIO_SPAPR_TCE_v2_IOMMU */ +#define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU +#endif + #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) #define RTE_VFIO_NOIOMMU 8 #else @@ -78,13 +134,13 @@ int vfio_mp_sync_connect_to_primary(void); struct vfio_group { int group_no; int fd; + int devices; }; struct vfio_config { int vfio_enabled; int vfio_container_fd; - int vfio_container_has_dma; - int vfio_group_idx; + int vfio_active_groups; struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; }; @@ -130,6 +186,10 @@ vfio_get_group_no(const char *sysfs_base, int vfio_get_group_fd(int iommu_group_no); +/* remove group fd from internal VFIO group fd array */ +int +clear_group(int vfio_group_fd); + /** * Setup vfio_cfg for the device identified by its address. It discovers * the configured I/O MMU groups or sets a new one for the device. 
If a new @@ -140,6 +200,8 @@ vfio_get_group_fd(int iommu_group_no); int vfio_setup_device(const char *sysfs_base, const char *dev_addr, int *vfio_dev_fd, struct vfio_device_info *device_info); +int vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); + int vfio_enable(const char *modname); int vfio_is_enabled(const char *modname); @@ -150,6 +212,7 @@ int vfio_mp_sync_setup(void); #define SOCKET_REQ_CONTAINER 0x100 #define SOCKET_REQ_GROUP 0x200 +#define SOCKET_CLR_GROUP 0x300 #define SOCKET_OK 0x0 #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c index fb4a2f84..7e8095cb 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c @@ -267,7 +267,7 @@ vfio_mp_sync_connect_to_primary(void) static __attribute__((noreturn)) void * vfio_mp_sync_thread(void __rte_unused * arg) { - int ret, fd, vfio_group_no; + int ret, fd, vfio_data; /* wait for requests on the socket */ for (;;) { @@ -305,13 +305,13 @@ vfio_mp_sync_thread(void __rte_unused * arg) break; case SOCKET_REQ_GROUP: /* wait for group number */ - vfio_group_no = vfio_mp_sync_receive_request(conn_sock); - if (vfio_group_no < 0) { + vfio_data = vfio_mp_sync_receive_request(conn_sock); + if (vfio_data < 0) { close(conn_sock); continue; } - fd = vfio_get_group_fd(vfio_group_no); + fd = vfio_get_group_fd(vfio_data); if (fd < 0) vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); @@ -324,6 +324,21 @@ vfio_mp_sync_thread(void __rte_unused * arg) vfio_mp_sync_send_fd(conn_sock, fd); } break; + case SOCKET_CLR_GROUP: + /* wait for group fd */ + vfio_data = vfio_mp_sync_receive_request(conn_sock); + if (vfio_data < 0) { + close(conn_sock); + continue; + } + + ret = clear_group(vfio_data); + + if (ret < 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); + else + vfio_mp_sync_send_request(conn_sock, SOCKET_OK); + break; default: 
vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); break; diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h index d459bf48..6daffebf 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h @@ -49,8 +49,9 @@ enum rte_intr_handle_type { RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */ RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */ - RTE_INTR_HANDLE_ALARM, /**< alarm handle */ - RTE_INTR_HANDLE_EXT, /**< external handler */ + RTE_INTR_HANDLE_ALARM, /**< alarm handle */ + RTE_INTR_HANDLE_EXT, /**< external handler */ + RTE_INTR_HANDLE_VDEV, /**< virtual device */ RTE_INTR_HANDLE_MAX }; @@ -171,6 +172,15 @@ rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd, int op, unsigned int vec, void *data); /** + * It deletes registered eventfds. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +void +rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle); + +/** * It enables the packet I/O interrupt event if it's necessary. * It creates event fd for each interrupt vector when MSIX is used, * otherwise it multiplexes a single event fd. diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h index 09713b0c..2ac879fd 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -116,11 +116,10 @@ struct rte_kni_fifo { struct rte_kni_mbuf { void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); uint64_t buf_physaddr; - char pad0[2]; uint16_t data_off; /**< Start address of data in segment buffer. */ char pad1[2]; - uint8_t nb_segs; /**< Number of segments. 
*/ - char pad4[1]; + uint16_t nb_segs; /**< Number of segments. */ + char pad4[2]; uint64_t ol_flags; /**< Offload features. */ char pad2[4]; uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */ diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index 83721ba5..670bab3a 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -6,8 +6,6 @@ DPDK_2.0 { eal_parse_sysfs_value; eal_timer_source; lcore_config; - pci_device_list; - pci_driver_list; per_lcore__lcore_id; per_lcore__rte_errno; rte_calloc; @@ -22,12 +20,9 @@ DPDK_2.0 { rte_dump_tailq; rte_eal_alarm_cancel; rte_eal_alarm_set; - rte_eal_dev_init; rte_eal_devargs_add; rte_eal_devargs_dump; rte_eal_devargs_type_count; - rte_eal_driver_register; - rte_eal_driver_unregister; rte_eal_get_configuration; rte_eal_get_lcore_state; rte_eal_get_physmem_layout; @@ -40,18 +35,10 @@ DPDK_2.0 { rte_eal_mp_remote_launch; rte_eal_mp_wait_lcore; rte_eal_parse_devargs_str; - rte_eal_pci_dump; - rte_eal_pci_probe; - rte_eal_pci_probe_one; - rte_eal_pci_register; - rte_eal_pci_scan; - rte_eal_pci_unregister; rte_eal_process_type; rte_eal_remote_launch; rte_eal_tailq_lookup; rte_eal_tailq_register; - rte_eal_vdev_init; - rte_eal_vdev_uninit; rte_eal_wait_lcore; rte_exit; rte_free; @@ -66,11 +53,8 @@ DPDK_2.0 { rte_intr_disable; rte_intr_enable; rte_log; - rte_log_add_in_history; rte_log_cur_msg_loglevel; rte_log_cur_msg_logtype; - rte_log_dump_history; - rte_log_set_history; rte_logs; rte_malloc; rte_malloc_dump_stats; @@ -114,9 +98,6 @@ DPDK_2.0 { DPDK_2.1 { global: - rte_eal_pci_detach; - rte_eal_pci_read_config; - rte_eal_pci_write_config; rte_epoll_ctl; rte_epoll_wait; rte_intr_allow_others; @@ -146,12 +127,6 @@ DPDK_16.04 { global: rte_cpu_get_flag_name; - rte_eal_pci_ioport_map; - rte_eal_pci_ioport_read; - rte_eal_pci_ioport_unmap; - rte_eal_pci_ioport_write; - rte_eal_pci_map_device; - 
rte_eal_pci_unmap_device; rte_eal_primary_proc_alive; } DPDK_2.2; @@ -174,7 +149,52 @@ DPDK_16.11 { rte_delay_us_callback_register; rte_eal_dev_attach; rte_eal_dev_detach; - rte_eal_vdrv_register; - rte_eal_vdrv_unregister; } DPDK_16.07; + +DPDK_17.02 { + global: + + rte_bus_dump; + rte_bus_probe; + rte_bus_register; + rte_bus_scan; + rte_bus_unregister; + +} DPDK_16.11; + +DPDK_17.05 { + global: + + rte_cpu_is_supported; + rte_intr_free_epoll_fd; + rte_log_dump; + rte_log_get_global_level; + rte_log_register; + rte_log_set_global_level; + rte_log_set_level; + rte_log_set_level_regexp; + rte_pci_detach; + rte_pci_dump; + rte_pci_ioport_map; + rte_pci_ioport_read; + rte_pci_ioport_unmap; + rte_pci_ioport_write; + rte_pci_map_device; + rte_pci_probe; + rte_pci_probe_one; + rte_pci_read_config; + rte_pci_register; + rte_pci_scan; + rte_pci_unmap_device; + rte_pci_unregister; + rte_pci_write_config; + rte_vdev_init; + rte_vdev_register; + rte_vdev_uninit; + rte_vdev_unregister; + vfio_get_container_fd; + vfio_get_group_fd; + vfio_get_group_no; + +} DPDK_17.02; diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h index 0d781e48..b800a53c 100644 --- a/lib/librte_eal/linuxapp/igb_uio/compat.h +++ b/lib/librte_eal/linuxapp/igb_uio/compat.h @@ -123,3 +123,7 @@ static bool pci_check_and_mask_intx(struct pci_dev *pdev) } #endif /* < 3.3.0 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) +#define HAVE_PCI_ENABLE_MSIX +#endif diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index df41e457..b9d427c5 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -314,7 +314,7 @@ igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info) } } - return (iom != 0) ? ret : -ENOENT; + return (iom != 0 || iop != 0) ? 
ret : -ENOENT; } #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) @@ -325,7 +325,11 @@ static int igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct rte_uio_pci_dev *udev; +#ifdef HAVE_PCI_ENABLE_MSIX struct msix_entry msix_entry; +#endif + dma_addr_t map_dma_addr; + void *map_addr; int err; udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL); @@ -379,18 +383,28 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) switch (igbuio_intr_mode_preferred) { case RTE_INTR_MODE_MSIX: /* Only 1 msi-x vector needed */ +#ifdef HAVE_PCI_ENABLE_MSIX msix_entry.entry = 0; if (pci_enable_msix(dev, &msix_entry, 1) == 0) { dev_dbg(&dev->dev, "using MSI-X"); + udev->info.irq_flags = IRQF_NO_THREAD; udev->info.irq = msix_entry.vector; udev->mode = RTE_INTR_MODE_MSIX; break; } +#else + if (pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_MSIX) == 1) { + dev_dbg(&dev->dev, "using MSI-X"); + udev->info.irq = pci_irq_vector(dev, 0); + udev->mode = RTE_INTR_MODE_MSIX; + break; + } +#endif /* fall back to INTX */ case RTE_INTR_MODE_LEGACY: if (pci_intx_mask_supported(dev)) { dev_dbg(&dev->dev, "using INTX"); - udev->info.irq_flags = IRQF_SHARED; + udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD; udev->info.irq = dev->irq; udev->mode = RTE_INTR_MODE_LEGACY; break; @@ -423,6 +437,27 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_info(&dev->dev, "uio device registered with irq %lx\n", udev->info.irq); + /* + * Doing a harmless dma mapping for attaching the device to + * the iommu identity mapping if kernel boots with iommu=pt. + * Note this is not a problem if no IOMMU at all. 
+ */ + map_addr = dma_alloc_coherent(&dev->dev, 1024, &map_dma_addr, + GFP_KERNEL); + if (map_addr) + memset(map_addr, 0, 1024); + + if (!map_addr) + dev_info(&dev->dev, "dma mapping failed\n"); + else { + dev_info(&dev->dev, "mapping 1K dma=%#llx host=%p\n", + (unsigned long long)map_dma_addr, map_addr); + + dma_free_coherent(&dev->dev, 1024, map_addr, map_dma_addr); + dev_info(&dev->dev, "unmapping 1K dma=%#llx host=%p\n", + (unsigned long long)map_dma_addr, map_addr); + } + return 0; fail_remove_group: diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile index 4e99e07e..154c528d 100644 --- a/lib/librte_eal/linuxapp/kni/Makefile +++ b/lib/librte_eal/linuxapp/kni/Makefile @@ -44,45 +44,43 @@ MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -I$(SRCDIR)/ethtool/ixgbe -I$(SRCDIR)/e MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h MODULE_CFLAGS += -Wall -Werror -ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu) -MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .) 
+-include /etc/lsb-release + +ifeq ($(DISTRIB_ID),Ubuntu) +MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(subst .,,$(DISTRIB_RELEASE)) UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \ | cut -d '"' -f2 | cut -d- -f1,2 | tr .- ,`,1) MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))" endif -# this lib needs main eal -DEPDIRS-y += lib/librte_eal/linuxapp/eal - # # all source are stored in SRCS-y # -SRCS-y := ethtool/ixgbe/ixgbe_main.c -SRCS-y += ethtool/ixgbe/ixgbe_api.c -SRCS-y += ethtool/ixgbe/ixgbe_common.c -SRCS-y += ethtool/ixgbe/ixgbe_ethtool.c -SRCS-y += ethtool/ixgbe/ixgbe_82599.c -SRCS-y += ethtool/ixgbe/ixgbe_82598.c -SRCS-y += ethtool/ixgbe/ixgbe_x540.c -SRCS-y += ethtool/ixgbe/ixgbe_phy.c -SRCS-y += ethtool/ixgbe/kcompat.c +SRCS-y := kni_misc.c +SRCS-y += kni_net.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += kni_ethtool.c -SRCS-y += ethtool/igb/e1000_82575.c -SRCS-y += ethtool/igb/e1000_i210.c -SRCS-y += ethtool/igb/e1000_api.c -SRCS-y += ethtool/igb/e1000_mac.c -SRCS-y += ethtool/igb/e1000_manage.c -SRCS-y += ethtool/igb/e1000_mbx.c -SRCS-y += ethtool/igb/e1000_nvm.c -SRCS-y += ethtool/igb/e1000_phy.c -SRCS-y += ethtool/igb/igb_ethtool.c -SRCS-y += ethtool/igb/igb_main.c -SRCS-y += ethtool/igb/igb_param.c -SRCS-y += ethtool/igb/igb_vmdq.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_main.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_api.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_common.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_ethtool.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_82599.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_82598.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_x540.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_phy.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/kcompat.c -SRCS-y += kni_misc.c -SRCS-y += kni_net.c 
-SRCS-y += kni_ethtool.c -SRCS-$(CONFIG_RTE_KNI_VHOST) += kni_vhost.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_82575.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_i210.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_api.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_mac.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_manage.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_mbx.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_nvm.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_phy.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_ethtool.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_main.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_param.c +SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_vmdq.c include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h index 78da08e5..d96275af 100644 --- a/lib/librte_eal/linuxapp/kni/compat.h +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -2,6 +2,8 @@ * Minimal wrappers to allow compiling kni on older kernels. 
*/ +#include <linux/version.h> + #ifndef RHEL_RELEASE_VERSION #define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) #endif @@ -67,3 +69,7 @@ (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))) #undef NET_NAME_UNKNOWN #endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c index d7a987d5..95e262b7 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c @@ -1126,7 +1126,7 @@ static int igb_eeprom_test(struct igb_adapter *adapter, u64 *data) static irqreturn_t igb_test_intr(int irq, void *data) { - struct igb_adapter *adapter = (struct igb_adapter *) data; + struct igb_adapter *adapter = data; struct e1000_hw *hw = &adapter->hw; adapter->test_icr |= E1000_READ_REG(hw, E1000_ICR); diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c index f4dca5a3..5f1f3a6b 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c @@ -1031,8 +1031,15 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) for (i = 0; i < numvecs; i++) adapter->msix_entries[i].entry = i; +#ifdef HAVE_PCI_ENABLE_MSIX err = pci_enable_msix(pdev, adapter->msix_entries, numvecs); +#else + err = pci_enable_msix_range(pdev, + adapter->msix_entries, + numvecs, + numvecs); +#endif if (err == 0) break; } @@ -1629,7 +1636,7 @@ static void igb_check_swap_media(struct igb_adapter *adapter) */ static int igb_get_i2c_data(void *data) { - struct igb_adapter *adapter = (struct igb_adapter *)data; + struct igb_adapter *adapter = data; struct e1000_hw *hw = &adapter->hw; s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); @@ -1644,7 +1651,7 @@ static int igb_get_i2c_data(void *data) */ static void igb_set_i2c_data(void *data, int 
state) { - struct igb_adapter *adapter = (struct igb_adapter *)data; + struct igb_adapter *adapter = data; struct e1000_hw *hw = &adapter->hw; s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); @@ -1669,7 +1676,7 @@ static void igb_set_i2c_data(void *data, int state) */ static void igb_set_i2c_clk(void *data, int state) { - struct igb_adapter *adapter = (struct igb_adapter *)data; + struct igb_adapter *adapter = data; struct e1000_hw *hw = &adapter->hw; s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); @@ -1691,7 +1698,7 @@ static void igb_set_i2c_clk(void *data, int state) */ static int igb_get_i2c_clk(void *data) { - struct igb_adapter *adapter = (struct igb_adapter *)data; + struct igb_adapter *adapter = data; struct e1000_hw *hw = &adapter->hw; s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h index 84826b26..4c52da3c 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h @@ -710,6 +710,9 @@ struct _kc_ethtool_pauseparam { #elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) ) /* SLES12 is at least 3.12.28+ based */ #define SLE_VERSION_CODE SLE_VERSION(12,0,0) +#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57)) +/* SLES12SP3 is at least 4.4.57+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0) #endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */ #endif /* CONFIG_SUSE_KERNEL */ #ifndef SLE_VERSION_CODE @@ -3929,8 +3932,13 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) #define vlan_tx_tag_present skb_vlan_tag_present #endif -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) ) +#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)) || \ + (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(12, 3, 0))) #define HAVE_VF_VLAN_PROTO -#endif /* >= 4.9.0 */ +#endif /* >= 4.9.0, >= SLES12SP3 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) +#define 
HAVE_PCI_ENABLE_MSIX +#endif #endif /* _KCOMPAT_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c index bc3cb2f4..cdfcb959 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c @@ -1462,7 +1462,7 @@ static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data) static irqreturn_t ixgbe_test_intr(int irq, void *data) { - struct net_device *netdev = (struct net_device *) data; + struct net_device *netdev = data; struct ixgbe_adapter *adapter = netdev_priv(netdev); adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR); @@ -2447,7 +2447,7 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, break; case ETHTOOL_GRXCLSRLALL: ret = ixgbe_get_ethtool_fdir_all(adapter, cmd, - (u32 *)rule_locs); + rule_locs); break; case ETHTOOL_GRXFH: ret = ixgbe_get_rss_hash_opts(adapter, cmd); diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h index 58cbadd3..72385ab4 100644 --- a/lib/librte_eal/linuxapp/kni/kni_dev.h +++ b/lib/librte_eal/linuxapp/kni/kni_dev.h @@ -30,17 +30,19 @@ #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include "compat.h" + #include <linux/if.h> #include <linux/wait.h> +#ifdef HAVE_SIGNAL_FUNCTIONS_OWN_HEADER +#include <linux/sched/signal.h> +#else #include <linux/sched.h> +#endif #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/list.h> -#ifdef RTE_KNI_VHOST -#include <net/sock.h> -#endif - #include <exec-env/rte_kni_common.h> #define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */ @@ -102,15 +104,6 @@ struct kni_dev { /* synchro for request processing */ unsigned long synchro; -#ifdef RTE_KNI_VHOST - struct kni_vhost_queue *vhost_queue; - - volatile enum { - BE_STOP = 0x1, - BE_START = 0x2, - BE_FINISH = 0x4, - } vq_status; -#endif /* buffers */ void *pa[MBUF_BURST_SZ]; void 
*va[MBUF_BURST_SZ]; @@ -118,26 +111,6 @@ struct kni_dev { void *alloc_va[MBUF_BURST_SZ]; }; -#ifdef RTE_KNI_VHOST -uint32_t -kni_poll(struct file *file, struct socket *sock, poll_table * wait); -int kni_chk_vhost_rx(struct kni_dev *kni); -int kni_vhost_init(struct kni_dev *kni); -int kni_vhost_backend_release(struct kni_dev *kni); - -struct kni_vhost_queue { - struct sock sk; - struct socket *sock; - int vnet_hdr_sz; - struct kni_dev *kni; - int sockfd; - uint32_t flags; - struct sk_buff *cache; - struct rte_kni_fifo *fifo; -}; - -#endif - void kni_net_rx(struct kni_dev *kni); void kni_net_init(struct net_device *dev); void kni_net_config_lo_mode(char *lo_str); diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h index 025ec1c9..14f4141f 100644 --- a/lib/librte_eal/linuxapp/kni/kni_fifo.h +++ b/lib/librte_eal/linuxapp/kni/kni_fifo.h @@ -91,18 +91,4 @@ kni_fifo_free_count(struct rte_kni_fifo *fifo) return (fifo->read - fifo->write - 1) & (fifo->len - 1); } -#ifdef RTE_KNI_VHOST -/** - * Initializes the kni fifo structure - */ -static inline void -kni_fifo_init(struct rte_kni_fifo *fifo, uint32_t size) -{ - fifo->write = 0; - fifo->read = 0; - fifo->len = size; - fifo->elem_size = sizeof(void *); -} -#endif - #endif /* _KNI_FIFO_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c index 497db9bd..7590f1fd 100644 --- a/lib/librte_eal/linuxapp/kni/kni_misc.c +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c @@ -140,11 +140,7 @@ kni_thread_single(void *data) down_read(&knet->kni_list_lock); for (j = 0; j < KNI_RX_LOOP_NUM; j++) { list_for_each_entry(dev, &knet->kni_list_head, list) { -#ifdef RTE_KNI_VHOST - kni_chk_vhost_rx(dev); -#else kni_net_rx(dev); -#endif kni_net_poll_resp(dev); } } @@ -163,15 +159,11 @@ static int kni_thread_multiple(void *param) { int j; - struct kni_dev *dev = (struct kni_dev *)param; + struct kni_dev *dev = param; while (!kthread_should_stop()) { for (j = 0; j < 
KNI_RX_LOOP_NUM; j++) { -#ifdef RTE_KNI_VHOST - kni_chk_vhost_rx(dev); -#else kni_net_rx(dev); -#endif kni_net_poll_resp(dev); } #ifdef RTE_KNI_PREEMPT_DEFAULT @@ -205,12 +197,14 @@ kni_dev_remove(struct kni_dev *dev) if (!dev) return -ENODEV; +#ifdef RTE_KNI_KMOD_ETHTOOL if (dev->pci_dev) { if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev)) ixgbe_kni_remove(dev->pci_dev); else if (pci_match_id(igb_pci_tbl, dev->pci_dev)) igb_kni_remove(dev->pci_dev); } +#endif if (dev->net_dev) { unregister_netdev(dev->net_dev); @@ -246,9 +240,6 @@ kni_release(struct inode *inode, struct file *file) dev->pthread = NULL; } -#ifdef RTE_KNI_VHOST - kni_vhost_backend_release(dev); -#endif kni_dev_remove(dev); list_del(&dev->list); } @@ -326,11 +317,13 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, struct kni_net *knet = net_generic(net, kni_net_id); int ret; struct rte_kni_device_info dev_info; - struct pci_dev *pci = NULL; - struct pci_dev *found_pci = NULL; struct net_device *net_dev = NULL; - struct net_device *lad_dev = NULL; struct kni_dev *kni, *dev, *n; +#ifdef RTE_KNI_KMOD_ETHTOOL + struct pci_dev *found_pci = NULL; + struct net_device *lad_dev = NULL; + struct pci_dev *pci = NULL; +#endif pr_info("Creating kni...\n"); /* Check the buffer size, to avoid warning */ @@ -344,6 +337,12 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, return -EIO; } + /* Check if name is zero-ended */ + if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) { + pr_err("kni.name not zero-terminated"); + return -EINVAL; + } + /** * Check if the cpu core id is valid for binding. 
*/ @@ -363,8 +362,8 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, up_read(&knet->kni_list_lock); net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name, -#ifdef NET_NAME_UNKNOWN - NET_NAME_UNKNOWN, +#ifdef NET_NAME_USER + NET_NAME_USER, #endif kni_net_init); if (net_dev == NULL) { @@ -392,10 +391,6 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, kni->sync_va = dev_info.sync_va; kni->sync_kva = phys_to_virt(dev_info.sync_phys); -#ifdef RTE_KNI_VHOST - kni->vhost_queue = NULL; - kni->vq_status = BE_STOP; -#endif kni->mbuf_size = dev_info.mbuf_size; pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", @@ -418,7 +413,7 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, dev_info.function, dev_info.vendor_id, dev_info.device_id); - +#ifdef RTE_KNI_KMOD_ETHTOOL pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL); /* Support Ethtool */ @@ -459,6 +454,7 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, } if (pci) pci_dev_put(pci); +#endif if (kni->lad_dev) ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr); @@ -479,10 +475,6 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, return -ENODEV; } -#ifdef RTE_KNI_VHOST - kni_vhost_init(kni); -#endif - ret = kni_run_thread(knet, kni, dev_info.force_bind); if (ret != 0) return ret; @@ -526,9 +518,6 @@ kni_ioctl_release(struct net *net, uint32_t ioctl_num, dev->pthread = NULL; } -#ifdef RTE_KNI_VHOST - kni_vhost_backend_release(dev); -#endif kni_dev_remove(dev); list_del(&dev->list); ret = 0; diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c index 4ac99cfe..db9f4898 100644 --- a/lib/librte_eal/linuxapp/kni/kni_net.c +++ b/lib/librte_eal/linuxapp/kni/kni_net.c @@ -198,18 +198,6 @@ kni_net_config(struct net_device *dev, struct ifmap *map) /* * Transmit a packet (called by the kernel) */ -#ifdef RTE_KNI_VHOST -static int -kni_net_tx(struct sk_buff *skb, struct net_device *dev) -{ - struct kni_dev *kni = netdev_priv(dev); - - 
dev_kfree_skb(skb); - kni->stats.tx_dropped++; - - return NETDEV_TX_OK; -} -#else static int kni_net_tx(struct sk_buff *skb, struct net_device *dev) { @@ -289,7 +277,6 @@ drop: return NETDEV_TX_OK; } -#endif /* * RX: normal working mode diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c b/lib/librte_eal/linuxapp/kni/kni_vhost.c deleted file mode 100644 index f54c34b1..00000000 --- a/lib/librte_eal/linuxapp/kni/kni_vhost.c +++ /dev/null @@ -1,842 +0,0 @@ -/*- - * GPL LICENSE SUMMARY - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * The full GNU General Public License is included in this distribution - * in the file called LICENSE.GPL. 
- * - * Contact Information: - * Intel Corporation - */ - -#include <linux/module.h> -#include <linux/net.h> -#include <net/sock.h> -#include <linux/virtio_net.h> -#include <linux/wait.h> -#include <linux/mm.h> -#include <linux/nsproxy.h> -#include <linux/sched.h> -#include <linux/if_tun.h> -#include <linux/version.h> -#include <linux/file.h> - -#include "compat.h" -#include "kni_dev.h" -#include "kni_fifo.h" - -#define RX_BURST_SZ 4 - -#ifdef HAVE_STATIC_SOCK_MAP_FD -static int kni_sock_map_fd(struct socket *sock) -{ - struct file *file; - int fd = get_unused_fd_flags(0); - - if (fd < 0) - return fd; - - file = sock_alloc_file(sock, 0, NULL); - if (IS_ERR(file)) { - put_unused_fd(fd); - return PTR_ERR(file); - } - fd_install(fd, file); - return fd; -} -#endif - -static struct proto kni_raw_proto = { - .name = "kni_vhost", - .owner = THIS_MODULE, - .obj_size = sizeof(struct kni_vhost_queue), -}; - -static inline int -kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, - uint32_t offset, uint32_t len) -{ - struct rte_kni_mbuf *pkt_kva = NULL; - struct rte_kni_mbuf *pkt_va = NULL; - int ret; - - pr_debug("tx offset=%d, len=%d, iovlen=%d\n", -#ifdef HAVE_IOV_ITER_MSGHDR - offset, len, (int)m->msg_iter.iov->iov_len); -#else - offset, len, (int)m->msg_iov->iov_len); -#endif - - /** - * Check if it has at least one free entry in tx_q and - * one entry in alloc_q. - */ - if (kni_fifo_free_count(kni->tx_q) == 0 || - kni_fifo_count(kni->alloc_q) == 0) { - /** - * If no free entry in tx_q or no entry in alloc_q, - * drops skb and goes out. 
- */ - goto drop; - } - - /* dequeue a mbuf from alloc_q */ - ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1); - if (likely(ret == 1)) { - void *data_kva; - - pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; - data_kva = pkt_kva->buf_addr + pkt_kva->data_off - - kni->mbuf_va + kni->mbuf_kva; - -#ifdef HAVE_IOV_ITER_MSGHDR - copy_from_iter(data_kva, len, &m->msg_iter); -#else - memcpy_fromiovecend(data_kva, m->msg_iov, offset, len); -#endif - - if (unlikely(len < ETH_ZLEN)) { - memset(data_kva + len, 0, ETH_ZLEN - len); - len = ETH_ZLEN; - } - pkt_kva->pkt_len = len; - pkt_kva->data_len = len; - - /* enqueue mbuf into tx_q */ - ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); - if (unlikely(ret != 1)) { - /* Failing should not happen */ - pr_err("Fail to enqueue mbuf into tx_q\n"); - goto drop; - } - } else { - /* Failing should not happen */ - pr_err("Fail to dequeue mbuf from alloc_q\n"); - goto drop; - } - - /* update statistics */ - kni->stats.tx_bytes += len; - kni->stats.tx_packets++; - - return 0; - -drop: - /* update statistics */ - kni->stats.tx_dropped++; - - return 0; -} - -static inline int -kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, - uint32_t offset, uint32_t len) -{ - uint32_t pkt_len; - struct rte_kni_mbuf *kva; - struct rte_kni_mbuf *va; - void *data_kva; - struct sk_buff *skb; - struct kni_vhost_queue *q = kni->vhost_queue; - - if (unlikely(q == NULL)) - return 0; - - /* ensure at least one entry in free_q */ - if (unlikely(kni_fifo_free_count(kni->free_q) == 0)) - return 0; - - skb = skb_dequeue(&q->sk.sk_receive_queue); - if (unlikely(skb == NULL)) - return 0; - - kva = (struct rte_kni_mbuf *)skb->data; - - /* free skb to cache */ - skb->data = NULL; - if (unlikely(kni_fifo_put(q->fifo, (void **)&skb, 1) != 1)) - /* Failing should not happen */ - pr_err("Fail to enqueue entries into rx cache fifo\n"); - - pkt_len = kva->data_len; - if (unlikely(pkt_len > len)) - goto drop; - - pr_debug("rx offset=%d, len=%d, 
pkt_len=%d, iovlen=%d\n", -#ifdef HAVE_IOV_ITER_MSGHDR - offset, len, pkt_len, (int)m->msg_iter.iov->iov_len); -#else - offset, len, pkt_len, (int)m->msg_iov->iov_len); -#endif - - data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva; -#ifdef HAVE_IOV_ITER_MSGHDR - if (unlikely(copy_to_iter(data_kva, pkt_len, &m->msg_iter))) -#else - if (unlikely(memcpy_toiovecend(m->msg_iov, data_kva, offset, pkt_len))) -#endif - goto drop; - - /* Update statistics */ - kni->stats.rx_bytes += pkt_len; - kni->stats.rx_packets++; - - /* enqueue mbufs into free_q */ - va = (void *)kva - kni->mbuf_kva + kni->mbuf_va; - if (unlikely(kni_fifo_put(kni->free_q, (void **)&va, 1) != 1)) - /* Failing should not happen */ - pr_err("Fail to enqueue entries into free_q\n"); - - pr_debug("receive done %d\n", pkt_len); - - return pkt_len; - -drop: - /* Update drop statistics */ - kni->stats.rx_dropped++; - - return 0; -} - -static uint32_t -kni_sock_poll(struct file *file, struct socket *sock, poll_table *wait) -{ - struct kni_vhost_queue *q = - container_of(sock->sk, struct kni_vhost_queue, sk); - struct kni_dev *kni; - uint32_t mask = 0; - - if (unlikely(q == NULL || q->kni == NULL)) - return POLLERR; - - kni = q->kni; -#ifdef HAVE_SOCKET_WQ - pr_debug("start kni_poll on group %d, wq 0x%16llx\n", - kni->group_id, (uint64_t)sock->wq); - poll_wait(file, &sock->wq->wait, wait); -#else - pr_debug("start kni_poll on group %d, wait at 0x%16llx\n", - kni->group_id, (uint64_t)&sock->wait); - poll_wait(file, &sock->wait, wait); -#endif - - if (kni_fifo_count(kni->rx_q) > 0) - mask |= POLLIN | POLLRDNORM; - - if (sock_writeable(&q->sk) || -#ifdef SOCKWQ_ASYNC_NOSPACE - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) && - sock_writeable(&q->sk))) -#else - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) && - sock_writeable(&q->sk))) -#endif - mask |= POLLOUT | POLLWRNORM; - - return mask; -} - -static inline void -kni_vhost_enqueue(struct kni_dev *kni, struct 
kni_vhost_queue *q, - struct sk_buff *skb, struct rte_kni_mbuf *va) -{ - struct rte_kni_mbuf *kva; - - kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva; - (skb)->data = (unsigned char *)kva; - (skb)->len = kva->data_len; - skb_queue_tail(&q->sk.sk_receive_queue, skb); -} - -static inline void -kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q, - struct sk_buff **skb, struct rte_kni_mbuf **va) -{ - int i; - - for (i = 0; i < RX_BURST_SZ; skb++, va++, i++) - kni_vhost_enqueue(kni, q, *skb, *va); -} - -int -kni_chk_vhost_rx(struct kni_dev *kni) -{ - struct kni_vhost_queue *q = kni->vhost_queue; - uint32_t nb_in, nb_mbuf, nb_skb; - const uint32_t BURST_MASK = RX_BURST_SZ - 1; - uint32_t nb_burst, nb_backlog, i; - struct sk_buff *skb[RX_BURST_SZ]; - struct rte_kni_mbuf *va[RX_BURST_SZ]; - - if (unlikely(BE_STOP & kni->vq_status)) { - kni->vq_status |= BE_FINISH; - return 0; - } - - if (unlikely(q == NULL)) - return 0; - - nb_skb = kni_fifo_count(q->fifo); - nb_mbuf = kni_fifo_count(kni->rx_q); - - nb_in = min(nb_mbuf, nb_skb); - nb_in = min_t(uint32_t, nb_in, RX_BURST_SZ); - nb_burst = (nb_in & ~BURST_MASK); - nb_backlog = (nb_in & BURST_MASK); - - /* enqueue skb_queue per BURST_SIZE bulk */ - if (nb_burst != 0) { - if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, RX_BURST_SZ) - != RX_BURST_SZ)) - goto except; - - if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, RX_BURST_SZ) - != RX_BURST_SZ)) - goto except; - - kni_vhost_enqueue_burst(kni, q, skb, va); - } - - /* all leftover, do one by one */ - for (i = 0; i < nb_backlog; ++i) { - if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, 1) != 1)) - goto except; - - if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, 1) != 1)) - goto except; - - kni_vhost_enqueue(kni, q, *skb, *va); - } - - /* Ondemand wake up */ - if ((nb_in == RX_BURST_SZ) || (nb_skb == 0) || - ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) { - wake_up_interruptible_poll(sk_sleep(&q->sk), - POLLIN | POLLRDNORM | POLLRDBAND); - 
pr_debug("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n", - nb_mbuf, nb_skb, nb_in); - } - - return 0; - -except: - /* Failing should not happen */ - pr_err("Fail to enqueue fifo, it shouldn't happen\n"); - BUG_ON(1); - - return 0; -} - -static int -#ifdef HAVE_KIOCB_MSG_PARAM -kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) -#else -kni_sock_sndmsg(struct socket *sock, - struct msghdr *m, size_t total_len) -#endif /* HAVE_KIOCB_MSG_PARAM */ -{ - struct kni_vhost_queue *q = - container_of(sock->sk, struct kni_vhost_queue, sk); - int vnet_hdr_len = 0; - unsigned long len = total_len; - - if (unlikely(q == NULL || q->kni == NULL)) - return 0; - - pr_debug("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n", -#ifdef HAVE_IOV_ITER_MSGHDR - len, q->flags, (int)m->msg_iter.iov->iov_len); -#else - len, q->flags, (int)m->msg_iovlen); -#endif - -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - if (likely(q->flags & IFF_VNET_HDR)) { - vnet_hdr_len = q->vnet_hdr_sz; - if (unlikely(len < vnet_hdr_len)) - return -EINVAL; - len -= vnet_hdr_len; - } -#endif - - if (unlikely(len < ETH_HLEN + q->vnet_hdr_sz)) - return -EINVAL; - - return kni_vhost_net_tx(q->kni, m, vnet_hdr_len, len); -} - -static int -#ifdef HAVE_KIOCB_MSG_PARAM -kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len, int flags) -#else -kni_sock_rcvmsg(struct socket *sock, - struct msghdr *m, size_t len, int flags) -#endif /* HAVE_KIOCB_MSG_PARAM */ -{ - int vnet_hdr_len = 0; - int pkt_len = 0; - struct kni_vhost_queue *q = - container_of(sock->sk, struct kni_vhost_queue, sk); - static struct virtio_net_hdr - __attribute__ ((unused)) vnet_hdr = { - .flags = 0, - .gso_type = VIRTIO_NET_HDR_GSO_NONE - }; - - if (unlikely(q == NULL || q->kni == NULL)) - return 0; - -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - if (likely(q->flags & IFF_VNET_HDR)) { - vnet_hdr_len = q->vnet_hdr_sz; - len -= vnet_hdr_len; - if (len < 0) - return -EINVAL; - } -#endif - - pkt_len = 
kni_vhost_net_rx(q->kni, m, vnet_hdr_len, len); - if (unlikely(pkt_len == 0)) - return 0; - -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - /* no need to copy hdr when no pkt received */ -#ifdef HAVE_IOV_ITER_MSGHDR - if (unlikely(copy_to_iter((void *)&vnet_hdr, vnet_hdr_len, - &m->msg_iter))) -#else - if (unlikely(memcpy_toiovecend(m->msg_iov, - (void *)&vnet_hdr, 0, vnet_hdr_len))) -#endif /* HAVE_IOV_ITER_MSGHDR */ - return -EFAULT; -#endif /* RTE_KNI_VHOST_VNET_HDR_EN */ - pr_debug("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n", - (unsigned long)len, q->flags, pkt_len); - - return pkt_len + vnet_hdr_len; -} - -/* dummy tap like ioctl */ -static int -kni_sock_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - struct ifreq __user *ifr = argp; - uint32_t __user *up = argp; - struct kni_vhost_queue *q = - container_of(sock->sk, struct kni_vhost_queue, sk); - struct kni_dev *kni; - uint32_t u; - int __user *sp = argp; - int s; - int ret; - - pr_debug("tap ioctl cmd 0x%08x\n", cmd); - - switch (cmd) { - case TUNSETIFF: - pr_debug("TUNSETIFF\n"); - /* ignore the name, just look at flags */ - if (get_user(u, &ifr->ifr_flags)) - return -EFAULT; - - ret = 0; - if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP)) - ret = -EINVAL; - else - q->flags = u; - - return ret; - - case TUNGETIFF: - pr_debug("TUNGETIFF\n"); - rcu_read_lock_bh(); - kni = rcu_dereference_bh(q->kni); - if (kni) - dev_hold(kni->net_dev); - rcu_read_unlock_bh(); - - if (!kni) - return -ENOLINK; - - ret = 0; - if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) - || put_user(q->flags, &ifr->ifr_flags)) - ret = -EFAULT; - dev_put(kni->net_dev); - return ret; - - case TUNGETFEATURES: - pr_debug("TUNGETFEATURES\n"); - u = IFF_TAP | IFF_NO_PI; -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - u |= IFF_VNET_HDR; -#endif - if (put_user(u, up)) - return -EFAULT; - return 0; - - case TUNSETSNDBUF: - pr_debug("TUNSETSNDBUF\n"); - if (get_user(u, up)) - return 
-EFAULT; - - q->sk.sk_sndbuf = u; - return 0; - - case TUNGETVNETHDRSZ: - s = q->vnet_hdr_sz; - if (put_user(s, sp)) - return -EFAULT; - pr_debug("TUNGETVNETHDRSZ %d\n", s); - return 0; - - case TUNSETVNETHDRSZ: - if (get_user(s, sp)) - return -EFAULT; - if (s < (int)sizeof(struct virtio_net_hdr)) - return -EINVAL; - - pr_debug("TUNSETVNETHDRSZ %d\n", s); - q->vnet_hdr_sz = s; - return 0; - - case TUNSETOFFLOAD: - pr_debug("TUNSETOFFLOAD %lx\n", arg); -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - /* not support any offload yet */ - if (!(q->flags & IFF_VNET_HDR)) - return -EINVAL; - - return 0; -#else - return -EINVAL; -#endif - - default: - pr_debug("NOT SUPPORT\n"); - return -EINVAL; - } -} - -static int -kni_sock_compat_ioctl(struct socket *sock, uint32_t cmd, - unsigned long arg) -{ - /* 32 bits app on 64 bits OS to be supported later */ - pr_debug("Not implemented.\n"); - - return -EINVAL; -} - -#define KNI_VHOST_WAIT_WQ_SAFE() \ -do { \ - while ((BE_FINISH | BE_STOP) == kni->vq_status) \ - msleep(1); \ -} while (0) \ - - -static int -kni_sock_release(struct socket *sock) -{ - struct kni_vhost_queue *q = - container_of(sock->sk, struct kni_vhost_queue, sk); - struct kni_dev *kni; - - if (q == NULL) - return 0; - - kni = q->kni; - if (kni != NULL) { - kni->vq_status = BE_STOP; - KNI_VHOST_WAIT_WQ_SAFE(); - kni->vhost_queue = NULL; - q->kni = NULL; - } - - if (q->sockfd != -1) - q->sockfd = -1; - - sk_set_socket(&q->sk, NULL); - sock->sk = NULL; - - sock_put(&q->sk); - - pr_debug("dummy sock release done\n"); - - return 0; -} - -int -kni_sock_getname(struct socket *sock, struct sockaddr *addr, - int *sockaddr_len, int peer) -{ - pr_debug("dummy sock getname\n"); - ((struct sockaddr_ll *)addr)->sll_family = AF_PACKET; - return 0; -} - -static const struct proto_ops kni_socket_ops = { - .getname = kni_sock_getname, - .sendmsg = kni_sock_sndmsg, - .recvmsg = kni_sock_rcvmsg, - .release = kni_sock_release, - .poll = kni_sock_poll, - .ioctl = kni_sock_ioctl, - .compat_ioctl = 
kni_sock_compat_ioctl, -}; - -static void -kni_sk_write_space(struct sock *sk) -{ - wait_queue_head_t *wqueue; - - if (!sock_writeable(sk) || -#ifdef SOCKWQ_ASYNC_NOSPACE - !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) -#else - !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) -#endif - return; - wqueue = sk_sleep(sk); - if (wqueue && waitqueue_active(wqueue)) - wake_up_interruptible_poll( - wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); -} - -static void -kni_sk_destruct(struct sock *sk) -{ - struct kni_vhost_queue *q = - container_of(sk, struct kni_vhost_queue, sk); - - if (!q) - return; - - /* make sure there's no packet in buffer */ - while (skb_dequeue(&sk->sk_receive_queue) != NULL) - ; - - mb(); - - if (q->fifo != NULL) { - kfree(q->fifo); - q->fifo = NULL; - } - - if (q->cache != NULL) { - kfree(q->cache); - q->cache = NULL; - } -} - -static int -kni_vhost_backend_init(struct kni_dev *kni) -{ - struct kni_vhost_queue *q; - struct net *net = current->nsproxy->net_ns; - int err, i, sockfd; - struct rte_kni_fifo *fifo; - struct sk_buff *elem; - - if (kni->vhost_queue != NULL) - return -1; - -#ifdef HAVE_SK_ALLOC_KERN_PARAM - q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &kni_raw_proto, 0); -#else - q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &kni_raw_proto); -#endif - if (!q) - return -ENOMEM; - - err = sock_create_lite(AF_UNSPEC, SOCK_RAW, IPPROTO_RAW, &q->sock); - if (err) - goto free_sk; - - sockfd = kni_sock_map_fd(q->sock); - if (sockfd < 0) { - err = sockfd; - goto free_sock; - } - - /* cache init */ - q->cache = kzalloc( - RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff), - GFP_KERNEL); - if (!q->cache) - goto free_fd; - - fifo = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(void *) - + sizeof(struct rte_kni_fifo), GFP_KERNEL); - if (!fifo) - goto free_cache; - - kni_fifo_init(fifo, RTE_KNI_VHOST_MAX_CACHE_SIZE); - - for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) { - 
elem = &q->cache[i]; - kni_fifo_put(fifo, (void **)&elem, 1); - } - q->fifo = fifo; - - /* store sockfd in vhost_queue */ - q->sockfd = sockfd; - - /* init socket */ - q->sock->type = SOCK_RAW; - q->sock->state = SS_CONNECTED; - q->sock->ops = &kni_socket_ops; - sock_init_data(q->sock, &q->sk); - - /* init sock data */ - q->sk.sk_write_space = kni_sk_write_space; - q->sk.sk_destruct = kni_sk_destruct; - q->flags = IFF_NO_PI | IFF_TAP; - q->vnet_hdr_sz = sizeof(struct virtio_net_hdr); -#ifdef RTE_KNI_VHOST_VNET_HDR_EN - q->flags |= IFF_VNET_HDR; -#endif - - /* bind kni_dev with vhost_queue */ - q->kni = kni; - kni->vhost_queue = q; - - wmb(); - - kni->vq_status = BE_START; - -#ifdef HAVE_SOCKET_WQ - pr_debug("backend init sockfd=%d, sock->wq=0x%16llx,sk->sk_wq=0x%16llx", - q->sockfd, (uint64_t)q->sock->wq, - (uint64_t)q->sk.sk_wq); -#else - pr_debug("backend init sockfd=%d, sock->wait at 0x%16llx,sk->sk_sleep=0x%16llx", - q->sockfd, (uint64_t)&q->sock->wait, - (uint64_t)q->sk.sk_sleep); -#endif - - return 0; - -free_cache: - kfree(q->cache); - q->cache = NULL; - -free_fd: - put_unused_fd(sockfd); - -free_sock: - q->kni = NULL; - kni->vhost_queue = NULL; - kni->vq_status |= BE_FINISH; - sock_release(q->sock); - q->sock->ops = NULL; - q->sock = NULL; - -free_sk: - sk_free((struct sock *)q); - - return err; -} - -/* kni vhost sock sysfs */ -static ssize_t -show_sock_fd(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct net_device *net_dev = container_of(dev, struct net_device, dev); - struct kni_dev *kni = netdev_priv(net_dev); - int sockfd = -1; - - if (kni->vhost_queue != NULL) - sockfd = kni->vhost_queue->sockfd; - return snprintf(buf, 10, "%d\n", sockfd); -} - -static ssize_t -show_sock_en(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct net_device *net_dev = container_of(dev, struct net_device, dev); - struct kni_dev *kni = netdev_priv(net_dev); - - return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 
0 : 1)); -} - -static ssize_t -set_sock_en(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct net_device *net_dev = container_of(dev, struct net_device, dev); - struct kni_dev *kni = netdev_priv(net_dev); - unsigned long en; - int err = 0; - - if (kstrtoul(buf, 0, &en) != 0) - return -EINVAL; - - if (en) - err = kni_vhost_backend_init(kni); - - return err ? err : count; -} - -static DEVICE_ATTR(sock_fd, S_IRUGO | S_IRUSR, show_sock_fd, NULL); -static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en); -static struct attribute *dev_attrs[] = { - &dev_attr_sock_fd.attr, - &dev_attr_sock_en.attr, - NULL, -}; - -static const struct attribute_group dev_attr_grp = { - .attrs = dev_attrs, -}; - -int -kni_vhost_backend_release(struct kni_dev *kni) -{ - struct kni_vhost_queue *q = kni->vhost_queue; - - if (q == NULL) - return 0; - - /* dettach from kni */ - q->kni = NULL; - - pr_debug("release backend done\n"); - - return 0; -} - -int -kni_vhost_init(struct kni_dev *kni) -{ - struct net_device *dev = kni->net_dev; - - if (sysfs_create_group(&dev->dev.kobj, &dev_attr_grp)) - sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); - - kni->vq_status = BE_STOP; - - pr_debug("kni_vhost_init done\n"); - - return 0; -} diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_eal/linuxapp/xen_dom0/Makefile index 9d22fb97..be51a82a 100644 --- a/lib/librte_eal/linuxapp/xen_dom0/Makefile +++ b/lib/librte_eal/linuxapp/xen_dom0/Makefile @@ -44,9 +44,6 @@ MODULE_CFLAGS += -I$(RTE_OUTPUT)/include MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h MODULE_CFLAGS += -Wall -Werror -# this lib needs main eal -DEPDIRS-y += lib/librte_eal/linuxapp/eal - # # all source are stored in SRCS-y # |