From fca143f059a0bddd7d47b8dc2df646a891b0eb0f Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Tue, 16 May 2017 14:51:32 +0200 Subject: Imported Upstream version 17.05 --- lib/librte_eal/linuxapp/eal/Makefile | 6 +- lib/librte_eal/linuxapp/eal/eal.c | 128 +++++-- lib/librte_eal/linuxapp/eal/eal_alarm.c | 5 +- lib/librte_eal/linuxapp/eal/eal_debug.c | 4 + lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 9 +- lib/librte_eal/linuxapp/eal/eal_interrupts.c | 117 ++++--- lib/librte_eal/linuxapp/eal/eal_memory.c | 103 ++++-- lib/librte_eal/linuxapp/eal/eal_pci.c | 149 +++----- lib/librte_eal/linuxapp/eal/eal_pci_init.h | 3 +- lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 3 +- lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 88 ++++- lib/librte_eal/linuxapp/eal/eal_vfio.c | 386 +++++++++++++++++---- lib/librte_eal/linuxapp/eal/eal_vfio.h | 67 +++- lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 23 +- .../linuxapp/eal/include/exec-env/rte_interrupts.h | 14 +- .../linuxapp/eal/include/exec-env/rte_kni_common.h | 5 +- lib/librte_eal/linuxapp/eal/rte_eal_version.map | 74 ++-- 17 files changed, 867 insertions(+), 317 deletions(-) (limited to 'lib/librte_eal/linuxapp/eal') diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 4e206f09..640afd08 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -37,7 +37,7 @@ ARCH_DIR ?= $(RTE_ARCH) EXPORT_MAP := rte_eal_version.map VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) -LIBABIVER := 3 +LIBABIVER := 4 VPATH += $(RTE_SDK)/lib/librte_eal/common @@ -87,6 +87,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_cpuflags.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_bus.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c @@ -130,7 +131,4 @@ INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ $(addprefix include/exec-env/,$(INC)) -DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common -DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common/arch/$(ARCH_DIR) - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 2075282e..7c78f2dc 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -69,6 +70,7 @@ #include #include #include +#include #include #include #include @@ -210,7 +212,7 @@ rte_eal_config_create(void) rte_panic("Cannot mmap memory for rte_config\n"); } memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); - rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; + rte_config.mem_config = rte_mem_cfg_addr; /* store address of the config in the config itself so that secondary * processes could later map the config into this exact location */ @@ -490,8 +492,6 @@ eal_log_level_parse(int argc, char **argv) argvopt = argv; optind = 1; - eal_reset_internal_config(&internal_config); - while ((opt = getopt_long(argc, argvopt, eal_short_options, eal_long_options, &option_index)) != EOF) { @@ -739,6 +739,12 @@ static int rte_eal_vfio_setup(void) } #endif +static void rte_eal_init_alert(const char *msg) +{ + fprintf(stderr, "EAL: FATAL: %s\n", msg); + RTE_LOG(ERR, EAL, "%s\n", msg); +} + /* Launch threads, called at application init(). */ int rte_eal_init(int argc, char **argv) @@ -751,33 +757,51 @@ rte_eal_init(int argc, char **argv) char thread_name[RTE_MAX_THREAD_NAME_LEN]; /* checks if the machine is adequate */ - rte_cpu_check_supported(); + if (!rte_cpu_is_supported()) { + rte_eal_init_alert("unsupported cpu type."); + rte_errno = ENOTSUP; + return -1; + } - if (!rte_atomic32_test_and_set(&run_once)) + if (!rte_atomic32_test_and_set(&run_once)) { + rte_eal_init_alert("already called initialization."); + rte_errno = EALREADY; return -1; + } logid = strrchr(argv[0], '/'); logid = strdup(logid ? logid + 1: argv[0]); thread_id = pthread_self(); - eal_log_level_parse(argc, argv); + eal_reset_internal_config(&internal_config); /* set log level as early as possible */ - rte_set_log_level(internal_config.log_level); + eal_log_level_parse(argc, argv); - if (rte_eal_cpu_init() < 0) - rte_panic("Cannot detect lcores\n"); + if (rte_eal_cpu_init() < 0) { + rte_eal_init_alert("Cannot detect lcores."); + rte_errno = ENOTSUP; + return -1; + } fctret = eal_parse_args(argc, argv); - if (fctret < 0) - exit(1); + if (fctret < 0) { + rte_eal_init_alert("Invalid 'command line' arguments."); + rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } if (internal_config.no_hugetlbfs == 0 && internal_config.process_type != RTE_PROC_SECONDARY && internal_config.xen_dom0_support == 0 && - eal_hugepage_info_init() < 0) - rte_panic("Cannot get hugepage information\n"); + eal_hugepage_info_init() < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { if (internal_config.no_hugetlbfs) @@ -799,39 +823,59 @@ rte_eal_init(int argc, char **argv) rte_config_init(); - if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) - rte_panic("Cannot init logs\n"); - - if (rte_eal_pci_init() < 0) - rte_panic("Cannot init PCI\n"); + if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) { + rte_eal_init_alert("Cannot init logging."); + rte_errno = ENOMEM; + rte_atomic32_clear(&run_once); + return -1; + } #ifdef VFIO_PRESENT - if (rte_eal_vfio_setup() < 0) - rte_panic("Cannot init VFIO\n"); + if (rte_eal_vfio_setup() < 0) { + rte_eal_init_alert("Cannot init VFIO\n"); + rte_errno = EAGAIN; + rte_atomic32_clear(&run_once); + return -1; + } #endif - if (rte_eal_memory_init() < 0) - rte_panic("Cannot init memory\n"); + if (rte_eal_memory_init() < 0) { + rte_eal_init_alert("Cannot init memory\n"); + rte_errno = ENOMEM; + return -1; + } /* the directories are locked during eal_hugepage_info_init */ eal_hugedirs_unlock(); - if (rte_eal_memzone_init() < 0) - rte_panic("Cannot init memzone\n"); + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone\n"); + rte_errno = ENODEV; + return -1; + } - if (rte_eal_tailqs_init() < 0) - rte_panic("Cannot init tail queues for objects\n"); + if (rte_eal_tailqs_init() < 0) { + rte_eal_init_alert("Cannot init tail queues for objects\n"); + rte_errno = EFAULT; + return -1; + } - if (rte_eal_alarm_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_eal_alarm_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + /* rte_eal_alarm_init sets rte_errno on failure. */ + return -1; + } - if (rte_eal_timer_init() < 0) - rte_panic("Cannot init HPET or TSC timers\n"); + if (rte_eal_timer_init() < 0) { + rte_eal_init_alert("Cannot init HPET or TSC timers\n"); + rte_errno = ENOTSUP; + return -1; + } eal_check_mem_on_local_socket(); if (eal_plugins_init() < 0) - rte_panic("Cannot init plugins\n"); + rte_eal_init_alert("Cannot init plugins\n"); eal_thread_init_master(rte_config.master_lcore); @@ -841,11 +885,16 @@ rte_eal_init(int argc, char **argv) rte_config.master_lcore, (int)thread_id, cpuset, ret == 0 ? "" : "..."); - if (rte_eal_dev_init() < 0) - rte_panic("Cannot init pmd devices\n"); + if (rte_eal_intr_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + return -1; + } - if (rte_eal_intr_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_bus_scan()) { + rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_errno = ENODEV; + return -1; + } RTE_LCORE_FOREACH_SLAVE(i) { @@ -883,9 +932,12 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); - /* Probe & Initialize PCI devices */ - if (rte_eal_pci_probe()) - rte_panic("Cannot probe PCI\n"); + /* Probe all the buses and devices/drivers on them */ + if (rte_bus_probe()) { + rte_eal_init_alert("Cannot probe devices\n"); + rte_errno = ENOTSUP; + return -1; + } rte_eal_mcfg_complete(); diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c index 8b042abc..fbae4613 100644 --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c @@ -83,7 +83,7 @@ static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER; static struct rte_intr_handle intr_handle = {.fd = -1 }; static int handler_registered = 0; -static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg); +static void eal_alarm_callback(void *arg); int rte_eal_alarm_init(void) @@ -102,8 +102,7 @@ error: } static void -eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused, - void *arg __rte_unused) +eal_alarm_callback(void *arg __rte_unused) { struct timespec now; struct alarm_entry *ap; diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c index 5fbc17c5..e1c75548 100644 --- a/lib/librte_eal/linuxapp/eal/eal_debug.c +++ b/lib/librte_eal/linuxapp/eal/eal_debug.c @@ -31,7 +31,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifdef RTE_BACKTRACE #include +#endif #include #include #include @@ -47,6 +49,7 @@ /* dump the stack of the calling core */ void rte_dump_stack(void) { +#ifdef RTE_BACKTRACE void *func[BACKTRACE_SIZE]; char **symb = NULL; int size; @@ -64,6 +67,7 @@ void rte_dump_stack(void) } free(symb); +#endif /* RTE_BACKTRACE */ } /* not implemented in this environment */ diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 18858e2d..7a21e8f6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -283,9 +283,12 @@ eal_hugepage_info_init(void) struct dirent *dirent; dir = opendir(sys_dir_path); - if (dir == NULL) - rte_panic("Cannot open directory %s to read system hugepage " - "info\n", sys_dir_path); + if (dir == NULL) { + RTE_LOG(ERR, EAL, + "Cannot open directory %s to read system hugepage info\n", + sys_dir_path); + return -1; + } for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { struct hugepage_info *hpi; diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 47a3b20a..2e3bd12a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -136,7 +137,7 @@ static pthread_t intr_thread; /* enable legacy (INTx) interrupts */ static int -vfio_enable_intx(struct rte_intr_handle *intr_handle) { +vfio_enable_intx(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -183,7 +184,7 @@ vfio_enable_intx(struct rte_intr_handle *intr_handle) { /* disable legacy (INTx) interrupts */ static int -vfio_disable_intx(struct rte_intr_handle *intr_handle) { +vfio_disable_intx(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -194,14 +195,14 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) { irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; irq_set->count = 1; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; irq_set->start = 0; ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { - RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } @@ -226,7 +227,7 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) { /* enable MSI interrupts */ static int -vfio_enable_msi(struct rte_intr_handle *intr_handle) { +vfio_enable_msi(const struct rte_intr_handle *intr_handle) { int len, ret; char irq_set_buf[IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; @@ -255,7 +256,7 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) { /* disable MSI interrupts */ static int -vfio_disable_msi(struct rte_intr_handle *intr_handle) { +vfio_disable_msi(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[IRQ_SET_BUF_LEN]; int len, ret; @@ -280,7 +281,7 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) { /* enable MSI-X interrupts */ static int -vfio_enable_msix(struct rte_intr_handle *intr_handle) { +vfio_enable_msix(const struct rte_intr_handle *intr_handle) { int len, ret; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; struct vfio_irq_set *irq_set; @@ -290,12 +291,10 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) { irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; - if (!intr_handle->max_intr) - intr_handle->max_intr = 1; - else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID) - intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1; - - irq_set->count = intr_handle->max_intr; + /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */ + irq_set->count = intr_handle->max_intr ? + (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ? + RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1; irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; irq_set->start = 0; @@ -318,7 +317,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) { /* disable MSI-X interrupts */ static int -vfio_disable_msix(struct rte_intr_handle *intr_handle) { +vfio_disable_msix(const struct rte_intr_handle *intr_handle) { struct vfio_irq_set *irq_set; char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; int len, ret; @@ -343,7 +342,7 @@ vfio_disable_msix(struct rte_intr_handle *intr_handle) { #endif static int -uio_intx_intr_disable(struct rte_intr_handle *intr_handle) +uio_intx_intr_disable(const struct rte_intr_handle *intr_handle) { unsigned char command_high; @@ -367,7 +366,7 @@ uio_intx_intr_disable(struct rte_intr_handle *intr_handle) } static int -uio_intx_intr_enable(struct rte_intr_handle *intr_handle) +uio_intx_intr_enable(const struct rte_intr_handle *intr_handle) { unsigned char command_high; @@ -391,7 +390,7 @@ uio_intx_intr_enable(struct rte_intr_handle *intr_handle) } static int -uio_intr_disable(struct rte_intr_handle *intr_handle) +uio_intr_disable(const struct rte_intr_handle *intr_handle) { const int value = 0; @@ -405,7 +404,7 @@ uio_intr_disable(struct rte_intr_handle *intr_handle) } static int -uio_intr_enable(struct rte_intr_handle *intr_handle) +uio_intr_enable(const struct rte_intr_handle *intr_handle) { const int value = 1; @@ -419,7 +418,7 @@ uio_intr_enable(struct rte_intr_handle *intr_handle) } int -rte_intr_callback_register(struct rte_intr_handle *intr_handle, +rte_intr_callback_register(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb, void *cb_arg) { int ret, wake_thread; @@ -491,7 +490,7 @@ rte_intr_callback_register(struct rte_intr_handle *intr_handle, } int -rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, +rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, rte_intr_callback_fn cb_fn, void *cb_arg) { int ret; @@ -555,8 +554,11 @@ rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, } int -rte_intr_enable(struct rte_intr_handle *intr_handle) +rte_intr_enable(const struct rte_intr_handle *intr_handle) { + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; @@ -599,8 +601,11 @@ rte_intr_enable(struct rte_intr_handle *intr_handle) } int -rte_intr_disable(struct rte_intr_handle *intr_handle) +rte_intr_disable(const struct rte_intr_handle *intr_handle) { + if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 0; + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) return -1; @@ -645,6 +650,7 @@ rte_intr_disable(struct rte_intr_handle *intr_handle) static int eal_intr_process_interrupts(struct epoll_event *events, int nfds) { + bool call = false; int n, bytes_read; struct rte_intr_source *src; struct rte_intr_callback *cb; @@ -693,13 +699,18 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) bytes_read = sizeof(buf.vfio_intr_count); break; #endif + case RTE_INTR_HANDLE_VDEV: case RTE_INTR_HANDLE_EXT: + bytes_read = 0; + call = true; + break; + default: bytes_read = 1; break; } - if (src->intr_handle.type != RTE_INTR_HANDLE_EXT) { + if (bytes_read > 0) { /** * read out to clear the ready-to-be-read flag * for epoll_wait. @@ -716,12 +727,14 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) } else if (bytes_read == 0) RTE_LOG(ERR, EAL, "Read nothing from file " "descriptor %d\n", events[n].data.fd); + else + call = true; } /* grab a lock, again to call callbacks and update status. */ rte_spinlock_lock(&intr_lock); - if (bytes_read > 0) { + if (call) { /* Finally, call all callbacks. */ TAILQ_FOREACH(cb, &src->callbacks, next) { @@ -731,8 +744,7 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) rte_spinlock_unlock(&intr_lock); /* call the actual callback */ - active_cb.cb_fn(&src->intr_handle, - active_cb.cb_arg); + active_cb.cb_fn(active_cb.cb_arg); /*get the lock back. */ rte_spinlock_lock(&intr_lock); @@ -832,7 +844,7 @@ eal_intr_thread_main(__rte_unused void *arg) TAILQ_FOREACH(src, &intr_sources, next) { if (src->callbacks.tqh_first == NULL) continue; /* skip those with no callbacks */ - ev.events = EPOLLIN | EPOLLPRI; + ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP; ev.data.fd = src->intr_handle.fd; /** @@ -872,13 +884,16 @@ rte_eal_intr_init(void) * create a pipe which will be waited by epoll and notified to * rebuild the wait list of epoll. */ - if (pipe(intr_pipe.pipefd) < 0) + if (pipe(intr_pipe.pipefd) < 0) { + rte_errno = errno; return -1; + } /* create the host thread to wait/handle the interrupt */ ret = pthread_create(&intr_thread, NULL, eal_intr_thread_main, NULL); if (ret != 0) { + rte_errno = ret; RTE_LOG(ERR, EAL, "Failed to create thread for interrupt handling\n"); } else { @@ -913,6 +928,14 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) bytes_read = sizeof(buf.vfio_intr_count); break; #endif + case RTE_INTR_HANDLE_VDEV: + /* for vdev, fd points to: + * a. eventfd which does not need to read out; + * b. datapath fd which needs PMD to read out. + */ + return; + case RTE_INTR_HANDLE_EXT: + return; default: bytes_read = 1; RTE_LOG(INFO, EAL, "unexpected intr type\n"); @@ -1141,6 +1164,24 @@ rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd, return rc; } +void +rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle) +{ + uint32_t i; + struct rte_epoll_event *rev; + + for (i = 0; i < intr_handle->nb_efd; i++) { + rev = &intr_handle->elist[i]; + if (rev->status == RTE_EPOLL_INVALID) + continue; + if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { + /* force free if the entry valid */ + eal_epoll_data_safe_free(rev); + rev->status = RTE_EPOLL_INVALID; + } + } +} + int rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) { @@ -1157,12 +1198,14 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) RTE_LOG(ERR, EAL, "can't setup eventfd, error %i (%s)\n", errno, strerror(errno)); - return -1; + return -errno; } intr_handle->efds[i] = fd; } intr_handle->nb_efd = n; intr_handle->max_intr = NB_OTHER_INTR + n; + } else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) { + /* do nothing, and let vdev driver to initialize this struct */ } else { intr_handle->efds[0] = intr_handle->fd; intr_handle->nb_efd = RTE_MIN(nb_efd, 1U); @@ -1176,19 +1219,8 @@ void rte_intr_efd_disable(struct rte_intr_handle *intr_handle) { uint32_t i; - struct rte_epoll_event *rev; - - for (i = 0; i < intr_handle->nb_efd; i++) { - rev = &intr_handle->elist[i]; - if (rev->status == RTE_EPOLL_INVALID) - continue; - if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { - /* force free if the entry valid */ - eal_epoll_data_safe_free(rev); - rev->status = RTE_EPOLL_INVALID; - } - } + rte_intr_free_epoll_fd(intr_handle); if (intr_handle->max_intr > intr_handle->nb_efd) { for (i = 0; i < intr_handle->nb_efd; i++) close(intr_handle->efds[i]); @@ -1218,5 +1250,8 @@ rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) return 1; + if (intr_handle->type == RTE_INTR_HANDLE_VDEV) + return 1; + return 0; } diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index a956bb22..ebe06833 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -64,6 +64,7 @@ #define _FILE_OFFSET_BITS 64 #include #include +#include #include #include #include @@ -122,26 +123,28 @@ int rte_xen_dom0_supported(void) static uint64_t baseaddr_offset; -static unsigned proc_pagemap_readable; +static bool phys_addrs_available = true; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" static void -test_proc_pagemap_readable(void) +test_phys_addrs_available(void) { - int fd = open("/proc/self/pagemap", O_RDONLY); + uint64_t tmp; + phys_addr_t physaddr; - if (fd < 0) { + /* For dom0, phys addresses can always be available */ + if (rte_xen_dom0_supported()) + return; + + physaddr = rte_mem_virt2phy(&tmp); + if (physaddr == RTE_BAD_PHYS_ADDR) { RTE_LOG(ERR, EAL, - "Cannot open /proc/self/pagemap: %s. " - "virt2phys address translation will not work\n", + "Cannot obtain physical addresses: %s. " + "Only vfio will function.\n", strerror(errno)); - return; + phys_addrs_available = false; } - - /* Is readable */ - close(fd); - proc_pagemap_readable = 1; } /* Lock page in physical memory and prevent from swapping. */ @@ -190,7 +193,7 @@ rte_mem_virt2phy(const void *virtaddr) } /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ - if (!proc_pagemap_readable) + if (!phys_addrs_available) return RTE_BAD_PHYS_ADDR; /* standard page size */ @@ -229,6 +232,9 @@ rte_mem_virt2phy(const void *virtaddr) * the pfn (page frame number) are bits 0-54 (see * pagemap.txt in linux Documentation) */ + if ((page & 0x7fffffffffffffULL) == 0) + return RTE_BAD_PHYS_ADDR; + physaddr = ((page & 0x7fffffffffffffULL) * page_size) + ((unsigned long)virtaddr % page_size); @@ -242,7 +248,7 @@ rte_mem_virt2phy(const void *virtaddr) static int find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) { - unsigned i; + unsigned int i; phys_addr_t addr; for (i = 0; i < hpi->num_pages[0]; i++) { @@ -254,6 +260,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) return 0; } +/* + * For each hugepage in hugepg_tbl, fill the physaddr value sequentially. + */ +static int +set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + unsigned int i; + static phys_addr_t addr; + + for (i = 0; i < hpi->num_pages[0]; i++) { + hugepg_tbl[i].physaddr = addr; + addr += hugepg_tbl[i].size; + } + return 0; +} + /* * Check whether address-space layout randomization is enabled in * the kernel. This is important for multi-process as it can prevent @@ -313,7 +335,13 @@ get_virtual_area(size_t *size, size_t hugepage_sz) } do { addr = mmap(addr, - (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0); + (*size) + hugepage_sz, PROT_READ, +#ifdef RTE_ARCH_PPC_64 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, +#else + MAP_PRIVATE, +#endif + fd, 0); if (addr == MAP_FAILED) *size -= hugepage_sz; } while (addr == MAP_FAILED && *size > 0); @@ -592,12 +620,12 @@ static int cmp_physaddr(const void *a, const void *b) { #ifndef RTE_ARCH_PPC_64 - const struct hugepage_file *p1 = (const struct hugepage_file *)a; - const struct hugepage_file *p2 = (const struct hugepage_file *)b; + const struct hugepage_file *p1 = a; + const struct hugepage_file *p2 = b; #else /* PowerPC needs memory sorted in reverse order from x86 */ - const struct hugepage_file *p1 = (const struct hugepage_file *)b; - const struct hugepage_file *p2 = (const struct hugepage_file *)a; + const struct hugepage_file *p1 = b; + const struct hugepage_file *p2 = a; #endif if (p1->physaddr < p2->physaddr) return -1; @@ -951,7 +979,7 @@ rte_eal_hugepage_init(void) int nr_hugefiles, nr_hugepages = 0; void *addr; - test_proc_pagemap_readable(); + test_phys_addrs_available(); memset(used_hp, 0, sizeof(used_hp)); @@ -1043,11 +1071,22 @@ rte_eal_hugepage_init(void) continue; } - /* find physical addresses and sockets for each hugepage */ - if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){ - RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n", - (unsigned)(hpi->hugepage_sz / 0x100000)); - goto fail; + if (phys_addrs_available) { + /* find physical addresses for each hugepage */ + if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { + RTE_LOG(DEBUG, EAL, "Failed to find phys addr " + "for %u MB pages\n", + (unsigned int)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + } else { + /* set physical addresses for each hugepage */ + if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { + RTE_LOG(DEBUG, EAL, "Failed to set phys addr " + "for %u MB pages\n", + (unsigned int)(hpi->hugepage_sz / 0x100000)); + goto fail; + } } if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){ @@ -1289,7 +1328,7 @@ rte_eal_hugepage_attach(void) "into secondary processes\n"); } - test_proc_pagemap_readable(); + test_phys_addrs_available(); if (internal_config.xen_dom0_support) { #ifdef RTE_LIBRTE_XEN_DOM0 @@ -1330,7 +1369,13 @@ rte_eal_hugepage_attach(void) * use mmap to get identical addresses as the primary process. */ base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, - PROT_READ, MAP_PRIVATE, fd_zero, 0); + PROT_READ, +#ifdef RTE_ARCH_PPC_64 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, +#else + MAP_PRIVATE, +#endif + fd_zero, 0); if (base_addr == MAP_FAILED || base_addr != mcfg->memseg[s].addr) { max_seg = s; @@ -1426,3 +1471,9 @@ error: close(fd_hugepage); return -1; } + +bool +rte_eal_using_phys_addrs(void) +{ + return phys_addrs_available; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 876ba381..595622b2 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -54,44 +55,7 @@ * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). */ -/* unbind kernel driver for this device */ -int -pci_unbind_kernel_driver(struct rte_pci_device *dev) -{ - int n; - FILE *f; - char filename[PATH_MAX]; - char buf[BUFSIZ]; - struct rte_pci_addr *loc = &dev->addr; - - /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ - snprintf(filename, sizeof(filename), - "%s/" PCI_PRI_FMT "/driver/unbind", pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - - f = fopen(filename, "w"); - if (f == NULL) /* device was not bound */ - return 0; - - n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n", - loc->domain, loc->bus, loc->devid, loc->function); - if ((n < 0) || (n >= (int)sizeof(buf))) { - RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__); - goto error; - } - if (fwrite(buf, n, 1, f) == 0) { - RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__, - filename); - goto error; - } - - fclose(f); - return 0; - -error: - fclose(f); - return -1; -} +extern struct rte_pci_bus rte_pci_bus; static int pci_get_kernel_driver_by_path(const char *filename, char *dri_name) @@ -124,7 +88,7 @@ pci_get_kernel_driver_by_path(const char *filename, char *dri_name) /* Map pci device */ int -rte_eal_pci_map_device(struct rte_pci_device *dev) +rte_pci_map_device(struct rte_pci_device *dev) { int ret = -1; @@ -138,8 +102,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev) break; case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: - /* map resources for devices that use uio */ - ret = pci_uio_map_resource(dev); + if (rte_eal_using_phys_addrs()) { + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + } break; default: RTE_LOG(DEBUG, EAL, @@ -153,12 +119,15 @@ rte_eal_pci_map_device(struct rte_pci_device *dev) /* Unmap pci device */ void -rte_eal_pci_unmap_device(struct rte_pci_device *dev) +rte_pci_unmap_device(struct rte_pci_device *dev) { /* try unmapping the NIC resources using VFIO if it exists */ switch (dev->kdrv) { case RTE_KDRV_VFIO: - RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n"); +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) + pci_vfio_unmap_resource(dev); +#endif break; case RTE_KDRV_IGB_UIO: case RTE_KDRV_UIO_GENERIC: @@ -267,8 +236,7 @@ error: /* Scan one pci sysfs entry, and fill the devices list from it. */ static int -pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, - uint8_t devid, uint8_t function) +pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) { char filename[PATH_MAX]; unsigned long tmp; @@ -281,10 +249,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, return -1; memset(dev, 0, sizeof(*dev)); - dev->addr.domain = domain; - dev->addr.bus = bus; - dev->addr.devid = devid; - dev->addr.function = function; + dev->addr = *addr; /* get vendor id */ snprintf(filename, sizeof(filename), "%s/vendor", dirname); @@ -359,6 +324,9 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dev->device.numa_node = tmp; } + rte_pci_device_name(addr, dev->name, sizeof(dev->name)); + dev->device.name = dev->name; + /* parse resources */ snprintf(filename, sizeof(filename), "%s/resource", dirname); if (pci_parse_sysfs_resource(filename, dev) < 0) { @@ -389,21 +357,19 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, dev->kdrv = RTE_KDRV_NONE; /* device is valid, add in list (sorted) */ - if (TAILQ_EMPTY(&pci_device_list)) { - rte_eal_device_insert(&dev->device); - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { + rte_pci_add_device(dev); } else { struct rte_pci_device *dev2; int ret; - TAILQ_FOREACH(dev2, &pci_device_list, next) { + TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); if (ret > 0) continue; if (ret < 0) { - TAILQ_INSERT_BEFORE(dev2, dev, next); - rte_eal_device_insert(&dev->device); + rte_pci_insert_device(dev2, dev); } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; @@ -413,8 +379,8 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, } return 0; } - rte_eal_device_insert(&dev->device); - TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + + rte_pci_add_device(dev); } return 0; @@ -429,16 +395,14 @@ pci_update_device(const struct rte_pci_addr *addr) pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, addr->function); - return pci_scan_one(filename, addr->domain, addr->bus, addr->devid, - addr->function); + return pci_scan_one(filename, addr); } /* * split up a pci address into its constituent parts. */ static int -parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, - uint8_t *bus, uint8_t *devid, uint8_t *function) +parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) { /* first split on ':' */ union splitaddr { @@ -466,10 +430,10 @@ parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, /* now convert to int values */ errno = 0; - *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); - *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); - *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); - *function = (uint8_t)strtoul(splitaddr.function, NULL, 10); + addr->domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); + addr->bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); + addr->devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); + addr->function = (uint8_t)strtoul(splitaddr.function, NULL, 10); if (errno != 0) goto error; @@ -485,13 +449,16 @@ error: * list */ int -rte_eal_pci_scan(void) +rte_pci_scan(void) { struct dirent *e; DIR *dir; char dirname[PATH_MAX]; - uint16_t domain; - uint8_t bus, devid, function; + struct rte_pci_addr addr; + + /* for debug purposes, PCI can be disabled */ + if (internal_config.no_pci) + return 0; dir = opendir(pci_get_sysfs_path()); if (dir == NULL) { @@ -504,13 +471,13 @@ rte_eal_pci_scan(void) if (e->d_name[0] == '.') continue; - if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain, - &bus, &devid, &function) != 0) + if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0) continue; snprintf(dirname, sizeof(dirname), "%s/%s", pci_get_sysfs_path(), e->d_name); - if (pci_scan_one(dirname, domain, bus, devid, function) < 0) + + if (pci_scan_one(dirname, &addr) < 0) goto error; } closedir(dir); @@ -522,8 +489,8 @@ error: } /* Read PCI config space. */ -int rte_eal_pci_read_config(const struct rte_pci_device *device, - void *buf, size_t len, off_t offset) +int rte_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset) { const struct rte_intr_handle *intr_handle = &device->intr_handle; @@ -547,8 +514,8 @@ int rte_eal_pci_read_config(const struct rte_pci_device *device, } /* Write PCI config space. */ -int rte_eal_pci_write_config(const struct rte_pci_device *device, - const void *buf, size_t len, off_t offset) +int rte_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset) { const struct rte_intr_handle *intr_handle = &device->intr_handle; @@ -574,7 +541,7 @@ int rte_eal_pci_write_config(const struct rte_pci_device *device, #if defined(RTE_ARCH_X86) static int pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, - struct rte_pci_ioport *p) + struct rte_pci_ioport *p) { uint16_t start, end; FILE *fp; @@ -632,8 +599,8 @@ pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, #endif int -rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, - struct rte_pci_ioport *p) +rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) { int ret = -1; @@ -670,8 +637,8 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, } void -rte_eal_pci_ioport_read(struct rte_pci_ioport *p, - void *data, size_t len, off_t offset) +rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { #ifdef VFIO_PRESENT @@ -696,8 +663,8 @@ rte_eal_pci_ioport_read(struct rte_pci_ioport *p, } void -rte_eal_pci_ioport_write(struct rte_pci_ioport *p, - const void *data, size_t len, off_t offset) +rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) { switch (p->dev->kdrv) { #ifdef VFIO_PRESENT @@ -722,7 +689,7 @@ rte_eal_pci_ioport_write(struct rte_pci_ioport *p, } int -rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) +rte_pci_ioport_unmap(struct rte_pci_ioport *p) { int ret = -1; @@ -754,19 +721,3 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) return ret; } - -/* Init the PCI EAL subsystem */ -int -rte_eal_pci_init(void) -{ - /* for debug purposes, PCI can be disabled */ - if (internal_config.no_pci) - return 0; - - if (rte_eal_pci_scan() < 0) { - RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); - return -1; - } - - return 0; -} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h index 6a960d1b..ae2980d6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h +++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h @@ -88,8 +88,9 @@ void pci_vfio_ioport_write(struct rte_pci_ioport *p, const void *data, size_t len, off_t offset); int pci_vfio_ioport_unmap(struct rte_pci_ioport *p); -/* map VFIO resource prototype */ +/* map/unmap VFIO resource prototype */ int pci_vfio_map_resource(struct rte_pci_device *dev); +int pci_vfio_unmap_resource(struct rte_pci_device *dev); #endif diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index 3e4ffb57..fa10329f 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #if defined(RTE_ARCH_X86) @@ -230,7 +231,7 @@ pci_uio_free_resource(struct rte_pci_device *dev, close(dev->intr_handle.uio_cfg_fd); dev->intr_handle.uio_cfg_fd = -1; } - if (dev->intr_handle.fd) { + if (dev->intr_handle.fd >= 0) { close(dev->intr_handle.fd); dev->intr_handle.fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c index 5f478c59..2be13195 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -172,7 +173,7 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, /* set PCI bus mastering */ static int -pci_vfio_set_bus_master(int dev_fd) +pci_vfio_set_bus_master(int dev_fd, bool op) { uint16_t reg; int ret; @@ -185,8 +186,11 @@ pci_vfio_set_bus_master(int dev_fd) return -1; } - /* set the master bit */ - reg |= PCI_COMMAND_MASTER; + if (op) + /* set the master bit */ + reg |= PCI_COMMAND_MASTER; + else + reg &= ~(PCI_COMMAND_MASTER); ret = pwrite64(dev_fd, ®, sizeof(reg), VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + @@ -355,7 +359,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev) } else { /* if we're in a secondary process, just find our tailq entry */ TAILQ_FOREACH(vfio_res, vfio_res_list, next) { - if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) + if (rte_eal_compare_pci_addr(&vfio_res->pci_addr, + &dev->addr)) continue; break; } @@ -517,7 +522,7 @@ pci_vfio_map_resource(struct rte_pci_device *dev) } /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd)) { + if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr); close(vfio_dev_fd); rte_free(vfio_res); @@ -534,6 +539,79 @@ pci_vfio_map_resource(struct rte_pci_device *dev) return 0; } +int +pci_vfio_unmap_resource(struct rte_pci_device *dev) +{ + char pci_addr[PATH_MAX] = {0}; + struct rte_pci_addr *loc = &dev->addr; + int i, ret; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list; + + struct pci_map *maps; + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + + if (close(dev->intr_handle.fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n", + pci_addr); + return -1; + } + + if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) { + RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n", + pci_addr); + return -1; + } + + ret = vfio_release_device(pci_get_sysfs_path(), pci_addr, + dev->intr_handle.vfio_dev_fd); + if (ret < 0) { + RTE_LOG(ERR, EAL, + "%s(): cannot release device\n", __func__); + return ret; + } + + vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + /* Get vfio_res */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) + continue; + break; + } + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", + pci_addr); + return -1; + } + + /* unmap BARs */ + maps = vfio_res->maps; + + RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n", + pci_addr); + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + + /* + * We do not need to be aware of MSI-X table BAR mappings as + * when mapping. Just using current maps array is enough + */ + if (maps[i].addr) { + RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n", + pci_addr, maps[i].addr); + pci_unmap_resource(maps[i].addr, maps[i].size); + } + } + + TAILQ_REMOVE(vfio_res_list, vfio_res, next); + + return 0; +} + int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, struct rte_pci_ioport *p) diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 702f7a2e..53ac725d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -50,12 +50,15 @@ static struct vfio_config vfio_cfg; static int vfio_type1_dma_map(int); +static int vfio_spapr_dma_map(int); static int vfio_noiommu_dma_map(int); /* IOMMU types we support */ static const struct vfio_iommu_type iommu_types[] = { /* x86 IOMMU, otherwise known as type 1 */ { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map}, + /* ppc64 IOMMU, otherwise known as spapr */ + { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map}, /* IOMMU-less mode */ { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map}, }; @@ -65,13 +68,32 @@ vfio_get_group_fd(int iommu_group_no) { int i; int vfio_group_fd; + int group_idx = -1; char filename[PATH_MAX]; /* check if we already have the group descriptor open */ - for (i = 0; i < vfio_cfg.vfio_group_idx; i++) + for (i = 0; i < VFIO_MAX_GROUPS; i++) if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no) return vfio_cfg.vfio_groups[i].fd; + /* Lets see first if there is room for a new group */ + if (vfio_cfg.vfio_active_groups == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + return -1; + } + + /* Now lets get an index for the new group */ + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg.vfio_groups[i].group_no == -1) { + group_idx = i; + break; + } + + /* This should not happen */ + if (group_idx == -1) { + RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); + return -1; + } /* if primary, try to open the group */ if (internal_config.process_type == RTE_PROC_PRIMARY) { /* try regular group format */ @@ -101,14 +123,9 @@ vfio_get_group_fd(int iommu_group_no) /* noiommu group found */ } - /* if the fd is valid, create a new group for it */ - if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) { - RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); - close(vfio_group_fd); - return -1; - } - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; + vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no; + vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd; + vfio_cfg.vfio_active_groups++; return vfio_group_fd; } /* if we're in a secondary process, request group fd from the primary @@ -155,14 +172,115 @@ vfio_get_group_fd(int iommu_group_no) return -1; } + +static int +get_vfio_group_idx(int vfio_group_fd) +{ + int i; + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd) + return i; + return -1; +} + +static void +vfio_group_device_get(int vfio_group_fd) +{ + int i; + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0 || i > VFIO_MAX_GROUPS) + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + else + vfio_cfg.vfio_groups[i].devices++; +} + static void -clear_current_group(void) +vfio_group_device_put(int vfio_group_fd) { - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1; + int i; + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0 || i > VFIO_MAX_GROUPS) + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + else + vfio_cfg.vfio_groups[i].devices--; +} + +static int +vfio_group_device_count(int vfio_group_fd) +{ + int i; + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0 || i > VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + return -1; + } + + return vfio_cfg.vfio_groups[i].devices; +} + +int +clear_group(int vfio_group_fd) +{ + int i; + int socket_fd, ret; + + if (internal_config.process_type == RTE_PROC_PRIMARY) { + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0) + return -1; + vfio_cfg.vfio_groups[i].group_no = -1; + vfio_cfg.vfio_groups[i].fd = -1; + vfio_cfg.vfio_groups[i].devices = 0; + vfio_cfg.vfio_active_groups--; + return 0; + } + + /* This is just for SECONDARY processes */ + socket_fd = vfio_mp_sync_connect_to_primary(); + + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); + return -1; + } + + if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) { + RTE_LOG(ERR, EAL, " cannot request container fd!\n"); + close(socket_fd); + return -1; + } + + if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) { + RTE_LOG(ERR, EAL, " cannot send group fd!\n"); + close(socket_fd); + return -1; + } + + ret = vfio_mp_sync_receive_request(socket_fd); + switch (ret) { + case SOCKET_NO_FD: + RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n"); + close(socket_fd); + break; + case SOCKET_OK: + close(socket_fd); + return 0; + case SOCKET_ERR: + RTE_LOG(ERR, EAL, " Socket error\n"); + close(socket_fd); + break; + default: + RTE_LOG(ERR, EAL, " UNKNOWN reply, %d\n", ret); + close(socket_fd); + } + return -1; } -int vfio_setup_device(const char *sysfs_base, const char *dev_addr, +int +vfio_setup_device(const char *sysfs_base, const char *dev_addr, int *vfio_dev_fd, struct vfio_device_info *device_info) { struct vfio_group_status group_status = { @@ -189,18 +307,10 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr, if (vfio_group_fd < 0) return -1; - /* store group fd */ - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; - /* if group_fd == 0, that means the device isn't managed by VFIO */ if (vfio_group_fd == 0) { - RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", dev_addr); - /* we store 0 as group fd to distinguish between existing but - * unbound VFIO groups, and groups that don't exist at all. - */ - vfio_cfg.vfio_group_idx++; return 1; } @@ -215,12 +325,12 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr, RTE_LOG(ERR, EAL, " %s cannot get group status, " "error %i (%s)\n", dev_addr, errno, strerror(errno)); close(vfio_group_fd); - clear_current_group(); + clear_group(vfio_group_fd); return -1; } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", dev_addr); close(vfio_group_fd); - clear_current_group(); + clear_group(vfio_group_fd); return -1; } @@ -234,60 +344,131 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr, RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, " "error %i (%s)\n", dev_addr, errno, strerror(errno)); close(vfio_group_fd); - clear_current_group(); + clear_group(vfio_group_fd); return -1; } + /* - * at this point we know that this group has been successfully - * initialized, so we increment vfio_group_idx to indicate that we can - * add new groups. + * pick an IOMMU type and set up DMA mappings for container + * + * needs to be done only once, only when first group is + * assigned to a container and only in primary process. + * Note this can happen several times with the hotplug + * functionality. */ - vfio_cfg.vfio_group_idx++; - } - - /* - * pick an IOMMU type and set up DMA mappings for container - * - * needs to be done only once, only when at least one group is assigned to - * a container and only in primary process - */ - if (internal_config.process_type == RTE_PROC_PRIMARY && - vfio_cfg.vfio_container_has_dma == 0) { - /* select an IOMMU type which we will be using */ - const struct vfio_iommu_type *t = + if (internal_config.process_type == RTE_PROC_PRIMARY && + vfio_cfg.vfio_active_groups == 1) { + /* select an IOMMU type which we will be using */ + const struct vfio_iommu_type *t = vfio_set_iommu_type(vfio_cfg.vfio_container_fd); - if (!t) { - RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", dev_addr); - return -1; - } - ret = t->dma_map_func(vfio_cfg.vfio_container_fd); - if (ret) { - RTE_LOG(ERR, EAL, " %s DMA remapping failed, " - "error %i (%s)\n", dev_addr, errno, strerror(errno)); - return -1; + if (!t) { + RTE_LOG(ERR, EAL, + " %s failed to select IOMMU type\n", + dev_addr); + close(vfio_group_fd); + clear_group(vfio_group_fd); + return -1; + } + ret = t->dma_map_func(vfio_cfg.vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, + " %s DMA remapping failed, error %i (%s)\n", + dev_addr, errno, strerror(errno)); + close(vfio_group_fd); + clear_group(vfio_group_fd); + return -1; + } } - vfio_cfg.vfio_container_has_dma = 1; } /* get a file descriptor for the device */ *vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr); if (*vfio_dev_fd < 0) { - /* if we cannot get a device fd, this simply means that this - * particular port is not bound to VFIO - */ - RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + /* if we cannot get a device fd, this implies a problem with + * the VFIO group or the container not having IOMMU configured. + */ + + RTE_LOG(WARNING, EAL, "Getting a vfio_dev_fd for %s failed\n", dev_addr); - return 1; + close(vfio_group_fd); + clear_group(vfio_group_fd); + return -1; } /* test and setup the device */ ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info); if (ret) { RTE_LOG(ERR, EAL, " %s cannot get device info, " - "error %i (%s)\n", dev_addr, errno, strerror(errno)); + "error %i (%s)\n", dev_addr, errno, + strerror(errno)); close(*vfio_dev_fd); + close(vfio_group_fd); + clear_group(vfio_group_fd); return -1; } + vfio_group_device_get(vfio_group_fd); + + return 0; +} + +int +vfio_release_device(const char *sysfs_base, const char *dev_addr, + int vfio_dev_fd) +{ + struct vfio_group_status group_status = { + .argsz = sizeof(group_status) + }; + int vfio_group_fd; + int iommu_group_no; + int ret; + + /* get group number */ + ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no); + if (ret <= 0) { + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver\n", + dev_addr); + /* This is an error at this point. */ + return -1; + } + + /* get the actual group fd */ + vfio_group_fd = vfio_get_group_fd(iommu_group_no); + if (vfio_group_fd <= 0) { + RTE_LOG(INFO, EAL, "vfio_get_group_fd failed for %s\n", + dev_addr); + return -1; + } + + /* At this point we got an active group. Closing it will make the + * container detachment. If this is the last active group, VFIO kernel + * code will unset the container and the IOMMU mappings. + */ + + /* Closing a device */ + if (close(vfio_dev_fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n", + dev_addr); + return -1; + } + + /* An VFIO group can have several devices attached. Just when there is + * no devices remaining should the group be closed. + */ + vfio_group_device_put(vfio_group_fd); + if (!vfio_group_device_count(vfio_group_fd)) { + + if (close(vfio_group_fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n", + dev_addr); + return -1; + } + + if (clear_group(vfio_group_fd) < 0) { + RTE_LOG(INFO, EAL, "Error when clearing group for %s\n", + dev_addr); + return -1; + } + } return 0; } @@ -302,6 +483,7 @@ vfio_enable(const char *modname) for (i = 0; i < VFIO_MAX_GROUPS; i++) { vfio_cfg.vfio_groups[i].fd = -1; vfio_cfg.vfio_groups[i].group_no = -1; + vfio_cfg.vfio_groups[i].devices = 0; } /* inform the user that we are probing for VFIO */ @@ -531,7 +713,8 @@ vfio_type1_dma_map(int vfio_container_fd) if (ret) { RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " - "error %i (%s)\n", errno, strerror(errno)); + "error %i (%s)\n", errno, + strerror(errno)); return -1; } } @@ -539,6 +722,93 @@ vfio_type1_dma_map(int vfio_container_fd) return 0; } +static int +vfio_spapr_dma_map(int vfio_container_fd) +{ + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + int i, ret; + + struct vfio_iommu_spapr_register_memory reg = { + .argsz = sizeof(reg), + .flags = 0 + }; + struct vfio_iommu_spapr_tce_info info = { + .argsz = sizeof(info), + }; + struct vfio_iommu_spapr_tce_create create = { + .argsz = sizeof(create), + }; + struct vfio_iommu_spapr_tce_remove remove = { + .argsz = sizeof(remove), + }; + + /* query spapr iommu info */ + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + if (ret) { + RTE_LOG(ERR, EAL, " cannot get iommu info, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* remove default DMA of 32 bit window */ + remove.start_addr = info.dma32_window_start; + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); + if (ret) { + RTE_LOG(ERR, EAL, " cannot remove default DMA window, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* calculate window size based on number of hugepages configured */ + create.window_size = rte_eal_get_physmem_size(); + create.page_shift = __builtin_ctzll(ms->hugepage_sz); + create.levels = 2; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create); + if (ret) { + RTE_LOG(ERR, EAL, " cannot create new DMA window, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + struct vfio_iommu_type1_dma_map dma_map; + + if (ms[i].addr == NULL) + break; + + reg.vaddr = (uintptr_t) ms[i].addr; + reg.size = ms[i].len; + ret = ioctl(vfio_container_fd, + VFIO_IOMMU_SPAPR_REGISTER_MEMORY, ®); + if (ret) { + RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + memset(&dma_map, 0, sizeof(dma_map)); + dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); + dma_map.vaddr = ms[i].addr_64; + dma_map.size = ms[i].len; + dma_map.iova = ms[i].phys_addr; + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | + VFIO_DMA_MAP_FLAG_WRITE; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); + + if (ret) { + RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + } + + return 0; +} + static int vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) { diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 29f7f3ec..5ff63e5d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -54,6 +54,62 @@ #define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU +#ifndef VFIO_SPAPR_TCE_v2_IOMMU +#define RTE_VFIO_SPAPR 7 +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + +struct vfio_iommu_spapr_register_memory { + uint32_t argsz; + uint32_t flags; + uint64_t vaddr; + uint64_t size; +}; + +struct vfio_iommu_spapr_tce_create { + uint32_t argsz; + uint32_t flags; + /* in */ + uint32_t page_shift; + uint32_t __resv1; + uint64_t window_size; + uint32_t levels; + uint32_t __resv2; + /* out */ + uint64_t start_addr; +}; + +struct vfio_iommu_spapr_tce_remove { + uint32_t argsz; + uint32_t flags; + /* in */ + uint64_t start_addr; +}; + +struct vfio_iommu_spapr_tce_ddw_info { + uint64_t pgsizes; + uint32_t max_dynamic_windows_supported; + uint32_t levels; +}; + +/* SPAPR_v2 is not present, but SPAPR might be */ +#ifndef VFIO_SPAPR_TCE_IOMMU +#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + +struct vfio_iommu_spapr_tce_info { + uint32_t argsz; + uint32_t flags; + uint32_t dma32_window_start; + uint32_t dma32_window_size; + struct vfio_iommu_spapr_tce_ddw_info ddw; +}; +#endif /* VFIO_SPAPR_TCE_IOMMU */ + +#else /* VFIO_SPAPR_TCE_v2_IOMMU */ +#define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU +#endif + #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) #define RTE_VFIO_NOIOMMU 8 #else @@ -78,13 +134,13 @@ int vfio_mp_sync_connect_to_primary(void); struct vfio_group { int group_no; int fd; + int devices; }; struct vfio_config { int vfio_enabled; int vfio_container_fd; - int vfio_container_has_dma; - int vfio_group_idx; + int vfio_active_groups; struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; }; @@ -130,6 +186,10 @@ vfio_get_group_no(const char *sysfs_base, int vfio_get_group_fd(int iommu_group_no); +/* remove group fd from internal VFIO group fd array */ +int +clear_group(int vfio_group_fd); + /** * Setup vfio_cfg for the device identified by its address. It discovers * the configured I/O MMU groups or sets a new one for the device. If a new @@ -140,6 +200,8 @@ vfio_get_group_fd(int iommu_group_no); int vfio_setup_device(const char *sysfs_base, const char *dev_addr, int *vfio_dev_fd, struct vfio_device_info *device_info); +int vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd); + int vfio_enable(const char *modname); int vfio_is_enabled(const char *modname); @@ -150,6 +212,7 @@ int vfio_mp_sync_setup(void); #define SOCKET_REQ_CONTAINER 0x100 #define SOCKET_REQ_GROUP 0x200 +#define SOCKET_CLR_GROUP 0x300 #define SOCKET_OK 0x0 #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c index fb4a2f84..7e8095cb 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c @@ -267,7 +267,7 @@ vfio_mp_sync_connect_to_primary(void) static __attribute__((noreturn)) void * vfio_mp_sync_thread(void __rte_unused * arg) { - int ret, fd, vfio_group_no; + int ret, fd, vfio_data; /* wait for requests on the socket */ for (;;) { @@ -305,13 +305,13 @@ vfio_mp_sync_thread(void __rte_unused * arg) break; case SOCKET_REQ_GROUP: /* wait for group number */ - vfio_group_no = vfio_mp_sync_receive_request(conn_sock); - if (vfio_group_no < 0) { + vfio_data = vfio_mp_sync_receive_request(conn_sock); + if (vfio_data < 0) { close(conn_sock); continue; } - fd = vfio_get_group_fd(vfio_group_no); + fd = vfio_get_group_fd(vfio_data); if (fd < 0) vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); @@ -324,6 +324,21 @@ vfio_mp_sync_thread(void __rte_unused * arg) vfio_mp_sync_send_fd(conn_sock, fd); } break; + case SOCKET_CLR_GROUP: + /* wait for group fd */ + vfio_data = vfio_mp_sync_receive_request(conn_sock); + if (vfio_data < 0) { + close(conn_sock); + continue; + } + + ret = clear_group(vfio_data); + + if (ret < 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); + else + vfio_mp_sync_send_request(conn_sock, SOCKET_OK); + break; default: vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); break; diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h index d459bf48..6daffebf 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h @@ -49,8 +49,9 @@ enum rte_intr_handle_type { RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */ RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */ - RTE_INTR_HANDLE_ALARM, /**< alarm handle */ - RTE_INTR_HANDLE_EXT, /**< external handler */ + RTE_INTR_HANDLE_ALARM, /**< alarm handle */ + RTE_INTR_HANDLE_EXT, /**< external handler */ + RTE_INTR_HANDLE_VDEV, /**< virtual device */ RTE_INTR_HANDLE_MAX }; @@ -170,6 +171,15 @@ int rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd, int op, unsigned int vec, void *data); +/** + * It deletes registered eventfds. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +void +rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle); + /** * It enables the packet I/O interrupt event if it's necessary. * It creates event fd for each interrupt vector when MSIX is used, diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h index 09713b0c..2ac879fd 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -116,11 +116,10 @@ struct rte_kni_fifo { struct rte_kni_mbuf { void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); uint64_t buf_physaddr; - char pad0[2]; uint16_t data_off; /**< Start address of data in segment buffer. */ char pad1[2]; - uint8_t nb_segs; /**< Number of segments. */ - char pad4[1]; + uint16_t nb_segs; /**< Number of segments. */ + char pad4[2]; uint64_t ol_flags; /**< Offload features. */ char pad2[4]; uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */ diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map index 83721ba5..670bab3a 100644 --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -6,8 +6,6 @@ DPDK_2.0 { eal_parse_sysfs_value; eal_timer_source; lcore_config; - pci_device_list; - pci_driver_list; per_lcore__lcore_id; per_lcore__rte_errno; rte_calloc; @@ -22,12 +20,9 @@ DPDK_2.0 { rte_dump_tailq; rte_eal_alarm_cancel; rte_eal_alarm_set; - rte_eal_dev_init; rte_eal_devargs_add; rte_eal_devargs_dump; rte_eal_devargs_type_count; - rte_eal_driver_register; - rte_eal_driver_unregister; rte_eal_get_configuration; rte_eal_get_lcore_state; rte_eal_get_physmem_layout; @@ -40,18 +35,10 @@ DPDK_2.0 { rte_eal_mp_remote_launch; rte_eal_mp_wait_lcore; rte_eal_parse_devargs_str; - rte_eal_pci_dump; - rte_eal_pci_probe; - rte_eal_pci_probe_one; - rte_eal_pci_register; - rte_eal_pci_scan; - rte_eal_pci_unregister; rte_eal_process_type; rte_eal_remote_launch; rte_eal_tailq_lookup; rte_eal_tailq_register; - rte_eal_vdev_init; - rte_eal_vdev_uninit; rte_eal_wait_lcore; rte_exit; rte_free; @@ -66,11 +53,8 @@ DPDK_2.0 { rte_intr_disable; rte_intr_enable; rte_log; - rte_log_add_in_history; rte_log_cur_msg_loglevel; rte_log_cur_msg_logtype; - rte_log_dump_history; - rte_log_set_history; rte_logs; rte_malloc; rte_malloc_dump_stats; @@ -114,9 +98,6 @@ DPDK_2.0 { DPDK_2.1 { global: - rte_eal_pci_detach; - rte_eal_pci_read_config; - rte_eal_pci_write_config; rte_epoll_ctl; rte_epoll_wait; rte_intr_allow_others; @@ -146,12 +127,6 @@ DPDK_16.04 { global: rte_cpu_get_flag_name; - rte_eal_pci_ioport_map; - rte_eal_pci_ioport_read; - rte_eal_pci_ioport_unmap; - rte_eal_pci_ioport_write; - rte_eal_pci_map_device; - rte_eal_pci_unmap_device; rte_eal_primary_proc_alive; } DPDK_2.2; @@ -174,7 +149,52 @@ DPDK_16.11 { rte_delay_us_callback_register; rte_eal_dev_attach; rte_eal_dev_detach; - rte_eal_vdrv_register; - rte_eal_vdrv_unregister; } DPDK_16.07; + +DPDK_17.02 { + global: + + rte_bus_dump; + rte_bus_probe; + rte_bus_register; + rte_bus_scan; + rte_bus_unregister; + +} DPDK_16.11; + +DPDK_17.05 { + global: + + rte_cpu_is_supported; + rte_intr_free_epoll_fd; + rte_log_dump; + rte_log_get_global_level; + rte_log_register; + rte_log_set_global_level; + rte_log_set_level; + rte_log_set_level_regexp; + rte_pci_detach; + rte_pci_dump; + rte_pci_ioport_map; + rte_pci_ioport_read; + rte_pci_ioport_unmap; + rte_pci_ioport_write; + rte_pci_map_device; + rte_pci_probe; + rte_pci_probe_one; + rte_pci_read_config; + rte_pci_register; + rte_pci_scan; + rte_pci_unmap_device; + rte_pci_unregister; + rte_pci_write_config; + rte_vdev_init; + rte_vdev_register; + rte_vdev_uninit; + rte_vdev_unregister; + vfio_get_container_fd; + vfio_get_group_fd; + vfio_get_group_no; + +} DPDK_17.02; -- cgit 1.2.3-korg