diff options
Diffstat (limited to 'drivers/bus/pci')
-rw-r--r-- | drivers/bus/pci/Makefile | 5 | ||||
-rw-r--r-- | drivers/bus/pci/bsd/pci.c | 6 | ||||
-rw-r--r-- | drivers/bus/pci/linux/Makefile | 2 | ||||
-rw-r--r-- | drivers/bus/pci/linux/pci.c | 46 | ||||
-rw-r--r-- | drivers/bus/pci/linux/pci_vfio.c | 268 | ||||
-rw-r--r-- | drivers/bus/pci/meson.build | 9 | ||||
-rw-r--r-- | drivers/bus/pci/pci_common.c | 143 | ||||
-rw-r--r-- | drivers/bus/pci/pci_common_uio.c | 33 | ||||
-rw-r--r-- | drivers/bus/pci/pci_params.c | 78 | ||||
-rw-r--r-- | drivers/bus/pci/private.h | 39 | ||||
-rw-r--r-- | drivers/bus/pci/rte_bus_pci.h | 10 |
11 files changed, 574 insertions, 65 deletions
diff --git a/drivers/bus/pci/Makefile b/drivers/bus/pci/Makefile index cf373068..f33e0120 100644 --- a/drivers/bus/pci/Makefile +++ b/drivers/bus/pci/Makefile @@ -4,7 +4,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_bus_pci.a -LIBABIVER := 1 +LIBABIVER := 2 EXPORT_MAP := rte_bus_pci_version.map CFLAGS := -I$(SRCDIR) $(CFLAGS) @@ -26,10 +26,11 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_eal/$(SYSTEM)app/eal CFLAGS += -DALLOW_EXPERIMENTAL_API LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring -LDLIBS += -lrte_ethdev -lrte_pci +LDLIBS += -lrte_ethdev -lrte_pci -lrte_kvargs include $(RTE_SDK)/drivers/bus/pci/$(SYSTEM)/Makefile SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) := $(addprefix $(SYSTEM)/,$(SRCS)) +SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_params.c SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_common.c SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_common_uio.c diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c index 655b34b7..d09f8ee5 100644 --- a/drivers/bus/pci/bsd/pci.c +++ b/drivers/bus/pci/bsd/pci.c @@ -223,6 +223,8 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) } memset(dev, 0, sizeof(*dev)); + dev->device.bus = &rte_pci_bus.bus; + dev->addr.domain = conf->pc_sel.pc_domain; dev->addr.bus = conf->pc_sel.pc_bus; dev->addr.devid = conf->pc_sel.pc_dev; @@ -439,6 +441,8 @@ int rte_pci_read_config(const struct rte_pci_device *dev, { int fd = -1; int size; + /* Copy Linux implementation's behaviour */ + const int return_len = len; struct pci_io pi = { .pi_sel = { .pc_domain = dev->addr.domain, @@ -469,7 +473,7 @@ int rte_pci_read_config(const struct rte_pci_device *dev, } close(fd); - return 0; + return return_len; error: if (fd >= 0) diff --git a/drivers/bus/pci/linux/Makefile b/drivers/bus/pci/linux/Makefile index 96ea1d54..90404468 100644 --- a/drivers/bus/pci/linux/Makefile +++ b/drivers/bus/pci/linux/Makefile @@ -4,5 +4,3 @@ SRCS += pci.c SRCS += pci_uio.c SRCS += pci_vfio.c - -CFLAGS += -D_GNU_SOURCE diff --git a/drivers/bus/pci/linux/pci.c 
b/drivers/bus/pci/linux/pci.c index 04648ac9..45c24ef7 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -119,7 +119,7 @@ rte_pci_unmap_device(struct rte_pci_device *dev) static int find_max_end_va(const struct rte_memseg_list *msl, void *arg) { - size_t sz = msl->memseg_arr.len * msl->page_sz; + size_t sz = msl->len; void *end_va = RTE_PTR_ADD(msl->base_va, sz); void **max_va = arg; @@ -228,6 +228,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) return -1; memset(dev, 0, sizeof(*dev)); + dev->device.bus = &rte_pci_bus.bus; dev->addr = *addr; /* get vendor id */ @@ -588,10 +589,8 @@ pci_one_device_iommu_support_va(struct rte_pci_device *dev) fclose(fp); mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1; - if (mgaw < X86_VA_WIDTH) - return false; - return true; + return rte_eal_check_dma_mask(mgaw) == 0 ? true : false; } #elif defined(RTE_ARCH_PPC_64) static bool @@ -620,8 +619,11 @@ pci_devices_iommu_support_va(void) FOREACH_DEVICE_ON_PCIBUS(dev) { if (!rte_pci_match(drv, dev)) continue; - if (!pci_one_device_iommu_support_va(dev)) - return false; + /* + * just one PCI device needs to be checked out because + * the IOMMU hardware is the same for all of them. 
+ */ + return pci_one_device_iommu_support_va(dev); } } return true; @@ -672,23 +674,21 @@ rte_pci_get_iommu_class(void) int rte_pci_read_config(const struct rte_pci_device *device, void *buf, size_t len, off_t offset) { + char devname[RTE_DEV_NAME_MAX_LEN] = ""; const struct rte_intr_handle *intr_handle = &device->intr_handle; - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: + switch (device->kdrv) { + case RTE_KDRV_IGB_UIO: return pci_uio_read_config(intr_handle, buf, len, offset); - #ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: + case RTE_KDRV_VFIO: return pci_vfio_read_config(intr_handle, buf, len, offset); #endif default: + rte_pci_device_name(&device->addr, devname, + RTE_DEV_NAME_MAX_LEN); RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); + "Unknown driver type for %s\n", devname); return -1; } } @@ -697,23 +697,21 @@ int rte_pci_read_config(const struct rte_pci_device *device, int rte_pci_write_config(const struct rte_pci_device *device, const void *buf, size_t len, off_t offset) { + char devname[RTE_DEV_NAME_MAX_LEN] = ""; const struct rte_intr_handle *intr_handle = &device->intr_handle; - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: + switch (device->kdrv) { + case RTE_KDRV_IGB_UIO: return pci_uio_write_config(intr_handle, buf, len, offset); - #ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: + case RTE_KDRV_VFIO: return pci_vfio_write_config(intr_handle, buf, len, offset); #endif default: + rte_pci_device_name(&device->addr, devname, + RTE_DEV_NAME_MAX_LEN); RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); + "Unknown driver type for %s\n", devname); return -1; } } diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index 686386d6..305cc060 
100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -17,6 +17,8 @@ #include <rte_eal_memconfig.h> #include <rte_malloc.h> #include <rte_vfio.h> +#include <rte_eal.h> +#include <rte_bus.h> #include "eal_filesystem.h" @@ -35,7 +37,9 @@ #ifdef VFIO_PRESENT +#ifndef PAGE_SIZE #define PAGE_SIZE (sysconf(_SC_PAGESIZE)) +#endif #define PAGE_MASK (~(PAGE_SIZE - 1)) static struct rte_tailq_elem rte_vfio_tailq = { @@ -277,6 +281,114 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) return -1; } +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE +static void +pci_vfio_req_handler(void *param) +{ + struct rte_bus *bus; + int ret; + struct rte_device *device = (struct rte_device *)param; + + bus = rte_bus_find_by_device(device); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n", + device->name); + return; + } + + /* + * vfio kernel module request user space to release allocated + * resources before device be deleted in kernel, so it can directly + * call the vfio bus hot-unplug handler to process it. 
+ */ + ret = bus->hot_unplug_handler(device); + if (ret) + RTE_LOG(ERR, EAL, + "Can not handle hot-unplug for device (%s)\n", + device->name); +} + +/* enable notifier (only enable req now) */ +static int +pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd) +{ + int ret; + int fd = -1; + + /* set up an eventfd for req notifier */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot set up eventfd, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + + dev->vfio_req_intr_handle.fd = fd; + dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_VFIO_REQ; + dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd; + + ret = rte_intr_callback_register(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret) { + RTE_LOG(ERR, EAL, "Fail to register req notifier handler.\n"); + goto error; + } + + ret = rte_intr_enable(&dev->vfio_req_intr_handle); + if (ret) { + RTE_LOG(ERR, EAL, "Fail to enable req notifier.\n"); + ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret < 0) + RTE_LOG(ERR, EAL, + "Fail to unregister req notifier handler.\n"); + goto error; + } + + return 0; +error: + close(fd); + + dev->vfio_req_intr_handle.fd = -1; + dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + dev->vfio_req_intr_handle.vfio_dev_fd = -1; + + return -1; +} + +/* disable notifier (only disable req now) */ +static int +pci_vfio_disable_notifier(struct rte_pci_device *dev) +{ + int ret; + + ret = rte_intr_disable(&dev->vfio_req_intr_handle); + if (ret) { + RTE_LOG(ERR, EAL, "fail to disable req notifier.\n"); + return -1; + } + + ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret < 0) { + RTE_LOG(ERR, EAL, + "fail to unregister req notifier handler.\n"); + return -1; + } + + close(dev->vfio_req_intr_handle.fd); + + dev->vfio_req_intr_handle.fd = -1; + 
dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + dev->vfio_req_intr_handle.vfio_dev_fd = -1; + + return 0; +} +#endif + static int pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index) { @@ -415,6 +527,93 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res, return 0; } +/* + * region info may contain capability headers, so we need to keep reallocating + * the memory until we match allocated memory size with argsz. + */ +static int +pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info, + int region) +{ + struct vfio_region_info *ri; + size_t argsz = sizeof(*ri); + int ret; + + ri = malloc(sizeof(*ri)); + if (ri == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n"); + return -1; + } +again: + memset(ri, 0, argsz); + ri->argsz = argsz; + ri->index = region; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri); + if (ret < 0) { + free(ri); + return ret; + } + if (ri->argsz != argsz) { + struct vfio_region_info *tmp; + + argsz = ri->argsz; + tmp = realloc(ri, argsz); + + if (tmp == NULL) { + /* realloc failed but the ri is still there */ + free(ri); + RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n"); + return -1; + } + ri = tmp; + goto again; + } + *info = ri; + + return 0; +} + +static struct vfio_info_cap_header * +pci_vfio_info_cap(struct vfio_region_info *info, int cap) +{ + struct vfio_info_cap_header *h; + size_t offset; + + if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) { + /* VFIO info does not advertise capabilities */ + return NULL; + } + + offset = VFIO_CAP_OFFSET(info); + while (offset != 0) { + h = RTE_PTR_ADD(info, offset); + if (h->id == cap) + return h; + offset = h->next; + } + return NULL; +} + +static int +pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region) +{ + struct vfio_region_info *info; + int ret; + + ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region); + if (ret < 0) + return -1; + + ret = pci_vfio_info_cap(info, 
RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL; + + /* cleanup */ + free(info); + + return ret; +} + + static int pci_vfio_map_resource_primary(struct rte_pci_device *dev) { @@ -430,6 +629,9 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) struct pci_map *maps; dev->intr_handle.fd = -1; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.fd = -1; +#endif /* store PCI address string */ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, @@ -464,56 +666,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) if (ret < 0) { RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); - goto err_vfio_dev_fd; + goto err_vfio_res; + } + /* if we found our MSI-X BAR region, check if we can mmap it */ + if (vfio_res->msix_table.bar_index != -1) { + int ret = pci_vfio_msix_is_mappable(vfio_dev_fd, + vfio_res->msix_table.bar_index); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n"); + goto err_vfio_res; + } else if (ret != 0) { + /* we can map it, so we don't care where it is */ + RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n"); + vfio_res->msix_table.bar_index = -1; + } } for (i = 0; i < (int) vfio_res->nb_maps; i++) { - struct vfio_region_info reg = { .argsz = sizeof(reg) }; + struct vfio_region_info *reg = NULL; void *bar_addr; - reg.index = i; - - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg); - if (ret) { + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i); + if (ret < 0) { RTE_LOG(ERR, EAL, " %s cannot get device region info " - "error %i (%s)\n", pci_addr, errno, strerror(errno)); + "error %i (%s)\n", pci_addr, errno, + strerror(errno)); goto err_vfio_res; } /* chk for io port region */ ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i); - if (ret < 0) + if (ret < 0) { + free(reg); goto err_vfio_res; - else if (ret) { + } else if (ret) { RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n", i); + free(reg); continue; } /* skip non-mmapable BARs */ - if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) 
== 0) + if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) { + free(reg); continue; + } /* try mapping somewhere close to the end of hugepages */ if (pci_map_addr == NULL) pci_map_addr = pci_find_max_end_va(); bar_addr = pci_map_addr; - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size); maps[i].addr = bar_addr; - maps[i].offset = reg.offset; - maps[i].size = reg.size; + maps[i].offset = reg->offset; + maps[i].size = reg->size; maps[i].path = NULL; /* vfio doesn't have per-resource paths */ ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0); if (ret < 0) { RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, strerror(errno)); + free(reg); goto err_vfio_res; } dev->mem_resource[i].addr = maps[i].addr; + + free(reg); } if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) { @@ -521,6 +742,13 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) goto err_vfio_res; } +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) { + RTE_LOG(ERR, EAL, "Error setting up notifier!\n"); + goto err_vfio_res; + } + +#endif TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); return 0; @@ -546,6 +774,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) struct pci_map *maps; dev->intr_handle.fd = -1; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.fd = -1; +#endif /* store PCI address string */ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, @@ -586,6 +817,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) /* we need save vfio_dev_fd, so it can be used during release */ dev->intr_handle.vfio_dev_fd = vfio_dev_fd; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd; +#endif return 0; err_vfio_dev_fd: @@ -658,6 +892,14 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, loc->domain, loc->bus, loc->devid, loc->function); +#ifdef 
HAVE_VFIO_DEV_REQ_INTERFACE + ret = pci_vfio_disable_notifier(dev); + if (ret) { + RTE_LOG(ERR, EAL, "fail to disable req notifier.\n"); + return -1; + } + +#endif if (close(dev->intr_handle.fd) < 0) { RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n", pci_addr); diff --git a/drivers/bus/pci/meson.build b/drivers/bus/pci/meson.build index 72939e59..a3140ff9 100644 --- a/drivers/bus/pci/meson.build +++ b/drivers/bus/pci/meson.build @@ -1,15 +1,18 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation +version = 2 + deps += ['pci'] install_headers('rte_bus_pci.h') -sources = files('pci_common.c', 'pci_common_uio.c') +sources = files('pci_common.c', + 'pci_common_uio.c', + 'pci_params.c') if host_machine.system() == 'linux' sources += files('linux/pci.c', 'linux/pci_uio.c', 'linux/pci_vfio.c') includes += include_directories('linux') - cflags += ['-D_GNU_SOURCE'] else sources += files('bsd/pci.c') includes += include_directories('bsd') @@ -17,3 +20,5 @@ endif # memseg walk is not part of stable API yet allow_experimental_apis = true + +deps += ['kvargs'] diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c index 7736b3f9..6276e5d6 100644 --- a/drivers/bus/pci/pci_common.c +++ b/drivers/bus/pci/pci_common.c @@ -6,6 +6,7 @@ #include <string.h> #include <inttypes.h> #include <stdint.h> +#include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <sys/queue.h> @@ -23,12 +24,11 @@ #include <rte_string_fns.h> #include <rte_common.h> #include <rte_devargs.h> +#include <rte_vfio.h> #include "private.h" -extern struct rte_pci_bus rte_pci_bus; - #define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" const char *rte_pci_get_sysfs_path(void) @@ -123,6 +123,7 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev) { int ret; + bool already_probed; struct rte_pci_addr *loc; if ((dr == NULL) || (dev == NULL)) @@ -153,6 +154,13 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, 
dev->device.numa_node = 0; } + already_probed = rte_dev_is_probed(&dev->device); + if (already_probed && !(dr->drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) { + RTE_LOG(DEBUG, EAL, "Device %s is already probed\n", + dev->device.name); + return -EEXIST; + } + RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, dev->id.device_id, dr->driver.name); @@ -161,24 +169,24 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, * This needs to be before rte_pci_map_device(), as it enables to use * driver flags for adjusting configuration. */ - dev->driver = dr; - dev->device.driver = &dr->driver; + if (!already_probed) + dev->driver = dr; - if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { + if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) { /* map resources for devices that use igb_uio */ ret = rte_pci_map_device(dev); if (ret != 0) { dev->driver = NULL; - dev->device.driver = NULL; return ret; } } /* call the driver probe() function */ ret = dr->probe(dr, dev); + if (already_probed) + return ret; /* no rollback if already succeeded earlier */ if (ret) { dev->driver = NULL; - dev->device.driver = NULL; if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) && /* Don't unmap if device is unsupported and * driver needs mapped resources. @@ -186,6 +194,8 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, !(ret > 0 && (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES))) rte_pci_unmap_device(dev); + } else { + dev->device.driver = &dr->driver; } return ret; @@ -233,7 +243,7 @@ rte_pci_detach_dev(struct rte_pci_device *dev) /* * If vendor/device ID match, call the probe() function of all - * registered driver for the given device. Return -1 if initialization + * registered driver for the given device. Return < 0 if initialization * failed, return 1 if no driver is found for this device. 
*/ static int @@ -243,17 +253,13 @@ pci_probe_all_drivers(struct rte_pci_device *dev) int rc = 0; if (dev == NULL) - return -1; - - /* Check if a driver is already loaded */ - if (dev->driver != NULL) - return 0; + return -EINVAL; FOREACH_DRIVER_ON_PCIBUS(dr) { rc = rte_pci_probe_one_driver(dr, dev); if (rc < 0) /* negative value is an error */ - return -1; + return rc; if (rc > 0) /* positive value means driver doesn't support it */ continue; @@ -290,11 +296,14 @@ rte_pci_probe(void) devargs->policy == RTE_DEV_WHITELISTED) ret = pci_probe_all_drivers(dev); if (ret < 0) { - RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT - " cannot be used\n", dev->addr.domain, dev->addr.bus, - dev->addr.devid, dev->addr.function); - rte_errno = errno; - failed++; + if (ret != -EEXIST) { + RTE_LOG(ERR, EAL, "Requested device " + PCI_PRI_FMT " cannot be used\n", + dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + rte_errno = errno; + failed++; + } ret = 0; } } @@ -405,6 +414,98 @@ pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, return NULL; } +/* + * find the device which encounter the failure, by iterate over all device on + * PCI bus to check if the memory failure address is located in the range + * of the BARs of the device. 
+ */ +static struct rte_pci_device * +pci_find_device_by_addr(const void *failure_addr) +{ + struct rte_pci_device *pdev = NULL; + uint64_t check_point, start, end, len; + int i; + + check_point = (uint64_t)(uintptr_t)failure_addr; + + FOREACH_DEVICE_ON_PCIBUS(pdev) { + for (i = 0; i != RTE_DIM(pdev->mem_resource); i++) { + start = (uint64_t)(uintptr_t)pdev->mem_resource[i].addr; + len = pdev->mem_resource[i].len; + end = start + len; + if (check_point >= start && check_point < end) { + RTE_LOG(DEBUG, EAL, "Failure address %16.16" + PRIx64" belongs to device %s!\n", + check_point, pdev->device.name); + return pdev; + } + } + } + return NULL; +} + +static int +pci_hot_unplug_handler(struct rte_device *dev) +{ + struct rte_pci_device *pdev = NULL; + int ret = 0; + + pdev = RTE_DEV_TO_PCI(dev); + if (!pdev) + return -1; + + switch (pdev->kdrv) { +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + case RTE_KDRV_VFIO: + /* + * vfio kernel module guaranty the pci device would not be + * deleted until the user space release the resource, so no + * need to remap BARs resource here, just directly notify + * the req event to the user space to handle it. + */ + rte_dev_event_callback_process(dev->name, + RTE_DEV_EVENT_REMOVE); + break; +#endif + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + case RTE_KDRV_NIC_UIO: + /* BARs resource is invalid, remap it to be safe. */ + ret = pci_uio_remap_resource(pdev); + break; + default: + RTE_LOG(DEBUG, EAL, + "Not managed by a supported kernel driver, skipped\n"); + ret = -1; + break; + } + + return ret; +} + +static int +pci_sigbus_handler(const void *failure_addr) +{ + struct rte_pci_device *pdev = NULL; + int ret = 0; + + pdev = pci_find_device_by_addr(failure_addr); + if (!pdev) { + /* It is a generic sigbus error, no bus would handle it. */ + ret = 1; + } else { + /* The sigbus error is caused of hot-unplug. 
*/ + ret = pci_hot_unplug_handler(&pdev->device); + if (ret) { + RTE_LOG(ERR, EAL, + "Failed to handle hot-unplug for device %s", + pdev->name); + ret = -1; + } + } + return ret; +} + static int pci_plug(struct rte_device *dev) { @@ -421,6 +522,7 @@ pci_unplug(struct rte_device *dev) ret = rte_pci_detach_dev(pdev); if (ret == 0) { rte_pci_remove_device(pdev); + rte_devargs_remove(dev->devargs); free(pdev); } return ret; @@ -435,6 +537,9 @@ struct rte_pci_bus rte_pci_bus = { .unplug = pci_unplug, .parse = pci_parse, .get_iommu_class = rte_pci_get_iommu_class, + .dev_iterate = rte_pci_dev_iterate, + .hot_unplug_handler = pci_hot_unplug_handler, + .sigbus_handler = pci_sigbus_handler, }, .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c index 54bc20b5..7ea73dbc 100644 --- a/drivers/bus/pci/pci_common_uio.c +++ b/drivers/bus/pci/pci_common_uio.c @@ -146,6 +146,39 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res) } } +/* remap the PCI resource of a PCI device in anonymous virtual memory */ +int +pci_uio_remap_resource(struct rte_pci_device *dev) +{ + int i; + void *map_address; + + if (dev == NULL) + return -1; + + /* Remap all BARs */ + for (i = 0; i != PCI_MAX_RESOURCE; i++) { + /* skip empty BAR */ + if (dev->mem_resource[i].phys_addr == 0) + continue; + map_address = mmap(dev->mem_resource[i].addr, + (size_t)dev->mem_resource[i].len, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (map_address == MAP_FAILED) { + RTE_LOG(ERR, EAL, + "Cannot remap resource for device %s\n", + dev->name); + return -1; + } + RTE_LOG(INFO, EAL, + "Successful remap resource for device %s\n", + dev->name); + } + + return 0; +} + static struct mapped_pci_resource * pci_uio_find_resource(struct rte_pci_device *dev) { diff --git a/drivers/bus/pci/pci_params.c b/drivers/bus/pci/pci_params.c new file 
mode 100644 index 00000000..3192e9c9 --- /dev/null +++ b/drivers/bus/pci/pci_params.c @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 Gaëtan Rivet + */ + +#include <rte_bus.h> +#include <rte_bus_pci.h> +#include <rte_dev.h> +#include <rte_errno.h> +#include <rte_kvargs.h> +#include <rte_pci.h> + +#include "private.h" + +enum pci_params { + RTE_PCI_PARAM_ADDR, + RTE_PCI_PARAM_MAX, +}; + +static const char * const pci_params_keys[] = { + [RTE_PCI_PARAM_ADDR] = "addr", + [RTE_PCI_PARAM_MAX] = NULL, +}; + +static int +pci_addr_kv_cmp(const char *key __rte_unused, + const char *value, + void *_addr2) +{ + struct rte_pci_addr _addr1; + struct rte_pci_addr *addr1 = &_addr1; + struct rte_pci_addr *addr2 = _addr2; + + if (rte_pci_addr_parse(value, addr1)) + return -1; + return -abs(rte_pci_addr_cmp(addr1, addr2)); +} + +static int +pci_dev_match(const struct rte_device *dev, + const void *_kvlist) +{ + const struct rte_kvargs *kvlist = _kvlist; + const struct rte_pci_device *pdev; + + if (kvlist == NULL) + /* Empty string matches everything. */ + return 0; + pdev = RTE_DEV_TO_PCI_CONST(dev); + /* if any field does not match. 
*/ + if (rte_kvargs_process(kvlist, pci_params_keys[RTE_PCI_PARAM_ADDR], + &pci_addr_kv_cmp, + (void *)(intptr_t)&pdev->addr)) + return 1; + return 0; +} + +void * +rte_pci_dev_iterate(const void *start, + const char *str, + const struct rte_dev_iterator *it __rte_unused) +{ + rte_bus_find_device_t find_device; + struct rte_kvargs *kvargs = NULL; + struct rte_device *dev; + + if (str != NULL) { + kvargs = rte_kvargs_parse(str, pci_params_keys); + if (kvargs == NULL) { + RTE_LOG(ERR, EAL, "cannot parse argument list\n"); + rte_errno = EINVAL; + return NULL; + } + } + find_device = rte_pci_bus.bus.find_device; + dev = find_device(start, pci_dev_match, kvargs); + rte_kvargs_free(kvargs); + return dev; +} diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h index 8ddd03e1..13c3324b 100644 --- a/drivers/bus/pci/private.h +++ b/drivers/bus/pci/private.h @@ -10,9 +10,13 @@ #include <rte_pci.h> #include <rte_bus_pci.h> +extern struct rte_pci_bus rte_pci_bus; + struct rte_pci_driver; struct rte_pci_device; +extern struct rte_pci_bus rte_pci_bus; + /** * Probe the PCI bus * @@ -123,6 +127,18 @@ void pci_uio_free_resource(struct rte_pci_device *dev, struct mapped_pci_resource *uio_res); /** + * Remap the PCI resource of a PCI device in anonymous virtual memory. + * + * @param dev + * Point to the struct rte pci device. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +pci_uio_remap_resource(struct rte_pci_device *dev); + +/** * Map device memory to uio resource * * This function is private to EAL. @@ -166,4 +182,27 @@ rte_pci_match(const struct rte_pci_driver *pci_drv, enum rte_iova_mode rte_pci_get_iommu_class(void); +/* + * Iterate over internal devices, + * matching any device against the provided + * string. + * + * @param start + * Iteration starting point. + * + * @param str + * Device string to match against. + * + * @param it + * (unused) iterator structure. 
+ * + * @return + * A pointer to the next matching device if any. + * NULL otherwise. + */ +void * +rte_pci_dev_iterate(const void *start, + const char *str, + const struct rte_dev_iterator *it); + #endif /* _PCI_PRIVATE_H_ */ diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h index 0d1955ff..f0d6d81c 100644 --- a/drivers/bus/pci/rte_bus_pci.h +++ b/drivers/bus/pci/rte_bus_pci.h @@ -62,10 +62,12 @@ struct rte_pci_device { struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */ struct rte_intr_handle intr_handle; /**< Interrupt handle */ - struct rte_pci_driver *driver; /**< Associated driver */ + struct rte_pci_driver *driver; /**< PCI driver used in probing */ uint16_t max_vfs; /**< sriov enable if not zero */ enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */ + struct rte_intr_handle vfio_req_intr_handle; + /**< Handler of VFIO request interrupt */ }; /** @@ -121,7 +123,7 @@ struct rte_pci_driver { pci_probe_t *probe; /**< Device Probe function. */ pci_remove_t *remove; /**< Device Remove function. */ const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ - uint32_t drv_flags; /**< Flags contolling handling of device. */ + uint32_t drv_flags; /**< Flags RTE_PCI_DRV_*. */ }; /** @@ -137,6 +139,8 @@ struct rte_pci_bus { #define RTE_PCI_DRV_NEED_MAPPING 0x0001 /** Device needs PCI BAR mapping with enabled write combining (wc) */ #define RTE_PCI_DRV_WC_ACTIVATE 0x0002 +/** Device already probed can be probed again to check for new ports. */ +#define RTE_PCI_DRV_PROBE_AGAIN 0x0004 /** Device driver supports link state interrupt */ #define RTE_PCI_DRV_INTR_LSC 0x0008 /** Device driver supports device removal interrupt */ @@ -219,6 +223,8 @@ void rte_pci_unregister(struct rte_pci_driver *driver); * The length of the data buffer. 
* @param offset * The offset into PCI config space + * @return + * Number of bytes read on success, negative on error. */ int rte_pci_read_config(const struct rte_pci_device *device, void *buf, size_t len, off_t offset); |