diff options
Diffstat (limited to 'drivers/bus/pci/linux')
-rw-r--r-- | drivers/bus/pci/linux/Makefile | 2 | ||||
-rw-r--r-- | drivers/bus/pci/linux/pci.c | 46 | ||||
-rw-r--r-- | drivers/bus/pci/linux/pci_vfio.c | 268 |
3 files changed, 277 insertions, 39 deletions
diff --git a/drivers/bus/pci/linux/Makefile b/drivers/bus/pci/linux/Makefile index 96ea1d54..90404468 100644 --- a/drivers/bus/pci/linux/Makefile +++ b/drivers/bus/pci/linux/Makefile @@ -4,5 +4,3 @@ SRCS += pci.c SRCS += pci_uio.c SRCS += pci_vfio.c - -CFLAGS += -D_GNU_SOURCE diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index 04648ac9..45c24ef7 100644 --- a/drivers/bus/pci/linux/pci.c +++ b/drivers/bus/pci/linux/pci.c @@ -119,7 +119,7 @@ rte_pci_unmap_device(struct rte_pci_device *dev) static int find_max_end_va(const struct rte_memseg_list *msl, void *arg) { - size_t sz = msl->memseg_arr.len * msl->page_sz; + size_t sz = msl->len; void *end_va = RTE_PTR_ADD(msl->base_va, sz); void **max_va = arg; @@ -228,6 +228,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) return -1; memset(dev, 0, sizeof(*dev)); + dev->device.bus = &rte_pci_bus.bus; dev->addr = *addr; /* get vendor id */ @@ -588,10 +589,8 @@ pci_one_device_iommu_support_va(struct rte_pci_device *dev) fclose(fp); mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1; - if (mgaw < X86_VA_WIDTH) - return false; - return true; + return rte_eal_check_dma_mask(mgaw) == 0 ? true : false; } #elif defined(RTE_ARCH_PPC_64) static bool @@ -620,8 +619,11 @@ pci_devices_iommu_support_va(void) FOREACH_DEVICE_ON_PCIBUS(dev) { if (!rte_pci_match(drv, dev)) continue; - if (!pci_one_device_iommu_support_va(dev)) - return false; + /* + * just one PCI device needs to be checked out because + * the IOMMU hardware is the same for all of them. + */ + return pci_one_device_iommu_support_va(dev); } } return true; @@ -672,23 +674,21 @@ rte_pci_get_iommu_class(void) int rte_pci_read_config(const struct rte_pci_device *device, void *buf, size_t len, off_t offset) { + char devname[RTE_DEV_NAME_MAX_LEN] = ""; const struct rte_intr_handle *intr_handle = &device->intr_handle; - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: + switch (device->kdrv) { + case RTE_KDRV_IGB_UIO: return pci_uio_read_config(intr_handle, buf, len, offset); - #ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: + case RTE_KDRV_VFIO: return pci_vfio_read_config(intr_handle, buf, len, offset); #endif default: + rte_pci_device_name(&device->addr, devname, + RTE_DEV_NAME_MAX_LEN); RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); + "Unknown driver type for %s\n", devname); return -1; } } @@ -697,23 +697,21 @@ int rte_pci_read_config(const struct rte_pci_device *device, int rte_pci_write_config(const struct rte_pci_device *device, const void *buf, size_t len, off_t offset) { + char devname[RTE_DEV_NAME_MAX_LEN] = ""; const struct rte_intr_handle *intr_handle = &device->intr_handle; - switch (intr_handle->type) { - case RTE_INTR_HANDLE_UIO: - case RTE_INTR_HANDLE_UIO_INTX: + switch (device->kdrv) { + case RTE_KDRV_IGB_UIO: return pci_uio_write_config(intr_handle, buf, len, offset); - #ifdef VFIO_PRESENT - case RTE_INTR_HANDLE_VFIO_MSIX: - case RTE_INTR_HANDLE_VFIO_MSI: - case RTE_INTR_HANDLE_VFIO_LEGACY: + case RTE_KDRV_VFIO: return pci_vfio_write_config(intr_handle, buf, len, offset); #endif default: + rte_pci_device_name(&device->addr, devname, + RTE_DEV_NAME_MAX_LEN); RTE_LOG(ERR, EAL, - "Unknown handle type of fd %d\n", - intr_handle->fd); + "Unknown driver type for %s\n", devname); return -1; } } diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index 686386d6..305cc060 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -17,6 +17,8 @@ #include <rte_eal_memconfig.h> #include <rte_malloc.h> #include <rte_vfio.h> +#include <rte_eal.h> +#include <rte_bus.h> #include "eal_filesystem.h" @@ -35,7 +37,9 @@ #ifdef VFIO_PRESENT +#ifndef PAGE_SIZE #define PAGE_SIZE (sysconf(_SC_PAGESIZE)) +#endif #define PAGE_MASK (~(PAGE_SIZE - 1)) static struct rte_tailq_elem rte_vfio_tailq = { @@ -277,6 +281,114 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) return -1; } +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE +static void +pci_vfio_req_handler(void *param) +{ + struct rte_bus *bus; + int ret; + struct rte_device *device = (struct rte_device *)param; + + bus = rte_bus_find_by_device(device); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n", + device->name); + return; + } + + /* + * vfio kernel module request user space to release allocated + * resources before device be deleted in kernel, so it can directly + * call the vfio bus hot-unplug handler to process it. + */ + ret = bus->hot_unplug_handler(device); + if (ret) + RTE_LOG(ERR, EAL, + "Can not handle hot-unplug for device (%s)\n", + device->name); +} + +/* enable notifier (only enable req now) */ +static int +pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd) +{ + int ret; + int fd = -1; + + /* set up an eventfd for req notifier */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot set up eventfd, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + + dev->vfio_req_intr_handle.fd = fd; + dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_VFIO_REQ; + dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd; + + ret = rte_intr_callback_register(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret) { + RTE_LOG(ERR, EAL, "Fail to register req notifier handler.\n"); + goto error; + } + + ret = rte_intr_enable(&dev->vfio_req_intr_handle); + if (ret) { + RTE_LOG(ERR, EAL, "Fail to enable req notifier.\n"); + ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret < 0) + RTE_LOG(ERR, EAL, + "Fail to unregister req notifier handler.\n"); + goto error; + } + + return 0; +error: + close(fd); + + dev->vfio_req_intr_handle.fd = -1; + dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + dev->vfio_req_intr_handle.vfio_dev_fd = -1; + + return -1; +} + +/* disable notifier (only disable req now) */ +static int +pci_vfio_disable_notifier(struct rte_pci_device *dev) +{ + int ret; + + ret = rte_intr_disable(&dev->vfio_req_intr_handle); + if (ret) { + RTE_LOG(ERR, EAL, "fail to disable req notifier.\n"); + return -1; + } + + ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret < 0) { + RTE_LOG(ERR, EAL, + "fail to unregister req notifier handler.\n"); + return -1; + } + + close(dev->vfio_req_intr_handle.fd); + + dev->vfio_req_intr_handle.fd = -1; + dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + dev->vfio_req_intr_handle.vfio_dev_fd = -1; + + return 0; +} +#endif + static int pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index) { @@ -415,6 +527,93 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res, return 0; } +/* + * region info may contain capability headers, so we need to keep reallocating + * the memory until we match allocated memory size with argsz. + */ +static int +pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info, + int region) +{ + struct vfio_region_info *ri; + size_t argsz = sizeof(*ri); + int ret; + + ri = malloc(sizeof(*ri)); + if (ri == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n"); + return -1; + } +again: + memset(ri, 0, argsz); + ri->argsz = argsz; + ri->index = region; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri); + if (ret < 0) { + free(ri); + return ret; + } + if (ri->argsz != argsz) { + struct vfio_region_info *tmp; + + argsz = ri->argsz; + tmp = realloc(ri, argsz); + + if (tmp == NULL) { + /* realloc failed but the ri is still there */ + free(ri); + RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n"); + return -1; + } + ri = tmp; + goto again; + } + *info = ri; + + return 0; +} + +static struct vfio_info_cap_header * +pci_vfio_info_cap(struct vfio_region_info *info, int cap) +{ + struct vfio_info_cap_header *h; + size_t offset; + + if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) { + /* VFIO info does not advertise capabilities */ + return NULL; + } + + offset = VFIO_CAP_OFFSET(info); + while (offset != 0) { + h = RTE_PTR_ADD(info, offset); + if (h->id == cap) + return h; + offset = h->next; + } + return NULL; +} + +static int +pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region) +{ + struct vfio_region_info *info; + int ret; + + ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region); + if (ret < 0) + return -1; + + ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL; + + /* cleanup */ + free(info); + + return ret; +} + + static int pci_vfio_map_resource_primary(struct rte_pci_device *dev) { @@ -430,6 +629,9 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) struct pci_map *maps; dev->intr_handle.fd = -1; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.fd = -1; +#endif /* store PCI address string */ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, @@ -464,56 +666,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) if (ret < 0) { RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); - goto err_vfio_dev_fd; + goto err_vfio_res; + } + /* if we found our MSI-X BAR region, check if we can mmap it */ + if (vfio_res->msix_table.bar_index != -1) { + int ret = pci_vfio_msix_is_mappable(vfio_dev_fd, + vfio_res->msix_table.bar_index); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n"); + goto err_vfio_res; + } else if (ret != 0) { + /* we can map it, so we don't care where it is */ + RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n"); + vfio_res->msix_table.bar_index = -1; + } } for (i = 0; i < (int) vfio_res->nb_maps; i++) { - struct vfio_region_info reg = { .argsz = sizeof(reg) }; + struct vfio_region_info *reg = NULL; void *bar_addr; - reg.index = i; - - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®); - if (ret) { + ret = pci_vfio_get_region_info(vfio_dev_fd, ®, i); + if (ret < 0) { RTE_LOG(ERR, EAL, " %s cannot get device region info " - "error %i (%s)\n", pci_addr, errno, strerror(errno)); + "error %i (%s)\n", pci_addr, errno, + strerror(errno)); goto err_vfio_res; } /* chk for io port region */ ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i); - if (ret < 0) + if (ret < 0) { + free(reg); goto err_vfio_res; - else if (ret) { + } else if (ret) { RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n", i); + free(reg); continue; } /* skip non-mmapable BARs */ - if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) + if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) { + free(reg); continue; + } /* try mapping somewhere close to the end of hugepages */ if (pci_map_addr == NULL) pci_map_addr = pci_find_max_end_va(); bar_addr = pci_map_addr; - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size); maps[i].addr = bar_addr; - maps[i].offset = reg.offset; - maps[i].size = reg.size; + maps[i].offset = reg->offset; + maps[i].size = reg->size; maps[i].path = NULL; /* vfio doesn't have per-resource paths */ ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0); if (ret < 0) { RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, strerror(errno)); + free(reg); goto err_vfio_res; } dev->mem_resource[i].addr = maps[i].addr; + + free(reg); } if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) { @@ -521,6 +742,13 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) goto err_vfio_res; } +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) { + RTE_LOG(ERR, EAL, "Error setting up notifier!\n"); + goto err_vfio_res; + } + +#endif TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); return 0; @@ -546,6 +774,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) struct pci_map *maps; dev->intr_handle.fd = -1; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.fd = -1; +#endif /* store PCI address string */ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, @@ -586,6 +817,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) /* we need save vfio_dev_fd, so it can be used during release */ dev->intr_handle.vfio_dev_fd = vfio_dev_fd; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd; +#endif return 0; err_vfio_dev_fd: @@ -658,6 +892,14 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, loc->domain, loc->bus, loc->devid, loc->function); +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + ret = pci_vfio_disable_notifier(dev); + if (ret) { + RTE_LOG(ERR, EAL, "fail to disable req notifier.\n"); + return -1; + } + +#endif if (close(dev->intr_handle.fd) < 0) { RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n", pci_addr); |