From 8d01b9cd70a67cdafd5b965a70420c3bd7fb3f82 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 1 Nov 2018 11:59:50 +0000 Subject: New upstream version 18.11-rc1 Change-Id: Iaa71986dd6332e878d8f4bf493101b2bbc6313bb Signed-off-by: Luca Boccassi --- drivers/bus/pci/linux/pci_vfio.c | 268 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 255 insertions(+), 13 deletions(-) (limited to 'drivers/bus/pci/linux/pci_vfio.c') diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c index 686386d6..305cc060 100644 --- a/drivers/bus/pci/linux/pci_vfio.c +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include "eal_filesystem.h" @@ -35,7 +37,9 @@ #ifdef VFIO_PRESENT +#ifndef PAGE_SIZE #define PAGE_SIZE (sysconf(_SC_PAGESIZE)) +#endif #define PAGE_MASK (~(PAGE_SIZE - 1)) static struct rte_tailq_elem rte_vfio_tailq = { @@ -277,6 +281,114 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) return -1; } +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE +static void +pci_vfio_req_handler(void *param) +{ + struct rte_bus *bus; + int ret; + struct rte_device *device = (struct rte_device *)param; + + bus = rte_bus_find_by_device(device); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n", + device->name); + return; + } + + /* + * vfio kernel module request user space to release allocated + * resources before device be deleted in kernel, so it can directly + * call the vfio bus hot-unplug handler to process it. 
+ */ + ret = bus->hot_unplug_handler(device); + if (ret) + RTE_LOG(ERR, EAL, + "Can not handle hot-unplug for device (%s)\n", + device->name); +} + +/* enable notifier (only enable req now) */ +static int +pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd) +{ + int ret; + int fd = -1; + + /* set up an eventfd for req notifier */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot set up eventfd, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + + dev->vfio_req_intr_handle.fd = fd; + dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_VFIO_REQ; + dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd; + + ret = rte_intr_callback_register(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret) { + RTE_LOG(ERR, EAL, "Fail to register req notifier handler.\n"); + goto error; + } + + ret = rte_intr_enable(&dev->vfio_req_intr_handle); + if (ret) { + RTE_LOG(ERR, EAL, "Fail to enable req notifier.\n"); + ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret < 0) + RTE_LOG(ERR, EAL, + "Fail to unregister req notifier handler.\n"); + goto error; + } + + return 0; +error: + close(fd); + + dev->vfio_req_intr_handle.fd = -1; + dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + dev->vfio_req_intr_handle.vfio_dev_fd = -1; + + return -1; +} + +/* disable notifier (only disable req now) */ +static int +pci_vfio_disable_notifier(struct rte_pci_device *dev) +{ + int ret; + + ret = rte_intr_disable(&dev->vfio_req_intr_handle); + if (ret) { + RTE_LOG(ERR, EAL, "fail to disable req notifier.\n"); + return -1; + } + + ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret < 0) { + RTE_LOG(ERR, EAL, + "fail to unregister req notifier handler.\n"); + return -1; + } + + close(dev->vfio_req_intr_handle.fd); + + dev->vfio_req_intr_handle.fd = -1; + 
dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + dev->vfio_req_intr_handle.vfio_dev_fd = -1; + + return 0; +} +#endif + static int pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index) { @@ -415,6 +527,93 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res, return 0; } +/* + * region info may contain capability headers, so we need to keep reallocating + * the memory until we match allocated memory size with argsz. + */ +static int +pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info, + int region) +{ + struct vfio_region_info *ri; + size_t argsz = sizeof(*ri); + int ret; + + ri = malloc(sizeof(*ri)); + if (ri == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n"); + return -1; + } +again: + memset(ri, 0, argsz); + ri->argsz = argsz; + ri->index = region; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri); + if (ret < 0) { + free(ri); + return ret; + } + if (ri->argsz != argsz) { + struct vfio_region_info *tmp; + + argsz = ri->argsz; + tmp = realloc(ri, argsz); + + if (tmp == NULL) { + /* realloc failed but the ri is still there */ + free(ri); + RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n"); + return -1; + } + ri = tmp; + goto again; + } + *info = ri; + + return 0; +} + +static struct vfio_info_cap_header * +pci_vfio_info_cap(struct vfio_region_info *info, int cap) +{ + struct vfio_info_cap_header *h; + size_t offset; + + if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) { + /* VFIO info does not advertise capabilities */ + return NULL; + } + + offset = VFIO_CAP_OFFSET(info); + while (offset != 0) { + h = RTE_PTR_ADD(info, offset); + if (h->id == cap) + return h; + offset = h->next; + } + return NULL; +} + +static int +pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region) +{ + struct vfio_region_info *info; + int ret; + + ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region); + if (ret < 0) + return -1; + + ret = pci_vfio_info_cap(info, 
RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL; + + /* cleanup */ + free(info); + + return ret; +} + + static int pci_vfio_map_resource_primary(struct rte_pci_device *dev) { @@ -430,6 +629,9 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) struct pci_map *maps; dev->intr_handle.fd = -1; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.fd = -1; +#endif /* store PCI address string */ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, @@ -464,56 +666,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) if (ret < 0) { RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); - goto err_vfio_dev_fd; + goto err_vfio_res; + } + /* if we found our MSI-X BAR region, check if we can mmap it */ + if (vfio_res->msix_table.bar_index != -1) { + int ret = pci_vfio_msix_is_mappable(vfio_dev_fd, + vfio_res->msix_table.bar_index); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n"); + goto err_vfio_res; + } else if (ret != 0) { + /* we can map it, so we don't care where it is */ + RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n"); + vfio_res->msix_table.bar_index = -1; + } } for (i = 0; i < (int) vfio_res->nb_maps; i++) { - struct vfio_region_info reg = { .argsz = sizeof(reg) }; + struct vfio_region_info *reg = NULL; void *bar_addr; - reg.index = i; - - ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg); - if (ret) { + ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i); + if (ret < 0) { RTE_LOG(ERR, EAL, " %s cannot get device region info " - "error %i (%s)\n", pci_addr, errno, strerror(errno)); + "error %i (%s)\n", pci_addr, errno, + strerror(errno)); goto err_vfio_res; } /* chk for io port region */ ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i); - if (ret < 0) + if (ret < 0) { + free(reg); goto err_vfio_res; - else if (ret) { + } else if (ret) { RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n", i); + free(reg); continue; } /* skip non-mmapable BARs */ - if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) 
== 0) + if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) { + free(reg); continue; + } /* try mapping somewhere close to the end of hugepages */ if (pci_map_addr == NULL) pci_map_addr = pci_find_max_end_va(); bar_addr = pci_map_addr; - pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size); maps[i].addr = bar_addr; - maps[i].offset = reg.offset; - maps[i].size = reg.size; + maps[i].offset = reg->offset; + maps[i].size = reg->size; maps[i].path = NULL; /* vfio doesn't have per-resource paths */ ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0); if (ret < 0) { RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, strerror(errno)); + free(reg); goto err_vfio_res; } dev->mem_resource[i].addr = maps[i].addr; + + free(reg); } if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) { @@ -521,6 +742,13 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) goto err_vfio_res; } +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) { + RTE_LOG(ERR, EAL, "Error setting up notifier!\n"); + goto err_vfio_res; + } + +#endif TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); return 0; @@ -546,6 +774,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) struct pci_map *maps; dev->intr_handle.fd = -1; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.fd = -1; +#endif /* store PCI address string */ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, @@ -586,6 +817,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev) /* we need save vfio_dev_fd, so it can be used during release */ dev->intr_handle.vfio_dev_fd = vfio_dev_fd; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd; +#endif return 0; err_vfio_dev_fd: @@ -658,6 +892,14 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, loc->domain, loc->bus, loc->devid, loc->function); +#ifdef 
HAVE_VFIO_DEV_REQ_INTERFACE + ret = pci_vfio_disable_notifier(dev); + if (ret) { + RTE_LOG(ERR, EAL, "fail to disable req notifier.\n"); + return -1; + } + +#endif if (close(dev->intr_handle.fd) < 0) { RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n", pci_addr); -- cgit 1.2.3-korg