aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/bus/pci/linux/pci_vfio.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/bus/pci/linux/pci_vfio.c')
-rw-r--r--drivers/bus/pci/linux/pci_vfio.c268
1 files changed, 255 insertions, 13 deletions
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index 686386d6..305cc060 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -17,6 +17,8 @@
#include <rte_eal_memconfig.h>
#include <rte_malloc.h>
#include <rte_vfio.h>
+#include <rte_eal.h>
+#include <rte_bus.h>
#include "eal_filesystem.h"
@@ -35,7 +37,9 @@
#ifdef VFIO_PRESENT
+#ifndef PAGE_SIZE
#define PAGE_SIZE (sysconf(_SC_PAGESIZE))
+#endif
#define PAGE_MASK (~(PAGE_SIZE - 1))
static struct rte_tailq_elem rte_vfio_tailq = {
@@ -277,6 +281,114 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
return -1;
}
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+/*
+ * Interrupt callback for the VFIO device-request notifier.
+ *
+ * param is the struct rte_device that was passed at registration time.
+ * Looks up the bus owning that device and invokes the bus hot-unplug
+ * handler on it; failures are only logged since a callback cannot
+ * return a status.
+ */
+static void
+pci_vfio_req_handler(void *param)
+{
+	struct rte_device *rdev = (struct rte_device *)param;
+	struct rte_bus *owner = rte_bus_find_by_device(rdev);
+
+	if (owner == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",
+			rdev->name);
+		return;
+	}
+
+	/*
+	 * The vfio kernel module asks user space to release the resources
+	 * it holds before the device is deleted in the kernel, so the bus
+	 * hot-unplug handler can be called directly to process the request.
+	 */
+	if (owner->hot_unplug_handler(rdev) != 0)
+		RTE_LOG(ERR, EAL,
+			"Can not handle hot-unplug for device (%s)\n",
+			rdev->name);
+}
+
+/*
+ * Enable device notifiers (only the req notifier is handled for now).
+ *
+ * Creates a non-blocking, close-on-exec eventfd, stores it together with
+ * vfio_dev_fd in dev->vfio_req_intr_handle, registers
+ * pci_vfio_req_handler() on that handle and enables the interrupt.
+ *
+ * Returns 0 on success. On any failure the eventfd is closed, the handle
+ * fields are reset and -1 is returned.
+ */
+static int
+pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd)
+{
+	int efd;
+	int rc;
+
+	/* the req notification is delivered through an eventfd */
+	efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+	if (efd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot set up eventfd, error %i (%s)\n",
+			errno, strerror(errno));
+		return -1;
+	}
+
+	dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd;
+	dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_VFIO_REQ;
+	dev->vfio_req_intr_handle.fd = efd;
+
+	rc = rte_intr_callback_register(&dev->vfio_req_intr_handle,
+					pci_vfio_req_handler,
+					(void *)&dev->device);
+	if (rc != 0) {
+		RTE_LOG(ERR, EAL, "Fail to register req notifier handler.\n");
+		goto fail;
+	}
+
+	rc = rte_intr_enable(&dev->vfio_req_intr_handle);
+	if (rc == 0)
+		return 0;
+
+	/* enabling failed: drop the callback registered just above */
+	RTE_LOG(ERR, EAL, "Fail to enable req notifier.\n");
+	rc = rte_intr_callback_unregister(&dev->vfio_req_intr_handle,
+					  pci_vfio_req_handler,
+					  (void *)&dev->device);
+	if (rc < 0)
+		RTE_LOG(ERR, EAL,
+			"Fail to unregister req notifier handler.\n");
+
+fail:
+	close(efd);
+
+	/* leave the handle in its "not set up" state */
+	dev->vfio_req_intr_handle.fd = -1;
+	dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+	dev->vfio_req_intr_handle.vfio_dev_fd = -1;
+
+	return -1;
+}
+
+/*
+ * Disable device notifiers (only the req notifier is handled for now).
+ *
+ * Disables the interrupt on dev->vfio_req_intr_handle, unregisters
+ * pci_vfio_req_handler() from it, closes the eventfd and resets the
+ * handle fields.
+ *
+ * Returns 0 on success. Returns -1 as soon as disabling or unregistering
+ * fails; in that case the eventfd is left open and the handle untouched.
+ */
+static int
+pci_vfio_disable_notifier(struct rte_pci_device *dev)
+{
+	if (rte_intr_disable(&dev->vfio_req_intr_handle) != 0) {
+		RTE_LOG(ERR, EAL, "fail to disable req notifier.\n");
+		return -1;
+	}
+
+	if (rte_intr_callback_unregister(&dev->vfio_req_intr_handle,
+					 pci_vfio_req_handler,
+					 (void *)&dev->device) < 0) {
+		RTE_LOG(ERR, EAL,
+			"fail to unregister req notifier handler.\n");
+		return -1;
+	}
+
+	close(dev->vfio_req_intr_handle.fd);
+
+	/* mark the handle as no longer set up */
+	dev->vfio_req_intr_handle.vfio_dev_fd = -1;
+	dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+	dev->vfio_req_intr_handle.fd = -1;
+
+	return 0;
+}
+#endif
+
static int
pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
{
@@ -415,6 +527,93 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
return 0;
}
+/*
+ * Query VFIO region information for one region of a device.
+ *
+ * Region info may contain capability headers, so the buffer is reallocated
+ * and the ioctl repeated until the allocated size matches the argsz value
+ * reported back by the ioctl.
+ *
+ * vfio_dev_fd: file descriptor the VFIO_DEVICE_GET_REGION_INFO ioctl is
+ *              issued on.
+ * info:        on success, set to a malloc()ed buffer that the caller must
+ *              free(); left untouched on failure.
+ * region:      region index to query.
+ *
+ * Returns 0 on success, -1 on allocation failure, or the negative ioctl
+ * return value. errno from the failing call is preserved on every failure
+ * path, because callers report errno in their own error message and the
+ * intervening free()/logging calls may otherwise overwrite it.
+ */
+static int
+pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
+		int region)
+{
+	struct vfio_region_info *ri;
+	size_t argsz = sizeof(*ri);
+	int saved_errno;
+	int ret;
+
+	ri = malloc(sizeof(*ri));
+	if (ri == NULL) {
+		saved_errno = errno;
+		RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
+		errno = saved_errno;
+		return -1;
+	}
+again:
+	memset(ri, 0, argsz);
+	ri->argsz = argsz;
+	ri->index = region;
+
+	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri);
+	if (ret < 0) {
+		/* keep the ioctl's errno intact across free() */
+		saved_errno = errno;
+		free(ri);
+		errno = saved_errno;
+		return ret;
+	}
+	if (ri->argsz != argsz) {
+		struct vfio_region_info *tmp;
+
+		/* the ioctl reported a different size: resize and retry */
+		argsz = ri->argsz;
+		tmp = realloc(ri, argsz);
+
+		if (tmp == NULL) {
+			/* realloc failed but the ri is still there */
+			saved_errno = errno;
+			free(ri);
+			RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
+			errno = saved_errno;
+			return -1;
+		}
+		ri = tmp;
+		goto again;
+	}
+	*info = ri;
+
+	return 0;
+}
+
+/*
+ * Look up a capability header inside a VFIO region info buffer.
+ *
+ * Starts at VFIO_CAP_OFFSET(info) and follows each header's "next" offset
+ * (relative to the start of info) until an offset of 0 ends the chain.
+ *
+ * Returns a pointer to the first header whose id equals cap, or NULL when
+ * the info does not carry the capabilities flag or no header matches.
+ */
+static struct vfio_info_cap_header *
+pci_vfio_info_cap(struct vfio_region_info *info, int cap)
+{
+	struct vfio_info_cap_header *hdr;
+	size_t pos;
+
+	/* VFIO info does not advertise capabilities */
+	if (!(info->flags & RTE_VFIO_INFO_FLAG_CAPS))
+		return NULL;
+
+	for (pos = VFIO_CAP_OFFSET(info); pos != 0; pos = hdr->next) {
+		hdr = RTE_PTR_ADD(info, pos);
+		if (hdr->id == cap)
+			return hdr;
+	}
+
+	return NULL;
+}
+
+/*
+ * Check whether the region holding the MSI-X table advertises the
+ * RTE_VFIO_CAP_MSIX_MAPPABLE capability.
+ *
+ * Returns 1 if the capability is present, 0 if it is not, and -1 if the
+ * region info could not be retrieved.
+ */
+static int
+pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
+{
+	struct vfio_region_info *region_info;
+	int mappable;
+
+	if (pci_vfio_get_region_info(vfio_dev_fd, &region_info,
+				     msix_region) < 0)
+		return -1;
+
+	mappable = (pci_vfio_info_cap(region_info,
+				      RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL);
+
+	/* the region info buffer is owned by this function */
+	free(region_info);
+
+	return mappable;
+}
+
+
static int
pci_vfio_map_resource_primary(struct rte_pci_device *dev)
{
@@ -430,6 +629,9 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
struct pci_map *maps;
dev->intr_handle.fd = -1;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ dev->vfio_req_intr_handle.fd = -1;
+#endif
/* store PCI address string */
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
@@ -464,56 +666,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
pci_addr);
- goto err_vfio_dev_fd;
+ goto err_vfio_res;
+ }
+ /* if we found our MSI-X BAR region, check if we can mmap it */
+ if (vfio_res->msix_table.bar_index != -1) {
+ int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
+ vfio_res->msix_table.bar_index);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
+ goto err_vfio_res;
+ } else if (ret != 0) {
+ /* we can map it, so we don't care where it is */
+ RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
+ vfio_res->msix_table.bar_index = -1;
+ }
}
for (i = 0; i < (int) vfio_res->nb_maps; i++) {
- struct vfio_region_info reg = { .argsz = sizeof(reg) };
+ struct vfio_region_info *reg = NULL;
void *bar_addr;
- reg.index = i;
-
- ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
- if (ret) {
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
+ if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get device region info "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
+ "error %i (%s)\n", pci_addr, errno,
+ strerror(errno));
goto err_vfio_res;
}
/* chk for io port region */
ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
- if (ret < 0)
+ if (ret < 0) {
+ free(reg);
goto err_vfio_res;
- else if (ret) {
+ } else if (ret) {
RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
i);
+ free(reg);
continue;
}
/* skip non-mmapable BARs */
- if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
+ if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
+ free(reg);
continue;
+ }
/* try mapping somewhere close to the end of hugepages */
if (pci_map_addr == NULL)
pci_map_addr = pci_find_max_end_va();
bar_addr = pci_map_addr;
- pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+ pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
maps[i].addr = bar_addr;
- maps[i].offset = reg.offset;
- maps[i].size = reg.size;
+ maps[i].offset = reg->offset;
+ maps[i].size = reg->size;
maps[i].path = NULL; /* vfio doesn't have per-resource paths */
ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
pci_addr, i, strerror(errno));
+ free(reg);
goto err_vfio_res;
}
dev->mem_resource[i].addr = maps[i].addr;
+
+ free(reg);
}
if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
@@ -521,6 +742,13 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
goto err_vfio_res;
}
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) {
+ RTE_LOG(ERR, EAL, "Error setting up notifier!\n");
+ goto err_vfio_res;
+ }
+
+#endif
TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
return 0;
@@ -546,6 +774,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
struct pci_map *maps;
dev->intr_handle.fd = -1;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ dev->vfio_req_intr_handle.fd = -1;
+#endif
/* store PCI address string */
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
@@ -586,6 +817,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
/* we need save vfio_dev_fd, so it can be used during release */
dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd;
+#endif
return 0;
err_vfio_dev_fd:
@@ -658,6 +892,14 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ ret = pci_vfio_disable_notifier(dev);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "fail to disable req notifier.\n");
+ return -1;
+ }
+
+#endif
if (close(dev->intr_handle.fd) < 0) {
RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
pci_addr);