aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/bus/pci
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/bus/pci')
-rw-r--r--drivers/bus/pci/Makefile5
-rw-r--r--drivers/bus/pci/bsd/pci.c6
-rw-r--r--drivers/bus/pci/linux/Makefile2
-rw-r--r--drivers/bus/pci/linux/pci.c46
-rw-r--r--drivers/bus/pci/linux/pci_vfio.c268
-rw-r--r--drivers/bus/pci/meson.build9
-rw-r--r--drivers/bus/pci/pci_common.c143
-rw-r--r--drivers/bus/pci/pci_common_uio.c33
-rw-r--r--drivers/bus/pci/pci_params.c78
-rw-r--r--drivers/bus/pci/private.h39
-rw-r--r--drivers/bus/pci/rte_bus_pci.h10
11 files changed, 574 insertions, 65 deletions
diff --git a/drivers/bus/pci/Makefile b/drivers/bus/pci/Makefile
index cf373068..f33e0120 100644
--- a/drivers/bus/pci/Makefile
+++ b/drivers/bus/pci/Makefile
@@ -4,7 +4,7 @@
include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_bus_pci.a
-LIBABIVER := 1
+LIBABIVER := 2
EXPORT_MAP := rte_bus_pci_version.map
CFLAGS := -I$(SRCDIR) $(CFLAGS)
@@ -26,10 +26,11 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_eal/$(SYSTEM)app/eal
CFLAGS += -DALLOW_EXPERIMENTAL_API
LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
-LDLIBS += -lrte_ethdev -lrte_pci
+LDLIBS += -lrte_ethdev -lrte_pci -lrte_kvargs
include $(RTE_SDK)/drivers/bus/pci/$(SYSTEM)/Makefile
SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) := $(addprefix $(SYSTEM)/,$(SRCS))
+SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_params.c
SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_common.c
SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_common_uio.c
diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c
index 655b34b7..d09f8ee5 100644
--- a/drivers/bus/pci/bsd/pci.c
+++ b/drivers/bus/pci/bsd/pci.c
@@ -223,6 +223,8 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
}
memset(dev, 0, sizeof(*dev));
+ dev->device.bus = &rte_pci_bus.bus;
+
dev->addr.domain = conf->pc_sel.pc_domain;
dev->addr.bus = conf->pc_sel.pc_bus;
dev->addr.devid = conf->pc_sel.pc_dev;
@@ -439,6 +441,8 @@ int rte_pci_read_config(const struct rte_pci_device *dev,
{
int fd = -1;
int size;
+ /* Copy Linux implementation's behaviour */
+ const int return_len = len;
struct pci_io pi = {
.pi_sel = {
.pc_domain = dev->addr.domain,
@@ -469,7 +473,7 @@ int rte_pci_read_config(const struct rte_pci_device *dev,
}
close(fd);
- return 0;
+ return return_len;
error:
if (fd >= 0)
diff --git a/drivers/bus/pci/linux/Makefile b/drivers/bus/pci/linux/Makefile
index 96ea1d54..90404468 100644
--- a/drivers/bus/pci/linux/Makefile
+++ b/drivers/bus/pci/linux/Makefile
@@ -4,5 +4,3 @@
SRCS += pci.c
SRCS += pci_uio.c
SRCS += pci_vfio.c
-
-CFLAGS += -D_GNU_SOURCE
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 04648ac9..45c24ef7 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -119,7 +119,7 @@ rte_pci_unmap_device(struct rte_pci_device *dev)
static int
find_max_end_va(const struct rte_memseg_list *msl, void *arg)
{
- size_t sz = msl->memseg_arr.len * msl->page_sz;
+ size_t sz = msl->len;
void *end_va = RTE_PTR_ADD(msl->base_va, sz);
void **max_va = arg;
@@ -228,6 +228,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
return -1;
memset(dev, 0, sizeof(*dev));
+ dev->device.bus = &rte_pci_bus.bus;
dev->addr = *addr;
/* get vendor id */
@@ -588,10 +589,8 @@ pci_one_device_iommu_support_va(struct rte_pci_device *dev)
fclose(fp);
mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;
- if (mgaw < X86_VA_WIDTH)
- return false;
- return true;
+ return rte_eal_check_dma_mask(mgaw) == 0 ? true : false;
}
#elif defined(RTE_ARCH_PPC_64)
static bool
@@ -620,8 +619,11 @@ pci_devices_iommu_support_va(void)
FOREACH_DEVICE_ON_PCIBUS(dev) {
if (!rte_pci_match(drv, dev))
continue;
- if (!pci_one_device_iommu_support_va(dev))
- return false;
+ /*
+ * just one PCI device needs to be checked out because
+ * the IOMMU hardware is the same for all of them.
+ */
+ return pci_one_device_iommu_support_va(dev);
}
}
return true;
@@ -672,23 +674,21 @@ rte_pci_get_iommu_class(void)
int rte_pci_read_config(const struct rte_pci_device *device,
void *buf, size_t len, off_t offset)
{
+ char devname[RTE_DEV_NAME_MAX_LEN] = "";
const struct rte_intr_handle *intr_handle = &device->intr_handle;
- switch (intr_handle->type) {
- case RTE_INTR_HANDLE_UIO:
- case RTE_INTR_HANDLE_UIO_INTX:
+ switch (device->kdrv) {
+ case RTE_KDRV_IGB_UIO:
return pci_uio_read_config(intr_handle, buf, len, offset);
-
#ifdef VFIO_PRESENT
- case RTE_INTR_HANDLE_VFIO_MSIX:
- case RTE_INTR_HANDLE_VFIO_MSI:
- case RTE_INTR_HANDLE_VFIO_LEGACY:
+ case RTE_KDRV_VFIO:
return pci_vfio_read_config(intr_handle, buf, len, offset);
#endif
default:
+ rte_pci_device_name(&device->addr, devname,
+ RTE_DEV_NAME_MAX_LEN);
RTE_LOG(ERR, EAL,
- "Unknown handle type of fd %d\n",
- intr_handle->fd);
+ "Unknown driver type for %s\n", devname);
return -1;
}
}
@@ -697,23 +697,21 @@ int rte_pci_read_config(const struct rte_pci_device *device,
int rte_pci_write_config(const struct rte_pci_device *device,
const void *buf, size_t len, off_t offset)
{
+ char devname[RTE_DEV_NAME_MAX_LEN] = "";
const struct rte_intr_handle *intr_handle = &device->intr_handle;
- switch (intr_handle->type) {
- case RTE_INTR_HANDLE_UIO:
- case RTE_INTR_HANDLE_UIO_INTX:
+ switch (device->kdrv) {
+ case RTE_KDRV_IGB_UIO:
return pci_uio_write_config(intr_handle, buf, len, offset);
-
#ifdef VFIO_PRESENT
- case RTE_INTR_HANDLE_VFIO_MSIX:
- case RTE_INTR_HANDLE_VFIO_MSI:
- case RTE_INTR_HANDLE_VFIO_LEGACY:
+ case RTE_KDRV_VFIO:
return pci_vfio_write_config(intr_handle, buf, len, offset);
#endif
default:
+ rte_pci_device_name(&device->addr, devname,
+ RTE_DEV_NAME_MAX_LEN);
RTE_LOG(ERR, EAL,
- "Unknown handle type of fd %d\n",
- intr_handle->fd);
+ "Unknown driver type for %s\n", devname);
return -1;
}
}
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index 686386d6..305cc060 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -17,6 +17,8 @@
#include <rte_eal_memconfig.h>
#include <rte_malloc.h>
#include <rte_vfio.h>
+#include <rte_eal.h>
+#include <rte_bus.h>
#include "eal_filesystem.h"
@@ -35,7 +37,9 @@
#ifdef VFIO_PRESENT
+#ifndef PAGE_SIZE
#define PAGE_SIZE (sysconf(_SC_PAGESIZE))
+#endif
#define PAGE_MASK (~(PAGE_SIZE - 1))
static struct rte_tailq_elem rte_vfio_tailq = {
@@ -277,6 +281,114 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
return -1;
}
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+static void
+pci_vfio_req_handler(void *param)
+{
+ struct rte_bus *bus;
+ int ret;
+ struct rte_device *device = (struct rte_device *)param;
+
+ bus = rte_bus_find_by_device(device);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",
+ device->name);
+ return;
+ }
+
+ /*
+ * vfio kernel module request user space to release allocated
+ * resources before device be deleted in kernel, so it can directly
+ * call the vfio bus hot-unplug handler to process it.
+ */
+ ret = bus->hot_unplug_handler(device);
+ if (ret)
+ RTE_LOG(ERR, EAL,
+ "Can not handle hot-unplug for device (%s)\n",
+ device->name);
+}
+
+/* enable notifier (only enable req now) */
+static int
+pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd)
+{
+ int ret;
+ int fd = -1;
+
+ /* set up an eventfd for req notifier */
+ fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot set up eventfd, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ dev->vfio_req_intr_handle.fd = fd;
+ dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_VFIO_REQ;
+ dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd;
+
+ ret = rte_intr_callback_register(&dev->vfio_req_intr_handle,
+ pci_vfio_req_handler,
+ (void *)&dev->device);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Fail to register req notifier handler.\n");
+ goto error;
+ }
+
+ ret = rte_intr_enable(&dev->vfio_req_intr_handle);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Fail to enable req notifier.\n");
+ ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle,
+ pci_vfio_req_handler,
+ (void *)&dev->device);
+ if (ret < 0)
+ RTE_LOG(ERR, EAL,
+ "Fail to unregister req notifier handler.\n");
+ goto error;
+ }
+
+ return 0;
+error:
+ close(fd);
+
+ dev->vfio_req_intr_handle.fd = -1;
+ dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ dev->vfio_req_intr_handle.vfio_dev_fd = -1;
+
+ return -1;
+}
+
+/* disable notifier (only disable req now) */
+static int
+pci_vfio_disable_notifier(struct rte_pci_device *dev)
+{
+ int ret;
+
+ ret = rte_intr_disable(&dev->vfio_req_intr_handle);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "fail to disable req notifier.\n");
+ return -1;
+ }
+
+ ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle,
+ pci_vfio_req_handler,
+ (void *)&dev->device);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL,
+ "fail to unregister req notifier handler.\n");
+ return -1;
+ }
+
+ close(dev->vfio_req_intr_handle.fd);
+
+ dev->vfio_req_intr_handle.fd = -1;
+ dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ dev->vfio_req_intr_handle.vfio_dev_fd = -1;
+
+ return 0;
+}
+#endif
+
static int
pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
{
@@ -415,6 +527,93 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
return 0;
}
+/*
+ * region info may contain capability headers, so we need to keep reallocating
+ * the memory until we match allocated memory size with argsz.
+ */
+static int
+pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
+ int region)
+{
+ struct vfio_region_info *ri;
+ size_t argsz = sizeof(*ri);
+ int ret;
+
+ ri = malloc(sizeof(*ri));
+ if (ri == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
+ return -1;
+ }
+again:
+ memset(ri, 0, argsz);
+ ri->argsz = argsz;
+ ri->index = region;
+
+ ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri);
+ if (ret < 0) {
+ free(ri);
+ return ret;
+ }
+ if (ri->argsz != argsz) {
+ struct vfio_region_info *tmp;
+
+ argsz = ri->argsz;
+ tmp = realloc(ri, argsz);
+
+ if (tmp == NULL) {
+ /* realloc failed but the ri is still there */
+ free(ri);
+ RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
+ return -1;
+ }
+ ri = tmp;
+ goto again;
+ }
+ *info = ri;
+
+ return 0;
+}
+
+static struct vfio_info_cap_header *
+pci_vfio_info_cap(struct vfio_region_info *info, int cap)
+{
+ struct vfio_info_cap_header *h;
+ size_t offset;
+
+ if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
+ /* VFIO info does not advertise capabilities */
+ return NULL;
+ }
+
+ offset = VFIO_CAP_OFFSET(info);
+ while (offset != 0) {
+ h = RTE_PTR_ADD(info, offset);
+ if (h->id == cap)
+ return h;
+ offset = h->next;
+ }
+ return NULL;
+}
+
+static int
+pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
+{
+ struct vfio_region_info *info;
+ int ret;
+
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
+ if (ret < 0)
+ return -1;
+
+ ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;
+
+ /* cleanup */
+ free(info);
+
+ return ret;
+}
+
+
static int
pci_vfio_map_resource_primary(struct rte_pci_device *dev)
{
@@ -430,6 +629,9 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
struct pci_map *maps;
dev->intr_handle.fd = -1;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ dev->vfio_req_intr_handle.fd = -1;
+#endif
/* store PCI address string */
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
@@ -464,56 +666,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
pci_addr);
- goto err_vfio_dev_fd;
+ goto err_vfio_res;
+ }
+ /* if we found our MSI-X BAR region, check if we can mmap it */
+ if (vfio_res->msix_table.bar_index != -1) {
+ int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
+ vfio_res->msix_table.bar_index);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
+ goto err_vfio_res;
+ } else if (ret != 0) {
+ /* we can map it, so we don't care where it is */
+ RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
+ vfio_res->msix_table.bar_index = -1;
+ }
}
for (i = 0; i < (int) vfio_res->nb_maps; i++) {
- struct vfio_region_info reg = { .argsz = sizeof(reg) };
+ struct vfio_region_info *reg = NULL;
void *bar_addr;
- reg.index = i;
-
- ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
- if (ret) {
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
+ if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get device region info "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
+ "error %i (%s)\n", pci_addr, errno,
+ strerror(errno));
goto err_vfio_res;
}
/* chk for io port region */
ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
- if (ret < 0)
+ if (ret < 0) {
+ free(reg);
goto err_vfio_res;
- else if (ret) {
+ } else if (ret) {
RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
i);
+ free(reg);
continue;
}
/* skip non-mmapable BARs */
- if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
+ if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
+ free(reg);
continue;
+ }
/* try mapping somewhere close to the end of hugepages */
if (pci_map_addr == NULL)
pci_map_addr = pci_find_max_end_va();
bar_addr = pci_map_addr;
- pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+ pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
maps[i].addr = bar_addr;
- maps[i].offset = reg.offset;
- maps[i].size = reg.size;
+ maps[i].offset = reg->offset;
+ maps[i].size = reg->size;
maps[i].path = NULL; /* vfio doesn't have per-resource paths */
ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
pci_addr, i, strerror(errno));
+ free(reg);
goto err_vfio_res;
}
dev->mem_resource[i].addr = maps[i].addr;
+
+ free(reg);
}
if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
@@ -521,6 +742,13 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
goto err_vfio_res;
}
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) {
+ RTE_LOG(ERR, EAL, "Error setting up notifier!\n");
+ goto err_vfio_res;
+ }
+
+#endif
TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
return 0;
@@ -546,6 +774,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
struct pci_map *maps;
dev->intr_handle.fd = -1;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ dev->vfio_req_intr_handle.fd = -1;
+#endif
/* store PCI address string */
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
@@ -586,6 +817,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
/* we need save vfio_dev_fd, so it can be used during release */
dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd;
+#endif
return 0;
err_vfio_dev_fd:
@@ -658,6 +892,14 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ ret = pci_vfio_disable_notifier(dev);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "fail to disable req notifier.\n");
+ return -1;
+ }
+
+#endif
if (close(dev->intr_handle.fd) < 0) {
RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
pci_addr);
diff --git a/drivers/bus/pci/meson.build b/drivers/bus/pci/meson.build
index 72939e59..a3140ff9 100644
--- a/drivers/bus/pci/meson.build
+++ b/drivers/bus/pci/meson.build
@@ -1,15 +1,18 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
+version = 2
+
deps += ['pci']
install_headers('rte_bus_pci.h')
-sources = files('pci_common.c', 'pci_common_uio.c')
+sources = files('pci_common.c',
+ 'pci_common_uio.c',
+ 'pci_params.c')
if host_machine.system() == 'linux'
sources += files('linux/pci.c',
'linux/pci_uio.c',
'linux/pci_vfio.c')
includes += include_directories('linux')
- cflags += ['-D_GNU_SOURCE']
else
sources += files('bsd/pci.c')
includes += include_directories('bsd')
@@ -17,3 +20,5 @@ endif
# memseg walk is not part of stable API yet
allow_experimental_apis = true
+
+deps += ['kvargs']
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 7736b3f9..6276e5d6 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -6,6 +6,7 @@
#include <string.h>
#include <inttypes.h>
#include <stdint.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/queue.h>
@@ -23,12 +24,11 @@
#include <rte_string_fns.h>
#include <rte_common.h>
#include <rte_devargs.h>
+#include <rte_vfio.h>
#include "private.h"
-extern struct rte_pci_bus rte_pci_bus;
-
#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
const char *rte_pci_get_sysfs_path(void)
@@ -123,6 +123,7 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
struct rte_pci_device *dev)
{
int ret;
+ bool already_probed;
struct rte_pci_addr *loc;
if ((dr == NULL) || (dev == NULL))
@@ -153,6 +154,13 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
dev->device.numa_node = 0;
}
+ already_probed = rte_dev_is_probed(&dev->device);
+ if (already_probed && !(dr->drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) {
+ RTE_LOG(DEBUG, EAL, "Device %s is already probed\n",
+ dev->device.name);
+ return -EEXIST;
+ }
+
RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id,
dev->id.device_id, dr->driver.name);
@@ -161,24 +169,24 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
* This needs to be before rte_pci_map_device(), as it enables to use
* driver flags for adjusting configuration.
*/
- dev->driver = dr;
- dev->device.driver = &dr->driver;
+ if (!already_probed)
+ dev->driver = dr;
- if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
+ if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) {
/* map resources for devices that use igb_uio */
ret = rte_pci_map_device(dev);
if (ret != 0) {
dev->driver = NULL;
- dev->device.driver = NULL;
return ret;
}
}
/* call the driver probe() function */
ret = dr->probe(dr, dev);
+ if (already_probed)
+ return ret; /* no rollback if already succeeded earlier */
if (ret) {
dev->driver = NULL;
- dev->device.driver = NULL;
if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) &&
/* Don't unmap if device is unsupported and
* driver needs mapped resources.
@@ -186,6 +194,8 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
!(ret > 0 &&
(dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES)))
rte_pci_unmap_device(dev);
+ } else {
+ dev->device.driver = &dr->driver;
}
return ret;
@@ -233,7 +243,7 @@ rte_pci_detach_dev(struct rte_pci_device *dev)
/*
* If vendor/device ID match, call the probe() function of all
- * registered driver for the given device. Return -1 if initialization
+ * registered driver for the given device. Return < 0 if initialization
* failed, return 1 if no driver is found for this device.
*/
static int
@@ -243,17 +253,13 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
int rc = 0;
if (dev == NULL)
- return -1;
-
- /* Check if a driver is already loaded */
- if (dev->driver != NULL)
- return 0;
+ return -EINVAL;
FOREACH_DRIVER_ON_PCIBUS(dr) {
rc = rte_pci_probe_one_driver(dr, dev);
if (rc < 0)
/* negative value is an error */
- return -1;
+ return rc;
if (rc > 0)
/* positive value means driver doesn't support it */
continue;
@@ -290,11 +296,14 @@ rte_pci_probe(void)
devargs->policy == RTE_DEV_WHITELISTED)
ret = pci_probe_all_drivers(dev);
if (ret < 0) {
- RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT
- " cannot be used\n", dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
- rte_errno = errno;
- failed++;
+ if (ret != -EEXIST) {
+ RTE_LOG(ERR, EAL, "Requested device "
+ PCI_PRI_FMT " cannot be used\n",
+ dev->addr.domain, dev->addr.bus,
+ dev->addr.devid, dev->addr.function);
+ rte_errno = errno;
+ failed++;
+ }
ret = 0;
}
}
@@ -405,6 +414,98 @@ pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
return NULL;
}
+/*
+ * find the device which encounter the failure, by iterate over all device on
+ * PCI bus to check if the memory failure address is located in the range
+ * of the BARs of the device.
+ */
+static struct rte_pci_device *
+pci_find_device_by_addr(const void *failure_addr)
+{
+ struct rte_pci_device *pdev = NULL;
+ uint64_t check_point, start, end, len;
+ int i;
+
+ check_point = (uint64_t)(uintptr_t)failure_addr;
+
+ FOREACH_DEVICE_ON_PCIBUS(pdev) {
+ for (i = 0; i != RTE_DIM(pdev->mem_resource); i++) {
+ start = (uint64_t)(uintptr_t)pdev->mem_resource[i].addr;
+ len = pdev->mem_resource[i].len;
+ end = start + len;
+ if (check_point >= start && check_point < end) {
+ RTE_LOG(DEBUG, EAL, "Failure address %16.16"
+ PRIx64" belongs to device %s!\n",
+ check_point, pdev->device.name);
+ return pdev;
+ }
+ }
+ }
+ return NULL;
+}
+
+static int
+pci_hot_unplug_handler(struct rte_device *dev)
+{
+ struct rte_pci_device *pdev = NULL;
+ int ret = 0;
+
+ pdev = RTE_DEV_TO_PCI(dev);
+ if (!pdev)
+ return -1;
+
+ switch (pdev->kdrv) {
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ case RTE_KDRV_VFIO:
+ /*
+ * vfio kernel module guaranty the pci device would not be
+ * deleted until the user space release the resource, so no
+ * need to remap BARs resource here, just directly notify
+ * the req event to the user space to handle it.
+ */
+ rte_dev_event_callback_process(dev->name,
+ RTE_DEV_EVENT_REMOVE);
+ break;
+#endif
+ case RTE_KDRV_IGB_UIO:
+ case RTE_KDRV_UIO_GENERIC:
+ case RTE_KDRV_NIC_UIO:
+ /* BARs resource is invalid, remap it to be safe. */
+ ret = pci_uio_remap_resource(pdev);
+ break;
+ default:
+ RTE_LOG(DEBUG, EAL,
+ "Not managed by a supported kernel driver, skipped\n");
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
+
+static int
+pci_sigbus_handler(const void *failure_addr)
+{
+ struct rte_pci_device *pdev = NULL;
+ int ret = 0;
+
+ pdev = pci_find_device_by_addr(failure_addr);
+ if (!pdev) {
+ /* It is a generic sigbus error, no bus would handle it. */
+ ret = 1;
+ } else {
+ /* The sigbus error is caused of hot-unplug. */
+ ret = pci_hot_unplug_handler(&pdev->device);
+ if (ret) {
+ RTE_LOG(ERR, EAL,
+ "Failed to handle hot-unplug for device %s",
+ pdev->name);
+ ret = -1;
+ }
+ }
+ return ret;
+}
+
static int
pci_plug(struct rte_device *dev)
{
@@ -421,6 +522,7 @@ pci_unplug(struct rte_device *dev)
ret = rte_pci_detach_dev(pdev);
if (ret == 0) {
rte_pci_remove_device(pdev);
+ rte_devargs_remove(dev->devargs);
free(pdev);
}
return ret;
@@ -435,6 +537,9 @@ struct rte_pci_bus rte_pci_bus = {
.unplug = pci_unplug,
.parse = pci_parse,
.get_iommu_class = rte_pci_get_iommu_class,
+ .dev_iterate = rte_pci_dev_iterate,
+ .hot_unplug_handler = pci_hot_unplug_handler,
+ .sigbus_handler = pci_sigbus_handler,
},
.device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
.driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c
index 54bc20b5..7ea73dbc 100644
--- a/drivers/bus/pci/pci_common_uio.c
+++ b/drivers/bus/pci/pci_common_uio.c
@@ -146,6 +146,39 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res)
}
}
+/* remap the PCI resource of a PCI device in anonymous virtual memory */
+int
+pci_uio_remap_resource(struct rte_pci_device *dev)
+{
+ int i;
+ void *map_address;
+
+ if (dev == NULL)
+ return -1;
+
+ /* Remap all BARs */
+ for (i = 0; i != PCI_MAX_RESOURCE; i++) {
+ /* skip empty BAR */
+ if (dev->mem_resource[i].phys_addr == 0)
+ continue;
+ map_address = mmap(dev->mem_resource[i].addr,
+ (size_t)dev->mem_resource[i].len,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ if (map_address == MAP_FAILED) {
+ RTE_LOG(ERR, EAL,
+ "Cannot remap resource for device %s\n",
+ dev->name);
+ return -1;
+ }
+ RTE_LOG(INFO, EAL,
+ "Successful remap resource for device %s\n",
+ dev->name);
+ }
+
+ return 0;
+}
+
static struct mapped_pci_resource *
pci_uio_find_resource(struct rte_pci_device *dev)
{
diff --git a/drivers/bus/pci/pci_params.c b/drivers/bus/pci/pci_params.c
new file mode 100644
index 00000000..3192e9c9
--- /dev/null
+++ b/drivers/bus/pci/pci_params.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaƫtan Rivet
+ */
+
+#include <rte_bus.h>
+#include <rte_bus_pci.h>
+#include <rte_dev.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_pci.h>
+
+#include "private.h"
+
+enum pci_params {
+ RTE_PCI_PARAM_ADDR,
+ RTE_PCI_PARAM_MAX,
+};
+
+static const char * const pci_params_keys[] = {
+ [RTE_PCI_PARAM_ADDR] = "addr",
+ [RTE_PCI_PARAM_MAX] = NULL,
+};
+
+static int
+pci_addr_kv_cmp(const char *key __rte_unused,
+ const char *value,
+ void *_addr2)
+{
+ struct rte_pci_addr _addr1;
+ struct rte_pci_addr *addr1 = &_addr1;
+ struct rte_pci_addr *addr2 = _addr2;
+
+ if (rte_pci_addr_parse(value, addr1))
+ return -1;
+ return -abs(rte_pci_addr_cmp(addr1, addr2));
+}
+
+static int
+pci_dev_match(const struct rte_device *dev,
+ const void *_kvlist)
+{
+ const struct rte_kvargs *kvlist = _kvlist;
+ const struct rte_pci_device *pdev;
+
+ if (kvlist == NULL)
+ /* Empty string matches everything. */
+ return 0;
+ pdev = RTE_DEV_TO_PCI_CONST(dev);
+ /* if any field does not match. */
+ if (rte_kvargs_process(kvlist, pci_params_keys[RTE_PCI_PARAM_ADDR],
+ &pci_addr_kv_cmp,
+ (void *)(intptr_t)&pdev->addr))
+ return 1;
+ return 0;
+}
+
+void *
+rte_pci_dev_iterate(const void *start,
+ const char *str,
+ const struct rte_dev_iterator *it __rte_unused)
+{
+ rte_bus_find_device_t find_device;
+ struct rte_kvargs *kvargs = NULL;
+ struct rte_device *dev;
+
+ if (str != NULL) {
+ kvargs = rte_kvargs_parse(str, pci_params_keys);
+ if (kvargs == NULL) {
+ RTE_LOG(ERR, EAL, "cannot parse argument list\n");
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ }
+ find_device = rte_pci_bus.bus.find_device;
+ dev = find_device(start, pci_dev_match, kvargs);
+ rte_kvargs_free(kvargs);
+ return dev;
+}
diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
index 8ddd03e1..13c3324b 100644
--- a/drivers/bus/pci/private.h
+++ b/drivers/bus/pci/private.h
@@ -10,9 +10,13 @@
#include <rte_pci.h>
#include <rte_bus_pci.h>
+extern struct rte_pci_bus rte_pci_bus;
+
struct rte_pci_driver;
struct rte_pci_device;
+extern struct rte_pci_bus rte_pci_bus;
+
/**
* Probe the PCI bus
*
@@ -123,6 +127,18 @@ void pci_uio_free_resource(struct rte_pci_device *dev,
struct mapped_pci_resource *uio_res);
/**
+ * Remap the PCI resource of a PCI device in anonymous virtual memory.
+ *
+ * @param dev
+ * Point to the struct rte pci device.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+pci_uio_remap_resource(struct rte_pci_device *dev);
+
+/**
* Map device memory to uio resource
*
* This function is private to EAL.
@@ -166,4 +182,27 @@ rte_pci_match(const struct rte_pci_driver *pci_drv,
enum rte_iova_mode
rte_pci_get_iommu_class(void);
+/*
+ * Iterate over internal devices,
+ * matching any device against the provided
+ * string.
+ *
+ * @param start
+ * Iteration starting point.
+ *
+ * @param str
+ * Device string to match against.
+ *
+ * @param it
+ * (unused) iterator structure.
+ *
+ * @return
+ * A pointer to the next matching device if any.
+ * NULL otherwise.
+ */
+void *
+rte_pci_dev_iterate(const void *start,
+ const char *str,
+ const struct rte_dev_iterator *it);
+
#endif /* _PCI_PRIVATE_H_ */
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 0d1955ff..f0d6d81c 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -62,10 +62,12 @@ struct rte_pci_device {
struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
/**< PCI Memory Resource */
struct rte_intr_handle intr_handle; /**< Interrupt handle */
- struct rte_pci_driver *driver; /**< Associated driver */
+ struct rte_pci_driver *driver; /**< PCI driver used in probing */
uint16_t max_vfs; /**< sriov enable if not zero */
enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */
char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */
+ struct rte_intr_handle vfio_req_intr_handle;
+ /**< Handler of VFIO request interrupt */
};
/**
@@ -121,7 +123,7 @@ struct rte_pci_driver {
pci_probe_t *probe; /**< Device Probe function. */
pci_remove_t *remove; /**< Device Remove function. */
const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
- uint32_t drv_flags; /**< Flags contolling handling of device. */
+ uint32_t drv_flags; /**< Flags RTE_PCI_DRV_*. */
};
/**
@@ -137,6 +139,8 @@ struct rte_pci_bus {
#define RTE_PCI_DRV_NEED_MAPPING 0x0001
/** Device needs PCI BAR mapping with enabled write combining (wc) */
#define RTE_PCI_DRV_WC_ACTIVATE 0x0002
+/** Device already probed can be probed again to check for new ports. */
+#define RTE_PCI_DRV_PROBE_AGAIN 0x0004
/** Device driver supports link state interrupt */
#define RTE_PCI_DRV_INTR_LSC 0x0008
/** Device driver supports device removal interrupt */
@@ -219,6 +223,8 @@ void rte_pci_unregister(struct rte_pci_driver *driver);
* The length of the data buffer.
* @param offset
* The offset into PCI config space
+ * @return
+ * Number of bytes read on success, negative on error.
*/
int rte_pci_read_config(const struct rte_pci_device *device,
void *buf, size_t len, off_t offset);