/*- * BSD LICENSE * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "rte_pci_dev_ids.h" #include "eal_filesystem.h" #include "eal_private.h" /** * @file * PCI probing under linux * * This code is used to simulate a PCI probe by parsing information in * sysfs. Moreover, when a registered driver matches a device, the * kernel driver currently using it is unloaded and replaced by * igb_uio module, which is a very minimal userland driver for Intel * network card, only providing access to PCI BAR to applications, and * enabling bus master. */ struct uio_map { void *addr; uint64_t offset; uint64_t size; uint64_t phaddr; }; /* * For multi-process we need to reproduce all PCI mappings in secondary * processes, so save them in a tailq. */ struct uio_resource { TAILQ_ENTRY(uio_resource) next; struct rte_pci_addr pci_addr; char path[PATH_MAX]; size_t nb_maps; struct uio_map maps[PCI_MAX_RESOURCE]; }; TAILQ_HEAD(uio_res_list, uio_resource); static struct uio_res_list *uio_res_list = NULL; /* unbind kernel driver for this device */ static int pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused) { RTE_LOG(ERR, EAL, "RTE_PCI_DRV_FORCE_UNBIND flag is not implemented " "for BSD\n"); return -ENOTSUP; } /* map a particular resource from a file */ static void * pci_map_resource(void *requested_addr, const char *devname, off_t offset, size_t size) { int fd; void *mapaddr; /* * open devname, to mmap it */ fd = open(devname, O_RDWR); if (fd < 0) { RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", devname, strerror(errno)); goto fail; } /* Map the PCI memory resource of device */ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); close(fd); if (mapaddr == MAP_FAILED || (requested_addr != NULL && mapaddr != requested_addr)) { RTE_LOG(ERR, EAL, "%s(): cannot mmap(%s(%d), %p, 0x%lx, 0x%lx):" " %s (%p)\n", __func__, devname, fd, requested_addr, (unsigned long)size, (unsigned long)offset, strerror(errno), mapaddr); goto fail; } RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); return mapaddr; fail: return NULL; } static int pci_uio_map_secondary(struct rte_pci_device *dev) { size_t i; struct uio_resource *uio_res; TAILQ_FOREACH(uio_res, uio_res_list, next) { /* skip this element if it doesn't match our PCI address */ if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr))) continue; for (i = 0; i != uio_res->nb_maps; i++) { if (pci_map_resource(uio_res->maps[i].addr, uio_res->path, (off_t)uio_res->maps[i].offset, (size_t)uio_res->maps[i].size) != uio_res->maps[i].addr) { RTE_LOG(ERR, EAL, "Cannot mmap device resource\n"); return (-1); } } return (0); } RTE_LOG(ERR, EAL, "Cannot find resource for device\n"); return 1; } /* map the PCI resource of a PCI device in virtual memory */ static int pci_uio_map_resource(struct rte_pci_device *dev) { int i, j; char devname[PATH_MAX]; /* contains the /dev/uioX */ void *mapaddr; uint64_t phaddr; uint64_t offset; uint64_t pagesz; struct rte_pci_addr *loc = &dev->addr; struct uio_resource *uio_res; struct uio_map *maps; dev->intr_handle.fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; /* secondary processes - use already recorded details */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) return (pci_uio_map_secondary(dev)); snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u", dev->addr.bus, dev->addr.devid, dev->addr.function); if (access(devname, O_RDWR) < 0) { RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); return 1; } /* save fd if in primary process */ dev->intr_handle.fd = open(devname, O_RDWR); if (dev->intr_handle.fd < 0) { RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", devname, strerror(errno)); return -1; } dev->intr_handle.type = RTE_INTR_HANDLE_UIO; /* allocate the mapping details for secondary processes*/ if ((uio_res = rte_zmalloc("UIO_RES", sizeof (*uio_res), 0)) == NULL) { RTE_LOG(ERR, EAL, "%s(): cannot store uio mmap details\n", __func__); return (-1); } snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname); memcpy(&uio_res->pci_addr, &dev->addr, sizeof(uio_res->pci_addr)); /* Map all BARs */ pagesz = sysconf(_SC_PAGESIZE); maps = uio_res->maps; for (i = uio_res->nb_maps = 0; i != PCI_MAX_RESOURCE; i++) { j = uio_res->nb_maps; /* skip empty BAR */ if ((phaddr = dev->mem_resource[i].phys_addr) == 0) continue; /* if matching map is found, then use it */ offset = i * pagesz; maps[j].offset = offset; maps[j].phaddr = dev->mem_resource[i].phys_addr; maps[j].size = dev->mem_resource[i].len; if (maps[j].addr != NULL || (mapaddr = pci_map_resource(NULL, devname, (off_t)offset, (size_t)maps[j].size) ) == NULL) { rte_free(uio_res); return (-1); } maps[j].addr = mapaddr; uio_res->nb_maps++; dev->mem_resource[i].addr = mapaddr; } TAILQ_INSERT_TAIL(uio_res_list, uio_res, next); return (0); } /* Compare two PCI device addresses. */ static int pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr *addr2) { uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr->function; uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) + addr2->function; if (dev_addr > dev_addr2) return 1; else return 0; } /* Scan one pci sysfs entry, and fill the devices list from it. */ static int pci_scan_one(int dev_pci_fd, struct pci_conf *conf) { struct rte_pci_device *dev; struct pci_bar_io bar; unsigned i, max; dev = malloc(sizeof(*dev)); if (dev == NULL) { return -1; } memset(dev, 0, sizeof(*dev)); dev->addr.domain = conf->pc_sel.pc_domain; dev->addr.bus = conf->pc_sel.pc_bus; dev->addr.devid = conf->pc_sel.pc_dev; dev->addr.function = conf->pc_sel.pc_func; /* get vendor id */ dev->id.vendor_id = conf->pc_vendor; /* get device id */ dev->id.device_id = conf->pc_device; /* get subsystem_vendor id */ dev->id.subsystem_vendor_id = conf->pc_subvendor; /* get subsystem_device id */ dev->id.subsystem_device_id = conf->pc_subdevice; /* TODO: get max_vfs */ dev->max_vfs = 0; /* FreeBSD has no NUMA support (yet) */ dev->numa_node = 0; /* parse resources */ switch (conf->pc_hdr & PCIM_HDRTYPE) { case PCIM_HDRTYPE_NORMAL: max = PCIR_MAX_BAR_0; break; case PCIM_HDRTYPE_BRIDGE: max = PCIR_MAX_BAR_1; break; case PCIM_HDRTYPE_CARDBUS: max = PCIR_MAX_BAR_2; break; default: goto skipdev; } for (i = 0; i <= max; i++) { bar.pbi_sel = conf->pc_sel; bar.pbi_reg = PCIR_BAR(i); if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0) continue; dev->mem_resource[i].len = bar.pbi_length; if (PCI_BAR_IO(bar.pbi_base)) { dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf)); continue; } dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf); } /* device is valid, add in list (sorted) */ if (TAILQ_EMPTY(&pci_device_list)) { TAILQ_INSERT_TAIL(&pci_device_list, dev, next); } else { struct rte_pci_device *dev2 = NULL; TAILQ_FOREACH(dev2, &pci_device_list, next) { if (pci_addr_comparison(&dev->addr, &dev2->addr)) continue; else { TAILQ_INSERT_BEFORE(dev2, dev, next); return 0; } } TAILQ_INSERT_TAIL(&pci_device_list, dev, next); } return 0; skipdev: free(dev); return 0; } /* * Scan the content of the PCI bus, and add the devices in the devices * list. Call pci_scan_one() for each pci entry found. */ static int pci_scan(void) { int fd = -1; unsigned dev_count = 0; struct pci_conf matches[16]; struct pci_conf_io conf_io = { .pat_buf_len = 0, .num_patterns = 0, .patterns = NULL, .match_buf_len = sizeof(matches), .matches = &matches[0], }; fd = open("/dev/pci", O_RDONLY); if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); goto error; } do { unsigned i; if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", __func__, strerror(errno)); goto error; } for (i = 0; i < conf_io.num_matches; i++) if (pci_scan_one(fd, &matches[i]) < 0) goto error; dev_count += conf_io.num_matches; } while(conf_io.status == PCI_GETCONF_MORE_DEVS); close(fd); RTE_LOG(ERR, EAL, "PCI scan found %u devices\n", dev_count); return 0; error: if (fd >= 0) close(fd); return -1; } /* * If vendor/device ID match, call the devinit() function of the * driver. */ int rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev) { struct rte_pci_id *id_table; int ret; for (id_table = dr->id_table ; id_table->vendor_id != 0; id_table++) { /* check if device's identifiers match the driver's ones */ if (id_table->vendor_id != dev->id.vendor_id && id_table->vendor_id != PCI_ANY_ID) continue; if (id_table->device_id != dev->id.device_id && id_table->device_id != PCI_ANY_ID) continue; if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && id_table->subsystem_vendor_id != PCI_ANY_ID) continue; if (id_table->subsystem_device_id != dev->id.subsystem_device_id && id_table->subsystem_device_id != PCI_ANY_ID) continue; struct rte_pci_addr *loc = &dev->addr; RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", loc->domain, loc->bus, loc->devid, loc->function, dev->numa_node); RTE_LOG(DEBUG, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, dev->id.device_id, dr->name); /* no initialization when blacklisted, return without error */ if (dev->devargs != NULL && dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) { RTE_LOG(DEBUG, EAL, " Device is blacklisted, not initializing\n"); return 0; } if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { /* map resources for devices that use igb_uio */ ret = pci_uio_map_resource(dev); if (ret != 0) return ret; } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND && rte_eal_process_type() == RTE_PROC_PRIMARY) { /* unbind current driver */ if (pci_unbind_kernel_driver(dev) < 0) return -1; } /* reference driver structure */ dev->driver = dr; /* call the driver devinit() function */ return dr->devinit(dr, dev); } /* return positive value if driver is not found */ return 1; } /* Init the PCI EAL subsystem */ int rte_eal_pci_init(void) { TAILQ_INIT(&pci_driver_list); TAILQ_INIT(&pci_device_list); uio_res_list = RTE_TAILQ_RESERVE_BY_IDX(RTE_TAILQ_PCI, uio_res_list); /* for debug purposes, PCI can be disabled */ if (internal_config.no_pci) return 0; if (pci_scan() < 0) { RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); return -1; } return 0; }