diff options
Diffstat (limited to 'drivers/bus/pci')
-rw-r--r-- | drivers/bus/pci/Makefile | 62 | ||||
-rw-r--r-- | drivers/bus/pci/bsd/Makefile | 32 | ||||
-rw-r--r-- | drivers/bus/pci/bsd/pci.c | 680 | ||||
-rw-r--r-- | drivers/bus/pci/linux/Makefile | 36 | ||||
-rw-r--r-- | drivers/bus/pci/linux/pci.c | 856 | ||||
-rw-r--r-- | drivers/bus/pci/linux/pci_init.h | 111 | ||||
-rw-r--r-- | drivers/bus/pci/linux/pci_uio.c | 568 | ||||
-rw-r--r-- | drivers/bus/pci/linux/pci_vfio.c | 763 | ||||
-rw-r--r-- | drivers/bus/pci/pci_common.c | 540 | ||||
-rw-r--r-- | drivers/bus/pci/pci_common_uio.c | 235 | ||||
-rw-r--r-- | drivers/bus/pci/private.h | 248 | ||||
-rw-r--r-- | drivers/bus/pci/rte_bus_pci.h | 340 | ||||
-rw-r--r-- | drivers/bus/pci/rte_bus_pci_version.map | 18 |
13 files changed, 4489 insertions, 0 deletions
diff --git a/drivers/bus/pci/Makefile b/drivers/bus/pci/Makefile new file mode 100644 index 00000000..f3df1c4c --- /dev/null +++ b/drivers/bus/pci/Makefile @@ -0,0 +1,62 @@ +# BSD LICENSE +# +# Copyright(c) 2017 6WIND S.A. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of 6WIND nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +LIB = librte_bus_pci.a +LIBABIVER := 1 +EXPORT_MAP := rte_bus_pci_version.map + +CFLAGS := -I$(SRCDIR) $(CFLAGS) +CFLAGS += -O3 $(WERROR_FLAGS) + +ifneq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),) +SYSTEM := linux +endif +ifneq ($(CONFIG_RTE_EXEC_ENV_BSDAPP),) +SYSTEM := bsd +endif + +CFLAGS += -I$(RTE_SDK)/drivers/bus/pci/$(SYSTEM) +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/$(SYSTEM)app/eal + +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_ethdev -lrte_pci + +include $(RTE_SDK)/drivers/bus/pci/$(SYSTEM)/Makefile +SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) := $(addprefix $(SYSTEM)/,$(SRCS)) +SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_common.c +SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_common_uio.c + +SYMLINK-$(CONFIG_RTE_LIBRTE_PCI_BUS)-include += rte_bus_pci.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/bus/pci/bsd/Makefile b/drivers/bus/pci/bsd/Makefile new file mode 100644 index 00000000..4450913e --- /dev/null +++ b/drivers/bus/pci/bsd/Makefile @@ -0,0 +1,32 @@ +# BSD LICENSE +# +# Copyright(c) 2017 6WIND S.A. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of 6WIND nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +SRCS += pci.c diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c new file mode 100644 index 00000000..facc4b12 --- /dev/null +++ b/drivers/bus/pci/bsd/pci.c @@ -0,0 +1,680 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <unistd.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <dirent.h> +#include <limits.h> +#include <sys/queue.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/pciio.h> +#include <dev/pci/pcireg.h> + +#if defined(RTE_ARCH_X86) +#include <machine/cpufunc.h> +#endif + +#include <rte_interrupts.h> +#include <rte_log.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_common.h> +#include <rte_launch.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_eal_memconfig.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_malloc.h> +#include <rte_string_fns.h> +#include <rte_debug.h> +#include <rte_devargs.h> + +#include "eal_filesystem.h" +#include "private.h" + +/** + * @file + * PCI probing under linux + * + * This code is used to simulate a PCI probe by parsing information in + * sysfs. Moreover, when a registered driver matches a device, the + * kernel driver currently using it is unloaded and replaced by + * igb_uio module, which is a very minimal userland driver for Intel + * network card, only providing access to PCI BAR to applications, and + * enabling bus master. + */ + +extern struct rte_pci_bus rte_pci_bus; + +/* Map pci device */ +int +rte_pci_map_device(struct rte_pci_device *dev) +{ + int ret = -1; + + /* try mapping the NIC resources */ + switch (dev->kdrv) { + case RTE_KDRV_NIC_UIO: + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + ret = 1; + break; + } + + return ret; +} + +/* Unmap pci device */ +void +rte_pci_unmap_device(struct rte_pci_device *dev) +{ + /* try unmapping the NIC resources */ + switch (dev->kdrv) { + case RTE_KDRV_NIC_UIO: + /* unmap resources for devices that use uio */ + pci_uio_unmap_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + break; + } +} + +void +pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res) +{ + rte_free(uio_res); + + if (dev->intr_handle.fd) { + close(dev->intr_handle.fd); + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + } +} + +int +pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res) +{ + char devname[PATH_MAX]; /* contains the /dev/uioX */ + struct rte_pci_addr *loc; + + loc = &dev->addr; + + snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u", + dev->addr.bus, dev->addr.devid, dev->addr.function); + + if (access(devname, O_RDWR) < 0) { + RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " + "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); + return 1; + } + + /* save fd if in primary process */ + dev->intr_handle.fd = open(devname, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + + /* allocate the mapping details for secondary processes*/ + *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); + if (*uio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + goto error; + } + + snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); + memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); + + return 0; + +error: + pci_uio_free_resource(dev, *uio_res); + return -1; +} + +int +pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx) +{ + int fd; + char *devname; + void *mapaddr; + uint64_t offset; + uint64_t pagesz; + struct pci_map *maps; + + maps = uio_res->maps; + devname = uio_res->path; + pagesz = sysconf(_SC_PAGESIZE); + + /* allocate memory to keep path */ + maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); + if (maps[map_idx].path == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", + strerror(errno)); + return -1; + } + + /* + * open resource file, to mmap it + */ + fd = open(devname, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + /* if matching map is found, then use it */ + offset = res_idx * pagesz; + mapaddr = pci_map_resource(NULL, fd, (off_t)offset, + (size_t)dev->mem_resource[res_idx].len, 0); + close(fd); + if (mapaddr == MAP_FAILED) + goto error; + + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; + maps[map_idx].size = dev->mem_resource[res_idx].len; + maps[map_idx].addr = mapaddr; + maps[map_idx].offset = offset; + strcpy(maps[map_idx].path, devname); + dev->mem_resource[res_idx].addr = mapaddr; + + return 0; + +error: + rte_free(maps[map_idx].path); + return -1; +} + +static int +pci_scan_one(int dev_pci_fd, struct pci_conf *conf) +{ + struct rte_pci_device *dev; + struct pci_bar_io bar; + unsigned i, max; + + dev = malloc(sizeof(*dev)); + if (dev == NULL) { + return -1; + } + + memset(dev, 0, sizeof(*dev)); + dev->addr.domain = conf->pc_sel.pc_domain; + dev->addr.bus = conf->pc_sel.pc_bus; + dev->addr.devid = conf->pc_sel.pc_dev; + dev->addr.function = conf->pc_sel.pc_func; + + /* get vendor id */ + dev->id.vendor_id = conf->pc_vendor; + + /* get device id */ + dev->id.device_id = conf->pc_device; + + /* get subsystem_vendor id */ + dev->id.subsystem_vendor_id = conf->pc_subvendor; + + /* get subsystem_device id */ + dev->id.subsystem_device_id = conf->pc_subdevice; + + /* get class id */ + dev->id.class_id = (conf->pc_class << 16) | + (conf->pc_subclass << 8) | + (conf->pc_progif); + + /* TODO: get max_vfs */ + dev->max_vfs = 0; + + /* FreeBSD has no NUMA support (yet) */ + dev->device.numa_node = 0; + + pci_name_set(dev); + + /* FreeBSD has only one pass through driver */ + dev->kdrv = RTE_KDRV_NIC_UIO; + + /* parse resources */ + switch (conf->pc_hdr & PCIM_HDRTYPE) { + case PCIM_HDRTYPE_NORMAL: + max = PCIR_MAX_BAR_0; + break; + case PCIM_HDRTYPE_BRIDGE: + max = PCIR_MAX_BAR_1; + break; + case PCIM_HDRTYPE_CARDBUS: + max = PCIR_MAX_BAR_2; + break; + default: + goto skipdev; + } + + for (i = 0; i <= max; i++) { + bar.pbi_sel = conf->pc_sel; + bar.pbi_reg = PCIR_BAR(i); + if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0) + continue; + + dev->mem_resource[i].len = bar.pbi_length; + if (PCI_BAR_IO(bar.pbi_base)) { + dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf)); + continue; + } + dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf); + } + + /* device is valid, add in list (sorted) */ + if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { + rte_pci_add_device(dev); + } + else { + struct rte_pci_device *dev2 = NULL; + int ret; + + TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { + ret = rte_pci_addr_cmp(&dev->addr, &dev2->addr); + if (ret > 0) + continue; + else if (ret < 0) { + rte_pci_insert_device(dev2, dev); + } else { /* already registered */ + dev2->kdrv = dev->kdrv; + dev2->max_vfs = dev->max_vfs; + pci_name_set(dev2); + memmove(dev2->mem_resource, + dev->mem_resource, + sizeof(dev->mem_resource)); + free(dev); + } + return 0; + } + rte_pci_add_device(dev); + } + + return 0; + +skipdev: + free(dev); + return 0; +} + +/* + * Scan the content of the PCI bus, and add the devices in the devices + * list. Call pci_scan_one() for each pci entry found. + */ +int +rte_pci_scan(void) +{ + int fd; + unsigned dev_count = 0; + struct pci_conf matches[16]; + struct pci_conf_io conf_io = { + .pat_buf_len = 0, + .num_patterns = 0, + .patterns = NULL, + .match_buf_len = sizeof(matches), + .matches = &matches[0], + }; + + /* for debug purposes, PCI can be disabled */ + if (!rte_eal_has_pci()) + return 0; + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + do { + unsigned i; + if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { + RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", + __func__, strerror(errno)); + goto error; + } + + for (i = 0; i < conf_io.num_matches; i++) + if (pci_scan_one(fd, &matches[i]) < 0) + goto error; + + dev_count += conf_io.num_matches; + } while(conf_io.status == PCI_GETCONF_MORE_DEVS); + + close(fd); + + RTE_LOG(DEBUG, EAL, "PCI scan found %u devices\n", dev_count); + return 0; + +error: + if (fd >= 0) + close(fd); + return -1; +} + +/* + * Get iommu class of PCI devices on the bus. + */ +enum rte_iova_mode +rte_pci_get_iommu_class(void) +{ + /* Supports only RTE_KDRV_NIC_UIO */ + return RTE_IOVA_PA; +} + +int +pci_update_device(const struct rte_pci_addr *addr) +{ + int fd; + struct pci_conf matches[2]; + struct pci_match_conf match = { + .pc_sel = { + .pc_domain = addr->domain, + .pc_bus = addr->bus, + .pc_dev = addr->devid, + .pc_func = addr->function, + }, + }; + struct pci_conf_io conf_io = { + .pat_buf_len = 0, + .num_patterns = 1, + .patterns = &match, + .match_buf_len = sizeof(matches), + .matches = &matches[0], + }; + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { + RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", + __func__, strerror(errno)); + goto error; + } + + if (conf_io.num_matches != 1) + goto error; + + if (pci_scan_one(fd, &matches[0]) < 0) + goto error; + + close(fd); + + return 0; + +error: + if (fd >= 0) + close(fd); + return -1; +} + +/* Read PCI config space. */ +int rte_pci_read_config(const struct rte_pci_device *dev, + void *buf, size_t len, off_t offset) +{ + int fd = -1; + int size; + struct pci_io pi = { + .pi_sel = { + .pc_domain = dev->addr.domain, + .pc_bus = dev->addr.bus, + .pc_dev = dev->addr.devid, + .pc_func = dev->addr.function, + }, + .pi_reg = offset, + }; + + fd = open("/dev/pci", O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + while (len > 0) { + size = (len >= 4) ? 4 : ((len >= 2) ? 2 : 1); + pi.pi_width = size; + + if (ioctl(fd, PCIOCREAD, &pi) < 0) + goto error; + memcpy(buf, &pi.pi_data, size); + + buf = (char *)buf + size; + pi.pi_reg += size; + len -= size; + } + close(fd); + + return 0; + + error: + if (fd >= 0) + close(fd); + return -1; +} + +/* Write PCI config space. */ +int rte_pci_write_config(const struct rte_pci_device *dev, + const void *buf, size_t len, off_t offset) +{ + int fd = -1; + + struct pci_io pi = { + .pi_sel = { + .pc_domain = dev->addr.domain, + .pc_bus = dev->addr.bus, + .pc_dev = dev->addr.devid, + .pc_func = dev->addr.function, + }, + .pi_reg = offset, + .pi_data = *(const uint32_t *)buf, + .pi_width = len, + }; + + if (len == 3 || len > sizeof(pi.pi_data)) { + RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__); + goto error; + } + + memcpy(&pi.pi_data, buf, len); + + fd = open("/dev/pci", O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + if (ioctl(fd, PCIOCWRITE, &pi) < 0) + goto error; + + close(fd); + return 0; + + error: + if (fd >= 0) + close(fd); + return -1; +} + +int +rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + int ret; + + switch (dev->kdrv) { +#if defined(RTE_ARCH_X86) + case RTE_KDRV_NIC_UIO: + if ((uintptr_t) dev->mem_resource[bar].addr <= UINT16_MAX) { + p->base = (uintptr_t)dev->mem_resource[bar].addr; + ret = 0; + } else + ret = -1; + break; +#endif + default: + ret = -1; + break; + } + + if (!ret) + p->dev = dev; + + return ret; +} + +static void +pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + uint8_t *d; + int size; + unsigned short reg = p->base + offset; + + for (d = data; len > 0; d += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + *(uint32_t *)d = inl(reg); + } else if (len >= 2) { + size = 2; + *(uint16_t *)d = inw(reg); + } else { + size = 1; + *d = inb(reg); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +void +rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { + case RTE_KDRV_NIC_UIO: + pci_uio_ioport_read(p, data, len, offset); + break; + default: + break; + } +} + +static void +pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + const uint8_t *s; + int size; + unsigned short reg = p->base + offset; + + for (s = data; len > 0; s += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + outl(reg, *(const uint32_t *)s); + } else if (len >= 2) { + size = 2; + outw(reg, *(const uint16_t *)s); + } else { + size = 1; + outb(reg, *s); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +void +rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { + case RTE_KDRV_NIC_UIO: + pci_uio_ioport_write(p, data, len, offset); + break; + default: + break; + } +} + +int +rte_pci_ioport_unmap(struct rte_pci_ioport *p) +{ + int ret; + + switch (p->dev->kdrv) { +#if defined(RTE_ARCH_X86) + case RTE_KDRV_NIC_UIO: + ret = 0; + break; +#endif + default: + ret = -1; + break; + } + + return ret; +} diff --git a/drivers/bus/pci/linux/Makefile b/drivers/bus/pci/linux/Makefile new file mode 100644 index 00000000..77c5f970 --- /dev/null +++ b/drivers/bus/pci/linux/Makefile @@ -0,0 +1,36 @@ +# BSD LICENSE +# +# Copyright(c) 2017 6WIND S.A. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of 6WIND nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +SRCS += pci.c +SRCS += pci_uio.c +SRCS += pci_vfio.c + +CFLAGS += -D_GNU_SOURCE diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c new file mode 100644 index 00000000..5da6728f --- /dev/null +++ b/drivers/bus/pci/linux/pci.c @@ -0,0 +1,856 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> +#include <dirent.h> + +#include <rte_log.h> +#include <rte_bus.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_eal_memconfig.h> +#include <rte_malloc.h> +#include <rte_devargs.h> +#include <rte_memcpy.h> +#include <rte_vfio.h> + +#include "eal_private.h" +#include "eal_filesystem.h" + +#include "private.h" +#include "pci_init.h" + +/** + * @file + * PCI probing under linux + * + * This code is used to simulate a PCI probe by parsing information in sysfs. + * When a registered device matches a driver, it is then initialized with + * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). + */ + +extern struct rte_pci_bus rte_pci_bus; + +static int +pci_get_kernel_driver_by_path(const char *filename, char *dri_name) +{ + int count; + char path[PATH_MAX]; + char *name; + + if (!filename || !dri_name) + return -1; + + count = readlink(filename, path, PATH_MAX); + if (count >= PATH_MAX) + return -1; + + /* For device does not have a driver */ + if (count < 0) + return 1; + + path[count] = '\0'; + + name = strrchr(path, '/'); + if (name) { + strncpy(dri_name, name + 1, strlen(name + 1) + 1); + return 0; + } + + return -1; +} + +/* Map pci device */ +int +rte_pci_map_device(struct rte_pci_device *dev) +{ + int ret = -1; + + /* try mapping the NIC resources using VFIO if it exists */ + switch (dev->kdrv) { + case RTE_KDRV_VFIO: +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) + ret = pci_vfio_map_resource(dev); +#endif + break; + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + if (rte_eal_using_phys_addrs()) { + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + } + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + ret = 1; + break; + } + + return ret; +} + +/* Unmap pci device */ +void +rte_pci_unmap_device(struct rte_pci_device *dev) +{ + /* try unmapping the NIC resources using VFIO if it exists */ + switch (dev->kdrv) { + case RTE_KDRV_VFIO: +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) + pci_vfio_unmap_resource(dev); +#endif + break; + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + /* unmap resources for devices that use uio */ + pci_uio_unmap_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + break; + } +} + +void * +pci_find_max_end_va(void) +{ + const struct rte_memseg *seg = rte_eal_get_physmem_layout(); + const struct rte_memseg *last = seg; + unsigned i = 0; + + for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { + if (seg->addr == NULL) + break; + + if (seg->addr > last->addr) + last = seg; + + } + return RTE_PTR_ADD(last->addr, last->len); +} + +/* parse one line of the "resource" sysfs file (note that the 'line' + * string is modified) + */ +int +pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, + uint64_t *end_addr, uint64_t *flags) +{ + union pci_resource_info { + struct { + char *phys_addr; + char *end_addr; + char *flags; + }; + char *ptrs[PCI_RESOURCE_FMT_NVAL]; + } res_info; + + if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) { + RTE_LOG(ERR, EAL, + "%s(): bad resource format\n", __func__); + return -1; + } + errno = 0; + *phys_addr = strtoull(res_info.phys_addr, NULL, 16); + *end_addr = strtoull(res_info.end_addr, NULL, 16); + *flags = strtoull(res_info.flags, NULL, 16); + if (errno != 0) { + RTE_LOG(ERR, EAL, + "%s(): bad resource format\n", __func__); + return -1; + } + + return 0; +} + +/* parse the "resource" sysfs file */ +static int +pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) +{ + FILE *f; + char buf[BUFSIZ]; + int i; + uint64_t phys_addr, end_addr, flags; + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n"); + return -1; + } + + for (i = 0; i<PCI_MAX_RESOURCE; i++) { + + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot read resource\n", __func__); + goto error; + } + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, + &end_addr, &flags) < 0) + goto error; + + if (flags & IORESOURCE_MEM) { + dev->mem_resource[i].phys_addr = phys_addr; + dev->mem_resource[i].len = end_addr - phys_addr + 1; + /* not mapped for now */ + dev->mem_resource[i].addr = NULL; + } + } + fclose(f); + return 0; + +error: + fclose(f); + return -1; +} + +/* Scan one pci sysfs entry, and fill the devices list from it. */ +static int +pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) +{ + char filename[PATH_MAX]; + unsigned long tmp; + struct rte_pci_device *dev; + char driver[PATH_MAX]; + int ret; + + dev = malloc(sizeof(*dev)); + if (dev == NULL) + return -1; + + memset(dev, 0, sizeof(*dev)); + dev->addr = *addr; + + /* get vendor id */ + snprintf(filename, sizeof(filename), "%s/vendor", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.vendor_id = (uint16_t)tmp; + + /* get device id */ + snprintf(filename, sizeof(filename), "%s/device", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.device_id = (uint16_t)tmp; + + /* get subsystem_vendor id */ + snprintf(filename, sizeof(filename), "%s/subsystem_vendor", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.subsystem_vendor_id = (uint16_t)tmp; + + /* get subsystem_device id */ + snprintf(filename, sizeof(filename), "%s/subsystem_device", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.subsystem_device_id = (uint16_t)tmp; + + /* get class_id */ + snprintf(filename, sizeof(filename), "%s/class", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + /* the least 24 bits are valid: class, subclass, program interface */ + dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID; + + /* get max_vfs */ + dev->max_vfs = 0; + snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); + if (!access(filename, F_OK) && + eal_parse_sysfs_value(filename, &tmp) == 0) + dev->max_vfs = (uint16_t)tmp; + else { + /* for non igb_uio driver, need kernel version >= 3.8 */ + snprintf(filename, sizeof(filename), + "%s/sriov_numvfs", dirname); + if (!access(filename, F_OK) && + eal_parse_sysfs_value(filename, &tmp) == 0) + dev->max_vfs = (uint16_t)tmp; + } + + /* get numa node, default to 0 if not present */ + snprintf(filename, sizeof(filename), "%s/numa_node", + dirname); + + if (access(filename, F_OK) != -1) { + if (eal_parse_sysfs_value(filename, &tmp) == 0) + dev->device.numa_node = tmp; + else + dev->device.numa_node = -1; + } else { + dev->device.numa_node = 0; + } + + pci_name_set(dev); + + /* parse resources */ + snprintf(filename, sizeof(filename), "%s/resource", dirname); + if (pci_parse_sysfs_resource(filename, dev) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__); + free(dev); + return -1; + } + + /* parse driver */ + snprintf(filename, sizeof(filename), "%s/driver", dirname); + ret = pci_get_kernel_driver_by_path(filename, driver); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); + free(dev); + return -1; + } + + if (!ret) { + if (!strcmp(driver, "vfio-pci")) + dev->kdrv = RTE_KDRV_VFIO; + else if (!strcmp(driver, "igb_uio")) + dev->kdrv = RTE_KDRV_IGB_UIO; + else if (!strcmp(driver, "uio_pci_generic")) + dev->kdrv = RTE_KDRV_UIO_GENERIC; + else + dev->kdrv = RTE_KDRV_UNKNOWN; + } else + dev->kdrv = RTE_KDRV_NONE; + + /* device is valid, add in list (sorted) */ + if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { + rte_pci_add_device(dev); + } else { + struct rte_pci_device *dev2; + int ret; + + TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { + ret = rte_pci_addr_cmp(&dev->addr, &dev2->addr); + if (ret > 0) + continue; + + if (ret < 0) { + rte_pci_insert_device(dev2, dev); + } else { /* already registered */ + dev2->kdrv = dev->kdrv; + dev2->max_vfs = dev->max_vfs; + pci_name_set(dev2); + memmove(dev2->mem_resource, dev->mem_resource, + sizeof(dev->mem_resource)); + free(dev); + } + return 0; + } + + rte_pci_add_device(dev); + } + + return 0; +} + +int +pci_update_device(const struct rte_pci_addr *addr) +{ + char filename[PATH_MAX]; + + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT, + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, + addr->function); + + return pci_scan_one(filename, addr); +} + +/* + * split up a pci address into its constituent parts. + */ +static int +parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) +{ + /* first split on ':' */ + union splitaddr { + struct { + char *domain; + char *bus; + char *devid; + char *function; + }; + char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */ + } splitaddr; + + char *buf_copy = strndup(buf, bufsize); + if (buf_copy == NULL) + return -1; + + if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':') + != PCI_FMT_NVAL - 1) + goto error; + /* final split is on '.' between devid and function */ + splitaddr.function = strchr(splitaddr.devid,'.'); + if (splitaddr.function == NULL) + goto error; + *splitaddr.function++ = '\0'; + + /* now convert to int values */ + errno = 0; + addr->domain = strtoul(splitaddr.domain, NULL, 16); + addr->bus = strtoul(splitaddr.bus, NULL, 16); + addr->devid = strtoul(splitaddr.devid, NULL, 16); + addr->function = strtoul(splitaddr.function, NULL, 10); + if (errno != 0) + goto error; + + free(buf_copy); /* free the copy made with strdup */ + return 0; +error: + free(buf_copy); + return -1; +} + +/* + * Scan the content of the PCI bus, and the devices in the devices + * list + */ +int +rte_pci_scan(void) +{ + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + struct rte_pci_addr addr; + + /* for debug purposes, PCI can be disabled */ + if (!rte_eal_has_pci()) + return 0; + +#ifdef VFIO_PRESENT + if (!pci_vfio_is_enabled()) + RTE_LOG(DEBUG, EAL, "VFIO PCI modules not loaded\n"); +#endif + + dir = opendir(rte_pci_get_sysfs_path()); + if (dir == NULL) { + RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", + __func__, strerror(errno)); + return -1; + } + + while ((e = readdir(dir)) != NULL) { + if (e->d_name[0] == '.') + continue; + + if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0) + continue; + + snprintf(dirname, sizeof(dirname), "%s/%s", + rte_pci_get_sysfs_path(), e->d_name); + + if (pci_scan_one(dirname, &addr) < 0) + goto error; + } + closedir(dir); + return 0; + +error: + closedir(dir); + return -1; +} + +/* + * Is pci device bound to any kdrv + */ +static inline int +pci_one_device_is_bound(void) +{ + struct rte_pci_device *dev = NULL; + int ret = 0; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (dev->kdrv == RTE_KDRV_UNKNOWN || + dev->kdrv == RTE_KDRV_NONE) { + continue; + } else { + ret = 1; + break; + } + } + return ret; +} + +/* + * Any one of the device bound to uio + */ +static inline int +pci_one_device_bound_uio(void) +{ + struct rte_pci_device *dev = NULL; + struct rte_devargs *devargs; + int need_check; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + devargs = dev->device.devargs; + + need_check = 0; + switch (rte_pci_bus.bus.conf.scan_mode) { + case RTE_BUS_SCAN_WHITELIST: + if (devargs && devargs->policy == RTE_DEV_WHITELISTED) + need_check = 1; + break; + case RTE_BUS_SCAN_UNDEFINED: + case RTE_BUS_SCAN_BLACKLIST: + if (devargs == NULL || + devargs->policy != RTE_DEV_BLACKLISTED) + need_check = 1; + break; + } + + if (!need_check) + continue; + + if (dev->kdrv == RTE_KDRV_IGB_UIO || + dev->kdrv == RTE_KDRV_UIO_GENERIC) { + return 1; + } + } + return 0; +} + +/* + * Any one of the device has iova as va + */ +static inline int +pci_one_device_has_iova_va(void) +{ + struct rte_pci_device *dev = NULL; + struct rte_pci_driver *drv = NULL; + + FOREACH_DRIVER_ON_PCIBUS(drv) { + if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) { + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (dev->kdrv == RTE_KDRV_VFIO && + rte_pci_match(drv, dev)) + return 1; + } + } + } + return 0; +} + +/* + * Get iommu class of PCI devices on the bus. + */ +enum rte_iova_mode +rte_pci_get_iommu_class(void) +{ + bool is_bound; + bool is_vfio_noiommu_enabled = true; + bool has_iova_va; + bool is_bound_uio; + bool spapr_iommu = +#if defined(RTE_ARCH_PPC_64) + true; +#else + false; +#endif + + is_bound = pci_one_device_is_bound(); + if (!is_bound) + return RTE_IOVA_DC; + + has_iova_va = pci_one_device_has_iova_va(); + is_bound_uio = pci_one_device_bound_uio(); +#ifdef VFIO_PRESENT + is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ? + true : false; +#endif + + if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled && + !spapr_iommu) + return RTE_IOVA_VA; + + if (has_iova_va) { + RTE_LOG(WARNING, EAL, "Some devices want iova as va but pa will be used because.. "); + if (is_vfio_noiommu_enabled) + RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n"); + if (is_bound_uio) + RTE_LOG(WARNING, EAL, "few device bound to UIO\n"); + if (spapr_iommu) + RTE_LOG(WARNING, EAL, "sPAPR IOMMU does not support IOVA as VA\n"); + } + + return RTE_IOVA_PA; +} + +/* Read PCI config space. */ +int rte_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &device->intr_handle; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + return pci_uio_read_config(intr_handle, buf, len, offset); + +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + return pci_vfio_read_config(intr_handle, buf, len, offset); +#endif + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } +} + +/* Write PCI config space. */ +int rte_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &device->intr_handle; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + return pci_uio_write_config(intr_handle, buf, len, offset); + +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + return pci_vfio_write_config(intr_handle, buf, len, offset); +#endif + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } +} + +#if defined(RTE_ARCH_X86) +static int +pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, + struct rte_pci_ioport *p) +{ + uint16_t start, end; + FILE *fp; + char *line = NULL; + char pci_id[16]; + int found = 0; + size_t linesz; + + snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT, + dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + + fp = fopen("/proc/ioports", "r"); + if (fp == NULL) { + RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__); + return -1; + } + + while (getdelim(&line, &linesz, '\n', fp) > 0) { + char *ptr = line; + char *left; + int n; + + n = strcspn(ptr, ":"); + ptr[n] = 0; + left = &ptr[n + 1]; + + while (*left && isspace(*left)) + left++; + + if (!strncmp(left, pci_id, strlen(pci_id))) { + found = 1; + + while (*ptr && isspace(*ptr)) + ptr++; + + sscanf(ptr, "%04hx-%04hx", &start, &end); + + break; + } + } + + free(line); + fclose(fp); + + if (!found) + return -1; + + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + p->base = start; + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start); + + return 0; +} +#endif + +int +rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + int ret = -1; + + switch (dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + if (pci_vfio_is_enabled()) + ret = pci_vfio_ioport_map(dev, bar, p); + break; +#endif + case RTE_KDRV_IGB_UIO: + ret = pci_uio_ioport_map(dev, bar, p); + break; + case RTE_KDRV_UIO_GENERIC: +#if defined(RTE_ARCH_X86) + ret = pci_ioport_map(dev, bar, p); +#else + ret = pci_uio_ioport_map(dev, bar, p); +#endif + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + ret = pci_ioport_map(dev, bar, p); +#endif + break; + default: + break; + } + + if (!ret) + p->dev = dev; + + return ret; +} + +void +rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + pci_vfio_ioport_read(p, data, len, offset); + break; +#endif + case RTE_KDRV_IGB_UIO: + pci_uio_ioport_read(p, data, len, offset); + break; + case RTE_KDRV_UIO_GENERIC: + pci_uio_ioport_read(p, data, len, offset); + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + pci_uio_ioport_read(p, data, len, offset); +#endif + break; + default: + break; + } +} + +void +rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + pci_vfio_ioport_write(p, data, len, offset); + break; +#endif + case RTE_KDRV_IGB_UIO: + pci_uio_ioport_write(p, data, len, offset); + break; + case RTE_KDRV_UIO_GENERIC: + pci_uio_ioport_write(p, data, len, offset); + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + pci_uio_ioport_write(p, data, len, offset); +#endif + break; + default: + break; + } +} + +int +rte_pci_ioport_unmap(struct rte_pci_ioport *p) +{ + int ret = -1; + + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + if (pci_vfio_is_enabled()) + ret = pci_vfio_ioport_unmap(p); + break; +#endif + case RTE_KDRV_IGB_UIO: + ret = pci_uio_ioport_unmap(p); + break; + case RTE_KDRV_UIO_GENERIC: +#if defined(RTE_ARCH_X86) + ret = 0; +#else + ret = pci_uio_ioport_unmap(p); +#endif + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + ret = 0; +#endif + break; + default: + break; + } + + return ret; +} diff --git a/drivers/bus/pci/linux/pci_init.h b/drivers/bus/pci/linux/pci_init.h new file mode 100644 index 00000000..f342c47d --- /dev/null +++ b/drivers/bus/pci/linux/pci_init.h @@ -0,0 +1,111 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_PCI_INIT_H_ +#define EAL_PCI_INIT_H_ + +#include <linux/version.h> + +#include <rte_vfio.h> + +/** IO resource type: */ +#define IORESOURCE_IO 0x00000100 +#define IORESOURCE_MEM 0x00000200 + +/* + * Helper function to map PCI resources right after hugepages in virtual memory + */ +extern void *pci_map_addr; +void *pci_find_max_end_va(void); + +/* parse one line of the "resource" sysfs file (note that the 'line' + * string is modified) + */ +int pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, + uint64_t *end_addr, uint64_t *flags); + +int pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res); +void pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res); +int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx); + +int pci_uio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs); +int pci_uio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs); + +int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); +void pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); +void pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); +int pci_uio_ioport_unmap(struct rte_pci_ioport *p); + +#ifdef VFIO_PRESENT + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) +#define RTE_PCI_MSIX_TABLE_BIR 0x7 +#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 +#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff +#else +#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR +#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET +#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE +#endif + +/* access config space */ +int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs); +int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs); + +int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); +void pci_vfio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); +void pci_vfio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); +int pci_vfio_ioport_unmap(struct rte_pci_ioport *p); + +/* map/unmap VFIO resource prototype */ +int pci_vfio_map_resource(struct rte_pci_device *dev); +int pci_vfio_unmap_resource(struct rte_pci_device *dev); + +int pci_vfio_is_enabled(void); + +#endif + +#endif /* EAL_PCI_INIT_H_ */ diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c new file mode 100644 index 00000000..92b7f027 --- /dev/null +++ b/drivers/bus/pci/linux/pci_uio.c @@ -0,0 +1,568 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <dirent.h> +#include <inttypes.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/sysmacros.h> +#include <linux/pci_regs.h> + +#if defined(RTE_ARCH_X86) +#include <sys/io.h> +#endif + +#include <rte_log.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_eal_memconfig.h> +#include <rte_common.h> +#include <rte_malloc.h> + +#include "eal_filesystem.h" +#include "pci_init.h" + +void *pci_map_addr = NULL; + +#define OFF_MAX ((uint64_t)(off_t)-1) + +int +pci_uio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offset) +{ + return pread(intr_handle->uio_cfg_fd, buf, len, offset); +} + +int +pci_uio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offset) +{ + return pwrite(intr_handle->uio_cfg_fd, buf, len, offset); +} + +static int +pci_uio_set_bus_master(int dev_fd) +{ + uint16_t reg; + int ret; + + ret = pread(dev_fd, ®, sizeof(reg), PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, + "Cannot read command from PCI config space!\n"); + return -1; + } + + /* return if bus mastering is already on */ + if (reg & PCI_COMMAND_MASTER) + return 0; + + reg |= PCI_COMMAND_MASTER; + + ret = pwrite(dev_fd, ®, sizeof(reg), PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, + "Cannot write command to PCI config space!\n"); + return -1; + } + + return 0; +} + +static int +pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) +{ + FILE *f; + char filename[PATH_MAX]; + int ret; + unsigned major, minor; + dev_t dev; + + /* get the name of the sysfs file that contains the major and minor + * of the uio device and read its content */ + snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path); + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n", + __func__); + return -1; + } + + ret = fscanf(f, "%u:%u", &major, &minor); + if (ret != 2) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n", + __func__); + fclose(f); + return -1; + } + fclose(f); + + /* create the char device "mknod /dev/uioX c major minor" */ + snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); + dev = makedev(major, minor); + ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); + if (ret != 0) { + RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", + __func__, strerror(errno)); + return -1; + } + + return ret; +} + +/* + * Return the uioX char device used for a pci device. On success, return + * the UIO number and fill dstbuf string with the path of the device in + * sysfs. On error, return a negative value. In this case dstbuf is + * invalid. + */ +static int +pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, + unsigned int buflen, int create) +{ + struct rte_pci_addr *loc = &dev->addr; + int uio_num = -1; + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + + /* depending on kernel version, uio can be located in uio/uioX + * or uio:uioX */ + + snprintf(dirname, sizeof(dirname), + "%s/" PCI_PRI_FMT "/uio", rte_pci_get_sysfs_path(), + loc->domain, loc->bus, loc->devid, loc->function); + + dir = opendir(dirname); + if (dir == NULL) { + /* retry with the parent directory */ + snprintf(dirname, sizeof(dirname), + "%s/" PCI_PRI_FMT, rte_pci_get_sysfs_path(), + loc->domain, loc->bus, loc->devid, loc->function); + dir = opendir(dirname); + + if (dir == NULL) { + RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname); + return -1; + } + } + + /* take the first file starting with "uio" */ + while ((e = readdir(dir)) != NULL) { + /* format could be uio%d ...*/ + int shortprefix_len = sizeof("uio") - 1; + /* ... or uio:uio%d */ + int longprefix_len = sizeof("uio:uio") - 1; + char *endptr; + + if (strncmp(e->d_name, "uio", 3) != 0) + continue; + + /* first try uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + shortprefix_len)) { + snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); + break; + } + + /* then try uio:uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + longprefix_len)) { + snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num); + break; + } + } + closedir(dir); + + /* No uio resource found */ + if (e == NULL) + return -1; + + /* create uio device if we've been asked to */ + if (rte_eal_create_uio_dev() && create && + pci_mknod_uio_dev(dstbuf, uio_num) < 0) + RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num); + + return uio_num; +} + +void +pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res) +{ + rte_free(uio_res); + + if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + if (dev->intr_handle.fd >= 0) { + close(dev->intr_handle.fd); + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + } +} + +int +pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res) +{ + char dirname[PATH_MAX]; + char cfgname[PATH_MAX]; + char devname[PATH_MAX]; /* contains the /dev/uioX */ + int uio_num; + struct rte_pci_addr *loc; + + loc = &dev->addr; + + /* find uio resource */ + uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1); + if (uio_num < 0) { + RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " + "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); + return 1; + } + snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); + + /* save fd if in primary process */ + dev->intr_handle.fd = open(devname, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + snprintf(cfgname, sizeof(cfgname), + "/sys/class/uio/uio%u/device/config", uio_num); + dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR); + if (dev->intr_handle.uio_cfg_fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + cfgname, strerror(errno)); + goto error; + } + + if (dev->kdrv == RTE_KDRV_IGB_UIO) + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + else { + dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; + + /* set bus master that is not done by uio_pci_generic */ + if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) { + RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); + goto error; + } + } + + /* allocate the mapping details for secondary processes*/ + *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); + if (*uio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + goto error; + } + + snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); + memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); + + return 0; + +error: + pci_uio_free_resource(dev, *uio_res); + return -1; +} + +int +pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx) +{ + int fd; + char devname[PATH_MAX]; + void *mapaddr; + struct rte_pci_addr *loc; + struct pci_map *maps; + + loc = &dev->addr; + maps = uio_res->maps; + + /* update devname for mmap */ + snprintf(devname, sizeof(devname), + "%s/" PCI_PRI_FMT "/resource%d", + rte_pci_get_sysfs_path(), + loc->domain, loc->bus, loc->devid, + loc->function, res_idx); + + /* allocate memory to keep path */ + maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); + if (maps[map_idx].path == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", + strerror(errno)); + return -1; + } + + /* + * open resource file, to mmap it + */ + fd = open(devname, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + mapaddr = pci_map_resource(pci_map_addr, fd, 0, + (size_t)dev->mem_resource[res_idx].len, 0); + close(fd); + if (mapaddr == MAP_FAILED) + goto error; + + pci_map_addr = RTE_PTR_ADD(mapaddr, + (size_t)dev->mem_resource[res_idx].len); + + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; + maps[map_idx].size = dev->mem_resource[res_idx].len; + maps[map_idx].addr = mapaddr; + maps[map_idx].offset = 0; + strcpy(maps[map_idx].path, devname); + dev->mem_resource[res_idx].addr = mapaddr; + + return 0; + +error: + rte_free(maps[map_idx].path); + return -1; +} + +#if defined(RTE_ARCH_X86) +int +pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + char dirname[PATH_MAX]; + char filename[PATH_MAX]; + int uio_num; + unsigned long start; + + uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0); + if (uio_num < 0) + return -1; + + /* get portio start */ + snprintf(filename, sizeof(filename), + "%s/portio/port%d/start", dirname, bar); + if (eal_parse_sysfs_value(filename, &start) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n", + __func__); + return -1; + } + /* ensure we don't get anything funny here, read/write will cast to + * uin16_t */ + if (start > UINT16_MAX) + return -1; + + /* FIXME only for primary process ? */ + if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) { + + snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); + dev->intr_handle.fd = open(filename, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + filename, strerror(errno)); + return -1; + } + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + } + + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start); + + p->base = start; + p->len = 0; + return 0; +} +#else +int +pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + FILE *f; + char buf[BUFSIZ]; + char filename[PATH_MAX]; + uint64_t phys_addr, end_addr, flags; + int fd, i; + void *addr; + + /* open and read addresses of the corresponding resource in sysfs */ + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource", + rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n", + strerror(errno)); + return -1; + } + for (i = 0; i < bar + 1; i++) { + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n"); + goto error; + } + } + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, + &end_addr, &flags) < 0) + goto error; + if ((flags & IORESOURCE_IO) == 0) { + RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar); + goto error; + } + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d", + rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function, bar); + + /* mmap the pci resource */ + fd = open(filename, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, + strerror(errno)); + goto error; + } + addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + close(fd); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot mmap IO port resource: %s\n", + strerror(errno)); + goto error; + } + + /* strangely, the base address is mmap addr + phys_addr */ + p->base = (uintptr_t)addr + phys_addr; + p->len = end_addr + 1; + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%"PRIx64"\n", p->base); + fclose(f); + + return 0; + +error: + fclose(f); + return -1; +} +#endif + +void +pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + uint8_t *d; + int size; + uintptr_t reg = p->base + offset; + + for (d = data; len > 0; d += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; +#if defined(RTE_ARCH_X86) + *(uint32_t *)d = inl(reg); +#else + *(uint32_t *)d = *(volatile uint32_t *)reg; +#endif + } else if (len >= 2) { + size = 2; +#if defined(RTE_ARCH_X86) + *(uint16_t *)d = inw(reg); +#else + *(uint16_t *)d = *(volatile uint16_t *)reg; +#endif + } else { + size = 1; +#if defined(RTE_ARCH_X86) + *d = inb(reg); +#else + *d = *(volatile uint8_t *)reg; +#endif + } + } +} + +void +pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + const uint8_t *s; + int size; + uintptr_t reg = p->base + offset; + + for (s = data; len > 0; s += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; +#if defined(RTE_ARCH_X86) + outl_p(*(const uint32_t *)s, reg); +#else + *(volatile uint32_t *)reg = *(const uint32_t *)s; +#endif + } else if (len >= 2) { + size = 2; +#if defined(RTE_ARCH_X86) + outw_p(*(const uint16_t *)s, reg); +#else + *(volatile uint16_t *)reg = *(const uint16_t *)s; +#endif + } else { + size = 1; +#if defined(RTE_ARCH_X86) + outb_p(*s, reg); +#else + *(volatile uint8_t *)reg = *s; +#endif + } + } +} + +int +pci_uio_ioport_unmap(struct rte_pci_ioport *p) +{ +#if defined(RTE_ARCH_X86) + RTE_SET_USED(p); + /* FIXME close intr fd ? */ + return 0; +#else + return munmap((void *)(uintptr_t)p->base, p->len); +#endif +} diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c new file mode 100644 index 00000000..1f93fa4d --- /dev/null +++ b/drivers/bus/pci/linux/pci_vfio.c @@ -0,0 +1,763 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> +#include <fcntl.h> +#include <linux/pci_regs.h> +#include <sys/eventfd.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <stdbool.h> + +#include <rte_log.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_eal_memconfig.h> +#include <rte_malloc.h> +#include <rte_vfio.h> + +#include "eal_filesystem.h" + +#include "pci_init.h" +#include "private.h" + +/** + * @file + * PCI probing under linux (VFIO version) + * + * This code tries to determine if the PCI device is bound to VFIO driver, + * and initialize it (map BARs, set up interrupts) if that's the case. + * + * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". + */ + +#ifdef VFIO_PRESENT + +#define PAGE_SIZE (sysconf(_SC_PAGESIZE)) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +static struct rte_tailq_elem rte_vfio_tailq = { + .name = "VFIO_RESOURCE_LIST", +}; +EAL_REGISTER_TAILQ(rte_vfio_tailq) + +int +pci_vfio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs) +{ + return pread64(intr_handle->vfio_dev_fd, buf, len, + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); +} + +int +pci_vfio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs) +{ + return pwrite64(intr_handle->vfio_dev_fd, buf, len, + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); +} + +/* get PCI BAR number where MSI-X interrupts are */ +static int +pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) +{ + int ret; + uint32_t reg; + uint16_t flags; + uint8_t cap_id, cap_offset; + + /* read PCI capability pointer from config space */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_CAPABILITY_LIST); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " + "config space!\n"); + return -1; + } + + /* we need first byte */ + cap_offset = reg & 0xFF; + + while (cap_offset) { + + /* read PCI capability ID */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " + "config space!\n"); + return -1; + } + + /* we need first byte */ + cap_id = reg & 0xFF; + + /* if we haven't reached MSI-X, check next capability */ + if (cap_id != PCI_CAP_ID_MSIX) { + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " + "config space!\n"); + return -1; + } + + /* we need second byte */ + cap_offset = (reg & 0xFF00) >> 8; + + continue; + } + /* else, read table offset */ + else { + /* table offset resides in the next 4 bytes */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 4); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " + "space!\n"); + return -1; + } + + ret = pread64(fd, &flags, sizeof(flags), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 2); + if (ret != sizeof(flags)) { + RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " + "space!\n"); + return -1; + } + + msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR; + msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; + msix_table->size = + 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); + + return 0; + } + } + return 0; +} + +/* set PCI bus mastering */ +static int +pci_vfio_set_bus_master(int dev_fd, bool op) +{ + uint16_t reg; + int ret; + + ret = pread64(dev_fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); + return -1; + } + + if (op) + /* set the master bit */ + reg |= PCI_COMMAND_MASTER; + else + reg &= ~(PCI_COMMAND_MASTER); + + ret = pwrite64(dev_fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_COMMAND); + + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); + return -1; + } + + return 0; +} + +/* set up interrupt support (but not enable interrupts) */ +static int +pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) +{ + int i, ret, intr_idx; + enum rte_intr_mode intr_mode; + + /* default to invalid index */ + intr_idx = VFIO_PCI_NUM_IRQS; + + /* Get default / configured intr_mode */ + intr_mode = rte_eal_vfio_intr_mode(); + + /* get interrupt type from internal config (MSI-X by default, can be + * overridden from the command line + */ + switch (intr_mode) { + case RTE_INTR_MODE_MSIX: + intr_idx = VFIO_PCI_MSIX_IRQ_INDEX; + break; + case RTE_INTR_MODE_MSI: + intr_idx = VFIO_PCI_MSI_IRQ_INDEX; + break; + case RTE_INTR_MODE_LEGACY: + intr_idx = VFIO_PCI_INTX_IRQ_INDEX; + break; + /* don't do anything if we want to automatically determine interrupt type */ + case RTE_INTR_MODE_NONE: + break; + default: + RTE_LOG(ERR, EAL, " unknown default interrupt type!\n"); + return -1; + } + + /* start from MSI-X interrupt type */ + for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { + struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + int fd = -1; + + /* skip interrupt modes we don't want */ + if (intr_mode != RTE_INTR_MODE_NONE && + i != intr_idx) + continue; + + irq.index = i; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); + if (ret < 0) { + RTE_LOG(ERR, EAL, " cannot get IRQ info, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* if this vector cannot be used with eventfd, fail if we explicitly + * specified interrupt type, otherwise continue */ + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { + if (intr_mode != RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, + " interrupt vector does not support eventfd!\n"); + return -1; + } else + continue; + } + + /* set up an eventfd for interrupts */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; + + switch (i) { + case VFIO_PCI_MSIX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSIX; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + break; + case VFIO_PCI_MSI_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSI; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + break; + case VFIO_PCI_INTX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_LEGACY; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + break; + default: + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); + return -1; + } + + return 0; + } + + /* if we're here, we haven't found a suitable interrupt vector */ + return -1; +} + +static int +pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index) +{ + uint32_t ioport_bar; + int ret; + + ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_BASE_ADDRESS_0 + bar_index*4); + if (ret != sizeof(ioport_bar)) { + RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n", + PCI_BASE_ADDRESS_0 + bar_index*4); + return -1; + } + + return (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) != 0; +} + +static int +pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd) +{ + if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) { + RTE_LOG(ERR, EAL, "Error setting up interrupts!\n"); + return -1; + } + + /* set bus mastering for the device */ + if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { + RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); + return -1; + } + + /* + * Reset the device. If the device is not capable of resetting, + * then it updates errno as EINVAL. + */ + if (ioctl(vfio_dev_fd, VFIO_DEVICE_RESET) && errno != EINVAL) { + RTE_LOG(ERR, EAL, "Unable to reset device! Error: %d (%s)\n", + errno, strerror(errno)); + return -1; + } + + return 0; +} + +static int +pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res, + int bar_index, int additional_flags) +{ + struct memreg { + unsigned long offset, size; + } memreg[2] = {}; + void *bar_addr; + struct pci_msix_table *msix_table = &vfio_res->msix_table; + struct pci_map *bar = &vfio_res->maps[bar_index]; + + if (bar->size == 0) + /* Skip this BAR */ + return 0; + + if (msix_table->bar_index == bar_index) { + /* + * VFIO will not let us map the MSI-X table, + * but we can map around it. + */ + uint32_t table_start = msix_table->offset; + uint32_t table_end = table_start + msix_table->size; + table_end = (table_end + ~PAGE_MASK) & PAGE_MASK; + table_start &= PAGE_MASK; + + if (table_start == 0 && table_end >= bar->size) { + /* Cannot map this BAR */ + RTE_LOG(DEBUG, EAL, "Skipping BAR%d\n", bar_index); + bar->size = 0; + bar->addr = 0; + return 0; + } + + memreg[0].offset = bar->offset; + memreg[0].size = table_start; + memreg[1].offset = bar->offset + table_end; + memreg[1].size = bar->size - table_end; + + RTE_LOG(DEBUG, EAL, + "Trying to map BAR%d that contains the MSI-X " + "table. Trying offsets: " + "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", bar_index, + memreg[0].offset, memreg[0].size, + memreg[1].offset, memreg[1].size); + } else { + memreg[0].offset = bar->offset; + memreg[0].size = bar->size; + } + + /* reserve the address using an inaccessible mapping */ + bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE | + MAP_ANONYMOUS | additional_flags, -1, 0); + if (bar_addr != MAP_FAILED) { + void *map_addr = NULL; + if (memreg[0].size) { + /* actual map of first part */ + map_addr = pci_map_resource(bar_addr, vfio_dev_fd, + memreg[0].offset, + memreg[0].size, + MAP_FIXED); + } + + /* if there's a second part, try to map it */ + if (map_addr != MAP_FAILED + && memreg[1].offset && memreg[1].size) { + void *second_addr = RTE_PTR_ADD(bar_addr, + memreg[1].offset - + (uintptr_t)bar->offset); + map_addr = pci_map_resource(second_addr, + vfio_dev_fd, + memreg[1].offset, + memreg[1].size, + MAP_FIXED); + } + + if (map_addr == MAP_FAILED || !map_addr) { + munmap(bar_addr, bar->size); + bar_addr = MAP_FAILED; + RTE_LOG(ERR, EAL, "Failed to map pci BAR%d\n", + bar_index); + return -1; + } + } else { + RTE_LOG(ERR, EAL, + "Failed to create inaccessible mapping for BAR%d\n", + bar_index); + return -1; + } + + bar->addr = bar_addr; + return 0; +} + +static int +pci_vfio_map_resource_primary(struct rte_pci_device *dev) +{ + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + char pci_addr[PATH_MAX] = {0}; + int vfio_dev_fd; + struct rte_pci_addr *loc = &dev->addr; + int i, ret; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list = + RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + + struct pci_map *maps; + + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr, + &vfio_dev_fd, &device_info); + if (ret) + return ret; + + /* allocate vfio_res and get region info */ + vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + goto err_vfio_dev_fd; + } + memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr)); + + /* get number of registers (up to BAR5) */ + vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions, + VFIO_PCI_BAR5_REGION_INDEX + 1); + + /* map BARs */ + maps = vfio_res->maps; + + vfio_res->msix_table.bar_index = -1; + /* get MSI-X BAR, if any (we have to know where it is because we can't + * easily mmap it when using VFIO) + */ + ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table); + if (ret < 0) { + RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", + pci_addr); + goto err_vfio_dev_fd; + } + + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + struct vfio_region_info reg = { .argsz = sizeof(reg) }; + void *bar_addr; + + reg.index = i; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®); + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot get device region info " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + goto err_vfio_res; + } + + /* chk for io port region */ + ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i); + if (ret < 0) + goto err_vfio_res; + else if (ret) { + RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n", + i); + continue; + } + + /* skip non-mmapable BARs */ + if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) + continue; + + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + bar_addr = pci_map_addr; + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); + + maps[i].addr = bar_addr; + maps[i].offset = reg.offset; + maps[i].size = reg.size; + maps[i].path = NULL; /* vfio doesn't have per-resource paths */ + + ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0); + if (ret < 0) { + RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", + pci_addr, i, strerror(errno)); + goto err_vfio_res; + } + + dev->mem_resource[i].addr = maps[i].addr; + } + + if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) { + RTE_LOG(ERR, EAL, " %s setup device failed\n", pci_addr); + goto err_vfio_res; + } + + TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); + + return 0; +err_vfio_res: + rte_free(vfio_res); +err_vfio_dev_fd: + close(vfio_dev_fd); + return -1; +} + +static int +pci_vfio_map_resource_secondary(struct rte_pci_device *dev) +{ + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + char pci_addr[PATH_MAX] = {0}; + int vfio_dev_fd; + struct rte_pci_addr *loc = &dev->addr; + int i, ret; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list = + RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + + struct pci_map *maps; + + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr, + &vfio_dev_fd, &device_info); + if (ret) + return ret; + + /* if we're in a secondary process, just find our tailq entry */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (rte_pci_addr_cmp(&vfio_res->pci_addr, + &dev->addr)) + continue; + break; + } + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", + pci_addr); + goto err_vfio_dev_fd; + } + + /* map BARs */ + maps = vfio_res->maps; + + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED); + if (ret < 0) { + RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", + pci_addr, i, strerror(errno)); + goto err_vfio_dev_fd; + } + + dev->mem_resource[i].addr = maps[i].addr; + } + + return 0; +err_vfio_dev_fd: + close(vfio_dev_fd); + return -1; +} + +/* + * map the PCI resources of a PCI device in virtual memory (VFIO version). + * primary and secondary processes follow almost exactly the same path + */ +int +pci_vfio_map_resource(struct rte_pci_device *dev) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + return pci_vfio_map_resource_primary(dev); + else + return pci_vfio_map_resource_secondary(dev); +} + +int +pci_vfio_unmap_resource(struct rte_pci_device *dev) +{ + char pci_addr[PATH_MAX] = {0}; + struct rte_pci_addr *loc = &dev->addr; + int i, ret; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list; + + struct pci_map *maps; + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + + if (close(dev->intr_handle.fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n", + pci_addr); + return -1; + } + + if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) { + RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n", + pci_addr); + return -1; + } + + ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr, + dev->intr_handle.vfio_dev_fd); + if (ret < 0) { + RTE_LOG(ERR, EAL, + "%s(): cannot release device\n", __func__); + return ret; + } + + vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + /* Get vfio_res */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) + continue; + break; + } + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", + pci_addr); + return -1; + } + + /* unmap BARs */ + maps = vfio_res->maps; + + RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n", + pci_addr); + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + + /* + * We do not need to be aware of MSI-X table BAR mappings as + * when mapping. Just using current maps array is enough + */ + if (maps[i].addr) { + RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n", + pci_addr, maps[i].addr); + pci_unmap_resource(maps[i].addr, maps[i].size); + } + } + + TAILQ_REMOVE(vfio_res_list, vfio_res, next); + + return 0; +} + +int +pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + if (bar < VFIO_PCI_BAR0_REGION_INDEX || + bar > VFIO_PCI_BAR5_REGION_INDEX) { + RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar); + return -1; + } + + p->dev = dev; + p->base = VFIO_GET_REGION_ADDR(bar); + return 0; +} + +void +pci_vfio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; + + if (pread64(intr_handle->vfio_dev_fd, data, + len, p->base + offset) <= 0) + RTE_LOG(ERR, EAL, + "Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n", + VFIO_GET_REGION_IDX(p->base), (int)offset); +} + +void +pci_vfio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; + + if (pwrite64(intr_handle->vfio_dev_fd, data, + len, p->base + offset) <= 0) + RTE_LOG(ERR, EAL, + "Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n", + VFIO_GET_REGION_IDX(p->base), (int)offset); +} + +int +pci_vfio_ioport_unmap(struct rte_pci_ioport *p) +{ + RTE_SET_USED(p); + return -1; +} + +int +pci_vfio_is_enabled(void) +{ + return rte_vfio_is_enabled("vfio_pci"); +} +#endif diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c new file mode 100644 index 00000000..104fdf90 --- /dev/null +++ b/drivers/bus/pci/pci_common.c @@ -0,0 +1,540 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright 2013-2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> +#include <inttypes.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/queue.h> +#include <sys/mman.h> + +#include <rte_errno.h> +#include <rte_interrupts.h> +#include <rte_log.h> +#include <rte_bus.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_per_lcore.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_string_fns.h> +#include <rte_common.h> +#include <rte_devargs.h> + +#include "private.h" + +extern struct rte_pci_bus rte_pci_bus; + +#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" + +const char *rte_pci_get_sysfs_path(void) +{ + const char *path = NULL; + + path = getenv("SYSFS_PCI_DEVICES"); + if (path == NULL) + return SYSFS_PCI_DEVICES; + + return path; +} + +static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev) +{ + struct rte_devargs *devargs; + struct rte_pci_addr addr; + struct rte_bus *pbus; + + pbus = rte_bus_find_by_name("pci"); + TAILQ_FOREACH(devargs, &devargs_list, next) { + if (devargs->bus != pbus) + continue; + devargs->bus->parse(devargs->name, &addr); + if (!rte_pci_addr_cmp(&dev->addr, &addr)) + return devargs; + } + return NULL; +} + +void +pci_name_set(struct rte_pci_device *dev) +{ + struct rte_devargs *devargs; + + /* Each device has its internal, canonical name set. */ + rte_pci_device_name(&dev->addr, + dev->name, sizeof(dev->name)); + devargs = pci_devargs_lookup(dev); + dev->device.devargs = devargs; + /* In blacklist mode, if the device is not blacklisted, no + * rte_devargs exists for it. + */ + if (devargs != NULL) + /* If an rte_devargs exists, the generic rte_device uses the + * given name as its namea + */ + dev->device.name = dev->device.devargs->name; + else + /* Otherwise, it uses the internal, canonical form. */ + dev->device.name = dev->name; +} + +/* + * Match the PCI Driver and Device using the ID Table + */ +int +rte_pci_match(const struct rte_pci_driver *pci_drv, + const struct rte_pci_device *pci_dev) +{ + const struct rte_pci_id *id_table; + + for (id_table = pci_drv->id_table; id_table->vendor_id != 0; + id_table++) { + /* check if device's identifiers match the driver's ones */ + if (id_table->vendor_id != pci_dev->id.vendor_id && + id_table->vendor_id != PCI_ANY_ID) + continue; + if (id_table->device_id != pci_dev->id.device_id && + id_table->device_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_vendor_id != + pci_dev->id.subsystem_vendor_id && + id_table->subsystem_vendor_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_device_id != + pci_dev->id.subsystem_device_id && + id_table->subsystem_device_id != PCI_ANY_ID) + continue; + if (id_table->class_id != pci_dev->id.class_id && + id_table->class_id != RTE_CLASS_ANY_ID) + continue; + + return 1; + } + + return 0; +} + +/* + * If vendor/device ID match, call the probe() function of the + * driver. + */ +static int +rte_pci_probe_one_driver(struct rte_pci_driver *dr, + struct rte_pci_device *dev) +{ + int ret; + struct rte_pci_addr *loc; + + if ((dr == NULL) || (dev == NULL)) + return -EINVAL; + + loc = &dev->addr; + + /* The device is not blacklisted; Check if driver supports it */ + if (!rte_pci_match(dr, dev)) + /* Match of device and driver failed */ + return 1; + + RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, loc->function, + dev->device.numa_node); + + /* no initialization when blacklisted, return without error */ + if (dev->device.devargs != NULL && + dev->device.devargs->policy == + RTE_DEV_BLACKLISTED) { + RTE_LOG(INFO, EAL, " Device is blacklisted, not" + " initializing\n"); + return 1; + } + + if (dev->device.numa_node < 0) { + RTE_LOG(WARNING, EAL, " Invalid NUMA socket, default to 0\n"); + dev->device.numa_node = 0; + } + + RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->driver.name); + + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { + /* map resources for devices that use igb_uio */ + ret = rte_pci_map_device(dev); + if (ret != 0) + return ret; + } + + /* reference driver structure */ + dev->driver = dr; + dev->device.driver = &dr->driver; + + /* call the driver probe() function */ + ret = dr->probe(dr, dev); + if (ret) { + dev->driver = NULL; + dev->device.driver = NULL; + if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) && + /* Don't unmap if device is unsupported and + * driver needs mapped resources. + */ + !(ret > 0 && + (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES))) + rte_pci_unmap_device(dev); + } + + return ret; +} + +/* + * If vendor/device ID match, call the remove() function of the + * driver. + */ +static int +rte_pci_detach_dev(struct rte_pci_device *dev) +{ + struct rte_pci_addr *loc; + struct rte_pci_driver *dr; + int ret = 0; + + if (dev == NULL) + return -EINVAL; + + dr = dev->driver; + loc = &dev->addr; + + RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, + loc->function, dev->device.numa_node); + + RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->driver.name); + + if (dr->remove) { + ret = dr->remove(dev); + if (ret < 0) + return ret; + } + + /* clear driver structure */ + dev->driver = NULL; + + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) + /* unmap resources for devices that use igb_uio */ + rte_pci_unmap_device(dev); + + return 0; +} + +/* + * If vendor/device ID match, call the probe() function of all + * registered driver for the given device. Return -1 if initialization + * failed, return 1 if no driver is found for this device. + */ +static int +pci_probe_all_drivers(struct rte_pci_device *dev) +{ + struct rte_pci_driver *dr = NULL; + int rc = 0; + + if (dev == NULL) + return -1; + + /* Check if a driver is already loaded */ + if (dev->driver != NULL) + return 0; + + FOREACH_DRIVER_ON_PCIBUS(dr) { + rc = rte_pci_probe_one_driver(dr, dev); + if (rc < 0) + /* negative value is an error */ + return -1; + if (rc > 0) + /* positive value means driver doesn't support it */ + continue; + return 0; + } + return 1; +} + +/* + * Find the pci device specified by pci address, then invoke probe function of + * the driver of the device. + */ +int +rte_pci_probe_one(const struct rte_pci_addr *addr) +{ + struct rte_pci_device *dev = NULL; + + int ret = 0; + + if (addr == NULL) + return -1; + + /* update current pci device in global list, kernel bindings might have + * changed since last time we looked at it. + */ + if (pci_update_device(addr) < 0) + goto err_return; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (rte_pci_addr_cmp(&dev->addr, addr)) + continue; + + ret = pci_probe_all_drivers(dev); + if (ret) + goto err_return; + return 0; + } + return -1; + +err_return: + RTE_LOG(WARNING, EAL, + "Requested device " PCI_PRI_FMT " cannot be used\n", + addr->domain, addr->bus, addr->devid, addr->function); + return -1; +} + +/* + * Detach device specified by its pci address. + */ +int +rte_pci_detach(const struct rte_pci_addr *addr) +{ + struct rte_pci_device *dev = NULL; + int ret = 0; + + if (addr == NULL) + return -1; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (rte_pci_addr_cmp(&dev->addr, addr)) + continue; + + ret = rte_pci_detach_dev(dev); + if (ret < 0) + /* negative value is an error */ + goto err_return; + if (ret > 0) + /* positive value means driver doesn't support it */ + continue; + + rte_pci_remove_device(dev); + free(dev); + return 0; + } + return -1; + +err_return: + RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT + " cannot be used\n", dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + return -1; +} + +/* + * Scan the content of the PCI bus, and call the probe() function for + * all registered drivers that have a matching entry in its id_table + * for discovered devices. + */ +int +rte_pci_probe(void) +{ + struct rte_pci_device *dev = NULL; + size_t probed = 0, failed = 0; + struct rte_devargs *devargs; + int probe_all = 0; + int ret = 0; + + if (rte_pci_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST) + probe_all = 1; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + probed++; + + devargs = dev->device.devargs; + /* probe all or only whitelisted devices */ + if (probe_all) + ret = pci_probe_all_drivers(dev); + else if (devargs != NULL && + devargs->policy == RTE_DEV_WHITELISTED) + ret = pci_probe_all_drivers(dev); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT + " cannot be used\n", dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + rte_errno = errno; + failed++; + ret = 0; + } + } + + return (probed && probed == failed) ? -1 : 0; +} + +/* dump one device */ +static int +pci_dump_one_device(FILE *f, struct rte_pci_device *dev) +{ + int i; + + fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id, + dev->id.device_id); + + for (i = 0; i != sizeof(dev->mem_resource) / + sizeof(dev->mem_resource[0]); i++) { + fprintf(f, " %16.16"PRIx64" %16.16"PRIx64"\n", + dev->mem_resource[i].phys_addr, + dev->mem_resource[i].len); + } + return 0; +} + +/* dump devices on the bus */ +void +rte_pci_dump(FILE *f) +{ + struct rte_pci_device *dev = NULL; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + pci_dump_one_device(f, dev); + } +} + +static int +pci_parse(const char *name, void *addr) +{ + struct rte_pci_addr *out = addr; + struct rte_pci_addr pci_addr; + bool parse; + + parse = (rte_pci_addr_parse(name, &pci_addr) == 0); + if (parse && addr != NULL) + *out = pci_addr; + return parse == false; +} + +/* register a driver */ +void +rte_pci_register(struct rte_pci_driver *driver) +{ + TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next); + driver->bus = &rte_pci_bus; +} + +/* unregister a driver */ +void +rte_pci_unregister(struct rte_pci_driver *driver) +{ + TAILQ_REMOVE(&rte_pci_bus.driver_list, driver, next); + driver->bus = NULL; +} + +/* Add a device to PCI bus */ +void +rte_pci_add_device(struct rte_pci_device *pci_dev) +{ + TAILQ_INSERT_TAIL(&rte_pci_bus.device_list, pci_dev, next); +} + +/* Insert a device into a predefined position in PCI bus */ +void +rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, + struct rte_pci_device *new_pci_dev) +{ + TAILQ_INSERT_BEFORE(exist_pci_dev, new_pci_dev, next); +} + +/* Remove a device from PCI bus */ +void +rte_pci_remove_device(struct rte_pci_device *pci_dev) +{ + TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next); +} + +static struct rte_device * +pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, + const void *data) +{ + struct rte_pci_device *dev; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + if (start && &dev->device == start) { + start = NULL; /* starting point found */ + continue; + } + if (cmp(&dev->device, data) == 0) + return &dev->device; + } + + return NULL; +} + +static int +pci_plug(struct rte_device *dev) +{ + return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev)); +} + +static int +pci_unplug(struct rte_device *dev) +{ + struct rte_pci_device *pdev; + int ret; + + pdev = RTE_DEV_TO_PCI(dev); + ret = rte_pci_detach_dev(pdev); + if (ret == 0) { + rte_pci_remove_device(pdev); + free(pdev); + } + return ret; +} + +struct rte_pci_bus rte_pci_bus = { + .bus = { + .scan = rte_pci_scan, + .probe = rte_pci_probe, + .find_device = pci_find_device, + .plug = pci_plug, + .unplug = pci_unplug, + .parse = pci_parse, + .get_iommu_class = rte_pci_get_iommu_class, + }, + .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), + .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), +}; + +RTE_REGISTER_BUS(pci, rte_pci_bus.bus); diff --git a/drivers/bus/pci/pci_common_uio.c b/drivers/bus/pci/pci_common_uio.c new file mode 100644 index 00000000..06711316 --- /dev/null +++ b/drivers/bus/pci/pci_common_uio.c @@ -0,0 +1,235 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#include <rte_eal.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_tailq.h> +#include <rte_log.h> +#include <rte_malloc.h> + +#include "private.h" + +static struct rte_tailq_elem rte_uio_tailq = { + .name = "UIO_RESOURCE_LIST", +}; +EAL_REGISTER_TAILQ(rte_uio_tailq) + +static int +pci_uio_map_secondary(struct rte_pci_device *dev) +{ + int fd, i, j; + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + TAILQ_FOREACH(uio_res, uio_res_list, next) { + + /* skip this element if it doesn't match our PCI address */ + if (rte_pci_addr_cmp(&uio_res->pci_addr, &dev->addr)) + continue; + + for (i = 0; i != uio_res->nb_maps; i++) { + /* + * open devname, to mmap it + */ + fd = open(uio_res->maps[i].path, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + uio_res->maps[i].path, strerror(errno)); + return -1; + } + + void *mapaddr = pci_map_resource(uio_res->maps[i].addr, + fd, (off_t)uio_res->maps[i].offset, + (size_t)uio_res->maps[i].size, 0); + /* fd is not needed in slave process, close it */ + close(fd); + if (mapaddr != uio_res->maps[i].addr) { + RTE_LOG(ERR, EAL, + "Cannot mmap device resource file %s to address: %p\n", + uio_res->maps[i].path, + uio_res->maps[i].addr); + if (mapaddr != MAP_FAILED) { + /* unmap addrs correctly mapped */ + for (j = 0; j < i; j++) + pci_unmap_resource( + uio_res->maps[j].addr, + (size_t)uio_res->maps[j].size); + /* unmap addr wrongly mapped */ + pci_unmap_resource(mapaddr, + (size_t)uio_res->maps[i].size); + } + return -1; + } + } + return 0; + } + + RTE_LOG(ERR, EAL, "Cannot find resource for device\n"); + return 1; +} + +/* map the PCI resource of a PCI device in virtual memory */ +int +pci_uio_map_resource(struct rte_pci_device *dev) +{ + int i, map_idx = 0, ret; + uint64_t phaddr; + struct mapped_pci_resource *uio_res = NULL; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + dev->intr_handle.fd = -1; + dev->intr_handle.uio_cfg_fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + + /* secondary processes - use already recorded details */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return pci_uio_map_secondary(dev); + + /* allocate uio resource */ + ret = pci_uio_alloc_resource(dev, &uio_res); + if (ret) + return ret; + + /* Map all BARs */ + for (i = 0; i != PCI_MAX_RESOURCE; i++) { + /* skip empty BAR */ + phaddr = dev->mem_resource[i].phys_addr; + if (phaddr == 0) + continue; + + ret = pci_uio_map_resource_by_index(dev, i, + uio_res, map_idx); + if (ret) + goto error; + + map_idx++; + } + + uio_res->nb_maps = map_idx; + + TAILQ_INSERT_TAIL(uio_res_list, uio_res, next); + + return 0; +error: + for (i = 0; i < map_idx; i++) { + pci_unmap_resource(uio_res->maps[i].addr, + (size_t)uio_res->maps[i].size); + rte_free(uio_res->maps[i].path); + } + pci_uio_free_resource(dev, uio_res); + return -1; +} + +static void +pci_uio_unmap(struct mapped_pci_resource *uio_res) +{ + int i; + + if (uio_res == NULL) + return; + + for (i = 0; i != uio_res->nb_maps; i++) { + pci_unmap_resource(uio_res->maps[i].addr, + (size_t)uio_res->maps[i].size); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(uio_res->maps[i].path); + } +} + +static struct mapped_pci_resource * +pci_uio_find_resource(struct rte_pci_device *dev) +{ + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + if (dev == NULL) + return NULL; + + TAILQ_FOREACH(uio_res, uio_res_list, next) { + + /* skip this element if it doesn't match our PCI address */ + if (!rte_pci_addr_cmp(&uio_res->pci_addr, &dev->addr)) + return uio_res; + } + return NULL; +} + +/* unmap the PCI resource of a PCI device in virtual memory */ +void +pci_uio_unmap_resource(struct rte_pci_device *dev) +{ + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + if (dev == NULL) + return; + + /* find an entry for the device */ + uio_res = pci_uio_find_resource(dev); + if (uio_res == NULL) + return; + + /* secondary processes - just free maps */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return pci_uio_unmap(uio_res); + + TAILQ_REMOVE(uio_res_list, uio_res, next); + + /* unmap all resources */ + pci_uio_unmap(uio_res); + + /* free uio resource */ + rte_free(uio_res); + + /* close fd if in primary process */ + close(dev->intr_handle.fd); + if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; +} diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h new file mode 100644 index 00000000..2283f093 --- /dev/null +++ b/drivers/bus/pci/private.h @@ -0,0 +1,248 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 6WIND. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _PCI_PRIVATE_H_ +#define _PCI_PRIVATE_H_ + +#include <stdbool.h> +#include <stdio.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> + +struct rte_pci_driver; +struct rte_pci_device; + +/** + * Probe the PCI bus + * + * @return + * - 0 on success. + * - !0 on error. + */ +int +rte_pci_probe(void); + +/** + * Scan the content of the PCI bus, and the devices in the devices + * list + * + * @return + * 0 on success, negative on error + */ +int rte_pci_scan(void); + +/** + * Probe the single PCI device. + * + * Scan the content of the PCI bus, and find the pci device specified by pci + * address, then call the probe() function for registered driver that has a + * matching entry in its id_table for discovered device. + * + * @param addr + * The PCI Bus-Device-Function address to probe. + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_pci_probe_one(const struct rte_pci_addr *addr); + +/** + * Close the single PCI device. + * + * Scan the content of the PCI bus, and find the pci device specified by pci + * address, then call the remove() function for registered driver that has a + * matching entry in its id_table for discovered device. + * + * @param addr + * The PCI Bus-Device-Function address to close. + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_pci_detach(const struct rte_pci_addr *addr); + +/** + * Find the name of a PCI device. + */ +void +pci_name_set(struct rte_pci_device *dev); + +/** + * Add a PCI device to the PCI Bus (append to PCI Device list). This function + * also updates the bus references of the PCI Device (and the generic device + * object embedded within. + * + * @param pci_dev + * PCI device to add + * @return void + */ +void rte_pci_add_device(struct rte_pci_device *pci_dev); + +/** + * Insert a PCI device in the PCI Bus at a particular location in the device + * list. It also updates the PCI Bus reference of the new devices to be + * inserted. + * + * @param exist_pci_dev + * Existing PCI device in PCI Bus + * @param new_pci_dev + * PCI device to be added before exist_pci_dev + * @return void + */ +void rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, + struct rte_pci_device *new_pci_dev); + +/** + * Remove a PCI device from the PCI Bus. This sets to NULL the bus references + * in the PCI device object as well as the generic device object. + * + * @param pci_device + * PCI device to be removed from PCI Bus + * @return void + */ +void rte_pci_remove_device(struct rte_pci_device *pci_device); + +/** + * Update a pci device object by asking the kernel for the latest information. + * + * This function is private to EAL. + * + * @param addr + * The PCI Bus-Device-Function address to look for + * @return + * - 0 on success. + * - negative on error. + */ +int pci_update_device(const struct rte_pci_addr *addr); + +/** + * Unbind kernel driver for this device + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int pci_unbind_kernel_driver(struct rte_pci_device *dev); + +/** + * Map the PCI resource of a PCI device in virtual memory + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int pci_uio_map_resource(struct rte_pci_device *dev); + +/** + * Unmap the PCI resource of a PCI device + * + * This function is private to EAL. + */ +void pci_uio_unmap_resource(struct rte_pci_device *dev); + +/** + * Allocate uio resource for PCI device + * + * This function is private to EAL. + * + * @param dev + * PCI device to allocate uio resource + * @param uio_res + * Pointer to uio resource. + * If the function returns 0, the pointer will be filled. + * @return + * 0 on success, negative on error + */ +int pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res); + +/** + * Free uio resource for PCI device + * + * This function is private to EAL. + * + * @param dev + * PCI device to free uio resource + * @param uio_res + * Pointer to uio resource. + */ +void pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res); + +/** + * Map device memory to uio resource + * + * This function is private to EAL. + * + * @param dev + * PCI device that has memory information. + * @param res_idx + * Memory resource index of the PCI device. + * @param uio_res + * uio resource that will keep mapping information. + * @param map_idx + * Mapping information index of the uio resource. + * @return + * 0 on success, negative on error + */ +int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx); + +/* + * Match the PCI Driver and Device using the ID Table + * + * @param pci_drv + * PCI driver from which ID table would be extracted + * @param pci_dev + * PCI device to match against the driver + * @return + * 1 for successful match + * 0 for unsuccessful match + */ +int +rte_pci_match(const struct rte_pci_driver *pci_drv, + const struct rte_pci_device *pci_dev); + +/** + * Get iommu class of PCI devices on the bus. + * And return their preferred iova mapping mode. + * + * @return + * - enum rte_iova_mode. + */ +enum rte_iova_mode +rte_pci_get_iommu_class(void); + +#endif /* _PCI_PRIVATE_H_ */ diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h new file mode 100644 index 00000000..d4a29964 --- /dev/null +++ b/drivers/bus/pci/rte_bus_pci.h @@ -0,0 +1,340 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright 2013-2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BUS_PCI_H_ +#define _RTE_BUS_PCI_H_ + +/** + * @file + * + * RTE PCI Bus Interface + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <limits.h> +#include <errno.h> +#include <sys/queue.h> +#include <stdint.h> +#include <inttypes.h> + +#include <rte_debug.h> +#include <rte_interrupts.h> +#include <rte_dev.h> +#include <rte_bus.h> +#include <rte_pci.h> + +/** Pathname of PCI devices directory. */ +const char *rte_pci_get_sysfs_path(void); + +/* Forward declarations */ +struct rte_pci_device; +struct rte_pci_driver; + +/** List of PCI devices */ +TAILQ_HEAD(rte_pci_device_list, rte_pci_device); +/** List of PCI drivers */ +TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver); + +/* PCI Bus iterators */ +#define FOREACH_DEVICE_ON_PCIBUS(p) \ + TAILQ_FOREACH(p, &(rte_pci_bus.device_list), next) + +#define FOREACH_DRIVER_ON_PCIBUS(p) \ + TAILQ_FOREACH(p, &(rte_pci_bus.driver_list), next) + +struct rte_devargs; + +/** + * A structure describing a PCI device. + */ +struct rte_pci_device { + TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ + struct rte_device device; /**< Inherit core device */ + struct rte_pci_addr addr; /**< PCI location. */ + struct rte_pci_id id; /**< PCI ID. */ + struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; + /**< PCI Memory Resource */ + struct rte_intr_handle intr_handle; /**< Interrupt handle */ + struct rte_pci_driver *driver; /**< Associated driver */ + uint16_t max_vfs; /**< sriov enable if not zero */ + enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ + char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */ +}; + +/** + * @internal + * Helper macro for drivers that need to convert to struct rte_pci_device. + */ +#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device) + +#define RTE_ETH_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device) + +/** Any PCI device identifier (vendor, device, ...) */ +#define PCI_ANY_ID (0xffff) +#define RTE_CLASS_ANY_ID (0xffffff) + +#ifdef __cplusplus +/** C++ macro used to help building up tables of device IDs */ +#define RTE_PCI_DEVICE(vend, dev) \ + RTE_CLASS_ANY_ID, \ + (vend), \ + (dev), \ + PCI_ANY_ID, \ + PCI_ANY_ID +#else +/** Macro used to help building up tables of device IDs */ +#define RTE_PCI_DEVICE(vend, dev) \ + .class_id = RTE_CLASS_ANY_ID, \ + .vendor_id = (vend), \ + .device_id = (dev), \ + .subsystem_vendor_id = PCI_ANY_ID, \ + .subsystem_device_id = PCI_ANY_ID +#endif + +/** + * Initialisation function for the driver called during PCI probing. + */ +typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *); + +/** + * Uninitialisation function for the driver called during hotplugging. + */ +typedef int (pci_remove_t)(struct rte_pci_device *); + +/** + * A structure describing a PCI driver. + */ +struct rte_pci_driver { + TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ + struct rte_driver driver; /**< Inherit core driver. */ + struct rte_pci_bus *bus; /**< PCI bus reference. */ + pci_probe_t *probe; /**< Device Probe function. */ + pci_remove_t *remove; /**< Device Remove function. */ + const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ + uint32_t drv_flags; /**< Flags contolling handling of device. */ +}; + +/** + * Structure describing the PCI bus + */ +struct rte_pci_bus { + struct rte_bus bus; /**< Inherit the generic class */ + struct rte_pci_device_list device_list; /**< List of PCI devices */ + struct rte_pci_driver_list driver_list; /**< List of PCI drivers */ +}; + +/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ +#define RTE_PCI_DRV_NEED_MAPPING 0x0001 +/** Device driver supports link state interrupt */ +#define RTE_PCI_DRV_INTR_LSC 0x0008 +/** Device driver supports device removal interrupt */ +#define RTE_PCI_DRV_INTR_RMV 0x0010 +/** Device driver needs to keep mapped resources if unsupported dev detected */ +#define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020 +/** Device driver supports IOVA as VA */ +#define RTE_PCI_DRV_IOVA_AS_VA 0X0040 + +/** + * Map the PCI device resources in user space virtual memory address + * + * Note that driver should not call this function when flag + * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for + * you when it's on. + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use + * + * @return + * 0 on success, negative on error and positive if no driver + * is found for the device. + */ +int rte_pci_map_device(struct rte_pci_device *dev); + +/** + * Unmap this device + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use + */ +void rte_pci_unmap_device(struct rte_pci_device *dev); + +/** + * Dump the content of the PCI bus. + * + * @param f + * A pointer to a file for output + */ +void rte_pci_dump(FILE *f); + +/** + * Register a PCI driver. + * + * @param driver + * A pointer to a rte_pci_driver structure describing the driver + * to be registered. + */ +void rte_pci_register(struct rte_pci_driver *driver); + +/** Helper for PCI device registration from driver (eth, crypto) instance */ +#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ +RTE_INIT(pciinitfn_ ##nm); \ +static void pciinitfn_ ##nm(void) \ +{\ + (pci_drv).driver.name = RTE_STR(nm);\ + rte_pci_register(&pci_drv); \ +} \ +RTE_PMD_EXPORT_NAME(nm, __COUNTER__) + +/** + * Unregister a PCI driver. + * + * @param driver + * A pointer to a rte_pci_driver structure describing the driver + * to be unregistered. + */ +void rte_pci_unregister(struct rte_pci_driver *driver); + +/** + * Read PCI config space. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param buf + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into PCI config space + */ +int rte_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset); + +/** + * Write PCI config space. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param buf + * A data buffer containing the bytes should be written + * @param len + * The length of the data buffer. + * @param offset + * The offset into PCI config space + */ +int rte_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset); + +/** + * A structure used to access io resources for a pci device. + * rte_pci_ioport is arch, os, driver specific, and should not be used outside + * of pci ioport api. + */ +struct rte_pci_ioport { + struct rte_pci_device *dev; + uint64_t base; + uint64_t len; /* only filled for memory mapped ports */ +}; + +/** + * Initialize a rte_pci_ioport object for a pci device io resource. + * + * This object is then used to gain access to those io resources (see below). + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use. + * @param bar + * Index of the io pci resource we want to access. + * @param p + * The rte_pci_ioport object to be initialized. + * @return + * 0 on success, negative on error. + */ +int rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); + +/** + * Release any resources used in a rte_pci_ioport object. + * + * @param p + * The rte_pci_ioport object to be uninitialized. + * @return + * 0 on success, negative on error. + */ +int rte_pci_ioport_unmap(struct rte_pci_ioport *p); + +/** + * Read from a io pci resource. + * + * @param p + * The rte_pci_ioport object from which we want to read. + * @param data + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into the pci io resource. + */ +void rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); + +/** + * Write to a io pci resource. + * + * @param p + * The rte_pci_ioport object to which we want to write. + * @param data + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into the pci io resource. + */ +void rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BUS_PCI_H_ */ diff --git a/drivers/bus/pci/rte_bus_pci_version.map b/drivers/bus/pci/rte_bus_pci_version.map new file mode 100644 index 00000000..27e9c4f1 --- /dev/null +++ b/drivers/bus/pci/rte_bus_pci_version.map @@ -0,0 +1,18 @@ +DPDK_17.11 { + global: + + rte_pci_dump; + rte_pci_get_sysfs_path; + rte_pci_ioport_map; + rte_pci_ioport_read; + rte_pci_ioport_unmap; + rte_pci_ioport_write; + rte_pci_map_device; + rte_pci_read_config; + rte_pci_register; + rte_pci_unmap_device; + rte_pci_unregister; + rte_pci_write_config; + + local: *; +}; |