diff options
Diffstat (limited to 'drivers/bus/vmbus/linux')
-rw-r--r-- | drivers/bus/vmbus/linux/Makefile | 3 | ||||
-rw-r--r-- | drivers/bus/vmbus/linux/vmbus_bus.c | 355 | ||||
-rw-r--r-- | drivers/bus/vmbus/linux/vmbus_uio.c | 398 |
3 files changed, 756 insertions, 0 deletions
diff --git a/drivers/bus/vmbus/linux/Makefile b/drivers/bus/vmbus/linux/Makefile new file mode 100644 index 00000000..ef0d30b2 --- /dev/null +++ b/drivers/bus/vmbus/linux/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: BSD-3-Clause + +SRCS += vmbus_bus.c vmbus_uio.c diff --git a/drivers/bus/vmbus/linux/vmbus_bus.c b/drivers/bus/vmbus/linux/vmbus_bus.c new file mode 100644 index 00000000..52d6a3c0 --- /dev/null +++ b/drivers/bus/vmbus/linux/vmbus_bus.c @@ -0,0 +1,355 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018, Microsoft Corporation. + * All Rights Reserved. + */ + +#include <string.h> +#include <unistd.h> +#include <dirent.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include <rte_eal.h> +#include <rte_uuid.h> +#include <rte_tailq.h> +#include <rte_log.h> +#include <rte_devargs.h> +#include <rte_memory.h> +#include <rte_malloc.h> +#include <rte_bus_vmbus.h> + +#include "eal_filesystem.h" +#include "private.h" + +/** Pathname of VMBUS devices directory. */ +#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices" + +extern struct rte_vmbus_bus rte_vmbus_bus; + +/* Read sysfs file to get UUID */ +static int +parse_sysfs_uuid(const char *filename, rte_uuid_t uu) +{ + char buf[BUFSIZ]; + char *cp, *in = buf; + FILE *f; + + f = fopen(filename, "r"); + if (f == NULL) { + VMBUS_LOG(ERR, "cannot open sysfs value %s: %s", + filename, strerror(errno)); + return -1; + } + + if (fgets(buf, sizeof(buf), f) == NULL) { + VMBUS_LOG(ERR, "cannot read sysfs value %s", + filename); + fclose(f); + return -1; + } + fclose(f); + + cp = strchr(buf, '\n'); + if (cp) + *cp = '\0'; + + /* strip { } notation */ + if (buf[0] == '{') { + in = buf + 1; + cp = strchr(in, '}'); + if (cp) + *cp = '\0'; + } + + if (rte_uuid_parse(in, uu) < 0) { + VMBUS_LOG(ERR, "%s %s not a valid UUID", + filename, buf); + return -1; + } + + return 0; +} + +static int +get_sysfs_string(const char *filename, char *buf, size_t buflen) +{ + char *cp; + FILE *f; + + f = fopen(filename, "r"); + if (f == NULL) { + VMBUS_LOG(ERR, "cannot open sysfs value %s:%s", + filename, strerror(errno)); + return -1; + } + + if (fgets(buf, buflen, f) == NULL) { + VMBUS_LOG(ERR, "cannot read sysfs value %s", + filename); + fclose(f); + return -1; + } + fclose(f); + + /* remove trailing newline */ + cp = memchr(buf, '\n', buflen); + if (cp) + *cp = '\0'; + + return 0; +} + +static int +vmbus_get_uio_dev(const struct rte_vmbus_device *dev, + char *dstbuf, size_t buflen) +{ + char dirname[PATH_MAX]; + unsigned int uio_num; + struct dirent *e; + DIR *dir; + + /* Assume recent kernel where uio is in uio/uioX */ + snprintf(dirname, sizeof(dirname), + SYSFS_VMBUS_DEVICES "/%s/uio", dev->device.name); + + dir = opendir(dirname); + if (dir == NULL) + return -1; /* Not a UIO device */ + + /* take the first file starting with "uio" */ + while ((e = readdir(dir)) != NULL) { + const int prefix_len = 3; + char *endptr; + + if (strncmp(e->d_name, "uio", prefix_len) != 0) + continue; + + /* try uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + prefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + prefix_len)) { + snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); + break; + } + } + closedir(dir); + + if (e == NULL) + return -1; + + return uio_num; +} + +/* Check map names with kernel names */ +static const char *map_names[VMBUS_MAX_RESOURCE] = { + [HV_TXRX_RING_MAP] = "txrx_rings", + [HV_INT_PAGE_MAP] = "int_page", + [HV_MON_PAGE_MAP] = "monitor_page", + [HV_RECV_BUF_MAP] = "recv:", + [HV_SEND_BUF_MAP] = "send:", +}; + + +/* map the resources of a vmbus device in virtual memory */ +int +rte_vmbus_map_device(struct rte_vmbus_device *dev) +{ + char uioname[PATH_MAX], filename[PATH_MAX]; + char dirname[PATH_MAX], mapname[64]; + int i; + + dev->uio_num = vmbus_get_uio_dev(dev, uioname, sizeof(uioname)); + if (dev->uio_num < 0) { + VMBUS_LOG(DEBUG, "Not managed by UIO driver, skipped"); + return 1; + } + + /* Extract resource value */ + for (i = 0; i < VMBUS_MAX_RESOURCE; i++) { + struct rte_mem_resource *res = &dev->resource[i]; + unsigned long len, gpad = 0; + char *cp; + + snprintf(dirname, sizeof(dirname), + "%s/maps/map%d", uioname, i); + + snprintf(filename, sizeof(filename), + "%s/name", dirname); + + if (get_sysfs_string(filename, mapname, sizeof(mapname)) < 0) { + VMBUS_LOG(ERR, "could not read %s", filename); + return -1; + } + + if (strncmp(map_names[i], mapname, strlen(map_names[i])) != 0) { + VMBUS_LOG(ERR, + "unexpected resource %s (expected %s)", + mapname, map_names[i]); + return -1; + } + + snprintf(filename, sizeof(filename), + "%s/size", dirname); + if (eal_parse_sysfs_value(filename, &len) < 0) { + VMBUS_LOG(ERR, + "could not read %s", filename); + return -1; + } + res->len = len; + + /* both send and receive buffers have gpad in name */ + cp = memchr(mapname, ':', sizeof(mapname)); + if (cp) + gpad = strtoul(cp+1, NULL, 0); + + /* put the GPAD value in physical address */ + res->phys_addr = gpad; + } + + return vmbus_uio_map_resource(dev); +} + +void +rte_vmbus_unmap_device(struct rte_vmbus_device *dev) +{ + vmbus_uio_unmap_resource(dev); +} + +/* Scan one vmbus sysfs entry, and fill the devices list from it. */ +static int +vmbus_scan_one(const char *name) +{ + struct rte_vmbus_device *dev, *dev2; + char filename[PATH_MAX]; + char dirname[PATH_MAX]; + unsigned long tmp; + + dev = calloc(1, sizeof(*dev)); + if (dev == NULL) + return -1; + + dev->device.name = strdup(name); + if (!dev->device.name) + goto error; + + /* sysfs base directory + * /sys/bus/vmbus/devices/7a08391f-f5a0-4ac0-9802-d13fd964f8df + * or on older kernel + * /sys/bus/vmbus/devices/vmbus_1 + */ + snprintf(dirname, sizeof(dirname), "%s/%s", + SYSFS_VMBUS_DEVICES, name); + + /* get device id */ + snprintf(filename, sizeof(filename), "%s/device_id", dirname); + if (parse_sysfs_uuid(filename, dev->device_id) < 0) + goto error; + + /* get device class */ + snprintf(filename, sizeof(filename), "%s/class_id", dirname); + if (parse_sysfs_uuid(filename, dev->class_id) < 0) + goto error; + + /* get relid */ + snprintf(filename, sizeof(filename), "%s/id", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) + goto error; + dev->relid = tmp; + + /* get monitor id */ + snprintf(filename, sizeof(filename), "%s/monitor_id", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) + goto error; + dev->monitor_id = tmp; + + /* get numa node (if present) */ + snprintf(filename, sizeof(filename), "%s/numa_node", + dirname); + + if (access(filename, R_OK) == 0) { + if (eal_parse_sysfs_value(filename, &tmp) < 0) + goto error; + dev->device.numa_node = tmp; + } else { + /* if no NUMA support, set default to 0 */ + dev->device.numa_node = SOCKET_ID_ANY; + } + + /* device is valid, add in list (sorted) */ + VMBUS_LOG(DEBUG, "Adding vmbus device %s", name); + + TAILQ_FOREACH(dev2, &rte_vmbus_bus.device_list, next) { + int ret; + + ret = rte_uuid_compare(dev->device_id, dev2->device_id); + if (ret > 0) + continue; + + if (ret < 0) { + vmbus_insert_device(dev2, dev); + } else { /* already registered */ + VMBUS_LOG(NOTICE, + "%s already registered", name); + free(dev); + } + return 0; + } + + vmbus_add_device(dev); + return 0; +error: + VMBUS_LOG(DEBUG, "failed"); + + free(dev); + return -1; +} + +/* + * Scan the content of the vmbus, and the devices in the devices list + */ +int +rte_vmbus_scan(void) +{ + struct dirent *e; + DIR *dir; + + dir = opendir(SYSFS_VMBUS_DEVICES); + if (dir == NULL) { + if (errno == ENOENT) + return 0; + + VMBUS_LOG(ERR, "opendir %s failed: %s", + SYSFS_VMBUS_DEVICES, strerror(errno)); + return -1; + } + + while ((e = readdir(dir)) != NULL) { + if (e->d_name[0] == '.') + continue; + + if (vmbus_scan_one(e->d_name) < 0) + goto error; + } + closedir(dir); + return 0; + +error: + closedir(dir); + return -1; +} + +void rte_vmbus_irq_mask(struct rte_vmbus_device *device) +{ + vmbus_uio_irq_control(device, 1); +} + +void rte_vmbus_irq_unmask(struct rte_vmbus_device *device) +{ + vmbus_uio_irq_control(device, 0); +} + +int rte_vmbus_irq_read(struct rte_vmbus_device *device) +{ + return vmbus_uio_irq_read(device); +} diff --git a/drivers/bus/vmbus/linux/vmbus_uio.c b/drivers/bus/vmbus/linux/vmbus_uio.c new file mode 100644 index 00000000..856c6d66 --- /dev/null +++ b/drivers/bus/vmbus/linux/vmbus_uio.c @@ -0,0 +1,398 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018, Microsoft Corporation. + * All Rights Reserved. + */ + +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <dirent.h> +#include <inttypes.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#include <rte_log.h> +#include <rte_bus.h> +#include <rte_memory.h> +#include <rte_eal_memconfig.h> +#include <rte_common.h> +#include <rte_malloc.h> +#include <rte_bus_vmbus.h> +#include <rte_string_fns.h> + +#include "private.h" + +/** Pathname of VMBUS devices directory. */ +#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices" + +static void *vmbus_map_addr; + +/* Control interrupts */ +void vmbus_uio_irq_control(struct rte_vmbus_device *dev, int32_t onoff) +{ + if (write(dev->intr_handle.fd, &onoff, sizeof(onoff)) < 0) { + VMBUS_LOG(ERR, "cannot write to %d:%s", + dev->intr_handle.fd, strerror(errno)); + } +} + +int vmbus_uio_irq_read(struct rte_vmbus_device *dev) +{ + int32_t count; + int cc; + + cc = read(dev->intr_handle.fd, &count, sizeof(count)); + if (cc < (int)sizeof(count)) { + if (cc < 0) { + VMBUS_LOG(ERR, "IRQ read failed %s", + strerror(errno)); + return -errno; + } + VMBUS_LOG(ERR, "can't read IRQ count"); + return -EINVAL; + } + + return count; +} + +void +vmbus_uio_free_resource(struct rte_vmbus_device *dev, + struct mapped_vmbus_resource *uio_res) +{ + rte_free(uio_res); + + if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + + if (dev->intr_handle.fd >= 0) { + close(dev->intr_handle.fd); + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + } +} + +int +vmbus_uio_alloc_resource(struct rte_vmbus_device *dev, + struct mapped_vmbus_resource **uio_res) +{ + char devname[PATH_MAX]; /* contains the /dev/uioX */ + + /* save fd if in primary process */ + snprintf(devname, sizeof(devname), "/dev/uio%u", dev->uio_num); + dev->intr_handle.fd = open(devname, O_RDWR); + if (dev->intr_handle.fd < 0) { + VMBUS_LOG(ERR, "Cannot open %s: %s", + devname, strerror(errno)); + goto error; + } + dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; + + /* allocate the mapping details for secondary processes*/ + *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); + if (*uio_res == NULL) { + VMBUS_LOG(ERR, "cannot store uio mmap details"); + goto error; + } + + strlcpy((*uio_res)->path, devname, PATH_MAX); + rte_uuid_copy((*uio_res)->id, dev->device_id); + + return 0; + +error: + vmbus_uio_free_resource(dev, *uio_res); + return -1; +} + +static int +find_max_end_va(const struct rte_memseg_list *msl, void *arg) +{ + size_t sz = msl->memseg_arr.len * msl->page_sz; + void *end_va = RTE_PTR_ADD(msl->base_va, sz); + void **max_va = arg; + + if (*max_va < end_va) + *max_va = end_va; + return 0; +} + +/* + * TODO: this should be part of memseg api. + * code is duplicated from PCI. + */ +static void * +vmbus_find_max_end_va(void) +{ + void *va = NULL; + + rte_memseg_list_walk(find_max_end_va, &va); + return va; +} + +int +vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev, int idx, + struct mapped_vmbus_resource *uio_res, + int flags) +{ + size_t size = dev->resource[idx].len; + struct vmbus_map *maps = uio_res->maps; + void *mapaddr; + off_t offset; + int fd; + + /* devname for mmap */ + fd = open(uio_res->path, O_RDWR); + if (fd < 0) { + VMBUS_LOG(ERR, "Cannot open %s: %s", + uio_res->path, strerror(errno)); + return -1; + } + + /* try mapping somewhere close to the end of hugepages */ + if (vmbus_map_addr == NULL) + vmbus_map_addr = vmbus_find_max_end_va(); + + /* offset is special in uio it indicates which resource */ + offset = idx * PAGE_SIZE; + + mapaddr = vmbus_map_resource(vmbus_map_addr, fd, offset, size, flags); + close(fd); + + if (mapaddr == MAP_FAILED) + return -1; + + dev->resource[idx].addr = mapaddr; + vmbus_map_addr = RTE_PTR_ADD(mapaddr, size); + + /* Record result of sucessful mapping for use by secondary */ + maps[idx].addr = mapaddr; + maps[idx].size = size; + + return 0; +} + +static int vmbus_uio_map_primary(struct vmbus_channel *chan, + void **ring_buf, uint32_t *ring_size) +{ + struct mapped_vmbus_resource *uio_res; + + uio_res = vmbus_uio_find_resource(chan->device); + if (!uio_res) { + VMBUS_LOG(ERR, "can not find resources!"); + return -ENOMEM; + } + + if (uio_res->nb_maps < VMBUS_MAX_RESOURCE) { + VMBUS_LOG(ERR, "VMBUS: only %u resources found!", + uio_res->nb_maps); + return -EINVAL; + } + + *ring_size = uio_res->maps[HV_TXRX_RING_MAP].size / 2; + *ring_buf = uio_res->maps[HV_TXRX_RING_MAP].addr; + return 0; +} + +static int vmbus_uio_map_subchan(const struct rte_vmbus_device *dev, + const struct vmbus_channel *chan, + void **ring_buf, uint32_t *ring_size) +{ + char ring_path[PATH_MAX]; + size_t file_size; + struct stat sb; + int fd; + + snprintf(ring_path, sizeof(ring_path), + "%s/%s/channels/%u/ring", + SYSFS_VMBUS_DEVICES, dev->device.name, + chan->relid); + + fd = open(ring_path, O_RDWR); + if (fd < 0) { + VMBUS_LOG(ERR, "Cannot open %s: %s", + ring_path, strerror(errno)); + return -errno; + } + + if (fstat(fd, &sb) < 0) { + VMBUS_LOG(ERR, "Cannot state %s: %s", + ring_path, strerror(errno)); + close(fd); + return -errno; + } + file_size = sb.st_size; + + if (file_size == 0 || (file_size & (PAGE_SIZE - 1))) { + VMBUS_LOG(ERR, "incorrect size %s: %zu", + ring_path, file_size); + + close(fd); + return -EINVAL; + } + + *ring_size = file_size / 2; + *ring_buf = vmbus_map_resource(vmbus_map_addr, fd, + 0, sb.st_size, 0); + close(fd); + + if (ring_buf == MAP_FAILED) + return -EIO; + + vmbus_map_addr = RTE_PTR_ADD(ring_buf, file_size); + return 0; +} + +int vmbus_uio_map_rings(struct vmbus_channel *chan) +{ + const struct rte_vmbus_device *dev = chan->device; + uint32_t ring_size; + void *ring_buf; + int ret; + + /* Primary channel */ + if (chan->subchannel_id == 0) + ret = vmbus_uio_map_primary(chan, &ring_buf, &ring_size); + else + ret = vmbus_uio_map_subchan(dev, chan, &ring_buf, &ring_size); + + if (ret) + return ret; + + vmbus_br_setup(&chan->txbr, ring_buf, ring_size); + vmbus_br_setup(&chan->rxbr, (char *)ring_buf + ring_size, ring_size); + return 0; +} + +static int vmbus_uio_sysfs_read(const char *dir, const char *name, + unsigned long *val, unsigned long max_range) +{ + char path[PATH_MAX]; + FILE *f; + int ret; + + snprintf(path, sizeof(path), "%s/%s", dir, name); + f = fopen(path, "r"); + if (!f) { + VMBUS_LOG(ERR, "can't open %s:%s", + path, strerror(errno)); + return -errno; + } + + if (fscanf(f, "%lu", val) != 1) + ret = -EIO; + else if (*val > max_range) + ret = -ERANGE; + else + ret = 0; + fclose(f); + + return ret; +} + +static bool vmbus_uio_ring_present(const struct rte_vmbus_device *dev, + uint32_t relid) +{ + char ring_path[PATH_MAX]; + + /* Check if kernel has subchannel sysfs files */ + snprintf(ring_path, sizeof(ring_path), + "%s/%s/channels/%u/ring", + SYSFS_VMBUS_DEVICES, dev->device.name, relid); + + return access(ring_path, R_OK|W_OK) == 0; +} + +bool vmbus_uio_subchannels_supported(const struct rte_vmbus_device *dev, + const struct vmbus_channel *chan) +{ + return vmbus_uio_ring_present(dev, chan->relid); +} + +static bool vmbus_isnew_subchannel(struct vmbus_channel *primary, + unsigned long id) +{ + const struct vmbus_channel *c; + + STAILQ_FOREACH(c, &primary->subchannel_list, next) { + if (c->relid == id) + return false; + } + return true; +} + +int vmbus_uio_get_subchan(struct vmbus_channel *primary, + struct vmbus_channel **subchan) +{ + const struct rte_vmbus_device *dev = primary->device; + char chan_path[PATH_MAX], subchan_path[PATH_MAX]; + struct dirent *ent; + DIR *chan_dir; + + snprintf(chan_path, sizeof(chan_path), + "%s/%s/channels", + SYSFS_VMBUS_DEVICES, dev->device.name); + + chan_dir = opendir(chan_path); + if (!chan_dir) { + VMBUS_LOG(ERR, "cannot open %s: %s", + chan_path, strerror(errno)); + return -errno; + } + + while ((ent = readdir(chan_dir))) { + unsigned long relid, subid, monid; + char *endp; + int err; + + if (ent->d_name[0] == '.') + continue; + + errno = 0; + relid = strtoul(ent->d_name, &endp, 0); + if (*endp || errno != 0 || relid > UINT16_MAX) { + VMBUS_LOG(NOTICE, "not a valid channel relid: %s", + ent->d_name); + continue; + } + + snprintf(subchan_path, sizeof(subchan_path), "%s/%lu", + chan_path, relid); + err = vmbus_uio_sysfs_read(subchan_path, "subchannel_id", + &subid, UINT16_MAX); + if (err) { + VMBUS_LOG(NOTICE, "invalid subchannel id %lu", + subid); + closedir(chan_dir); + return err; + } + + if (subid == 0) + continue; /* skip primary channel */ + + if (!vmbus_isnew_subchannel(primary, relid)) + continue; + + if (!vmbus_uio_ring_present(dev, relid)) + continue; /* Ring may not be ready yet */ + + err = vmbus_uio_sysfs_read(subchan_path, "monitor_id", + &monid, UINT8_MAX); + if (err) { + VMBUS_LOG(NOTICE, "invalid monitor id %lu", + monid); + return err; + } + + err = vmbus_chan_create(dev, relid, subid, monid, subchan); + if (err) { + VMBUS_LOG(NOTICE, "subchannel setup failed"); + return err; + } + break; + } + closedir(chan_dir); + + return (ent == NULL) ? -ENOENT : 0; +} |