diff options
Diffstat (limited to 'drivers/net/virtio/virtio_user')
-rw-r--r-- | drivers/net/virtio/virtio_user/vhost.h | 4 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_user/vhost_kernel.c | 65 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_user/vhost_kernel_tap.c | 56 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_user/vhost_kernel_tap.h | 2 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_user/vhost_user.c | 179 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_user/virtio_user_dev.c | 35 |
6 files changed, 185 insertions, 156 deletions
diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h index 668cc99f..83a85cc6 100644 --- a/drivers/net/virtio/virtio_user/vhost.h +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -88,7 +88,7 @@ struct virtio_user_backend_ops { int enable); }; -struct virtio_user_backend_ops ops_user; -struct virtio_user_backend_ops ops_kernel; +extern struct virtio_user_backend_ops virtio_ops_user; +extern struct virtio_user_backend_ops virtio_ops_kernel; #endif diff --git a/drivers/net/virtio/virtio_user/vhost_kernel.c b/drivers/net/virtio/virtio_user/vhost_kernel.c index b2444096..6b19180d 100644 --- a/drivers/net/virtio/virtio_user/vhost_kernel.c +++ b/drivers/net/virtio/virtio_user/vhost_kernel.c @@ -70,41 +70,44 @@ static uint64_t vhost_req_user_to_kernel[] = { [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE, }; -struct walk_arg { - struct vhost_memory_kernel *vm; - uint32_t region_nr; -}; static int -add_memory_region(const struct rte_memseg_list *msl __rte_unused, - const struct rte_memseg *ms, size_t len, void *arg) +add_memseg_list(const struct rte_memseg_list *msl, void *arg) { - struct walk_arg *wa = arg; + struct vhost_memory_kernel *vm = arg; struct vhost_memory_region *mr; void *start_addr; + uint64_t len; - if (wa->region_nr >= max_regions) + if (msl->external) + return 0; + + if (vm->nregions >= max_regions) return -1; - mr = &wa->vm->regions[wa->region_nr++]; - start_addr = ms->addr; + start_addr = msl->base_va; + len = msl->page_sz * msl->memseg_arr.len; + + mr = &vm->regions[vm->nregions++]; mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr; mr->userspace_addr = (uint64_t)(uintptr_t)start_addr; mr->memory_size = len; - mr->mmap_offset = 0; + mr->mmap_offset = 0; /* flags_padding */ + + PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64, + vm->nregions - 1, start_addr, len); return 0; } -/* By default, vhost kernel module allows 64 regions, but DPDK allows - * 256 segments. As a relief, below function merges those virtually - * adjacent memsegs into one region. +/* By default, vhost kernel module allows 64 regions, but DPDK may + * have much more memory regions. Below function will treat each + * contiguous memory space reserved by DPDK as one region. */ static struct vhost_memory_kernel * prepare_vhost_memory_kernel(void) { struct vhost_memory_kernel *vm; - struct walk_arg wa; vm = malloc(sizeof(struct vhost_memory_kernel) + max_regions * @@ -112,16 +115,18 @@ prepare_vhost_memory_kernel(void) if (!vm) return NULL; - wa.region_nr = 0; - wa.vm = vm; + vm->nregions = 0; + vm->padding = 0; - if (rte_memseg_contig_walk(add_memory_region, &wa) < 0) { + /* + * The memory lock has already been taken by memory subsystem + * or virtio_user_start_device(). + */ + if (rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm) < 0) { free(vm); return NULL; } - vm->nregions = wa.region_nr; - vm->padding = 0; return vm; } @@ -147,8 +152,8 @@ prepare_vhost_memory_kernel(void) (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ (1ULL << VIRTIO_NET_F_CSUM)) -static int -tap_supporte_mq(void) +static unsigned int +tap_support_features(void) { int tapfd; unsigned int tap_features; @@ -167,7 +172,7 @@ tap_supporte_mq(void) } close(tapfd); - return tap_features & IFF_MULTI_QUEUE; + return tap_features; } static int @@ -181,6 +186,7 @@ vhost_kernel_ioctl(struct virtio_user_dev *dev, struct vhost_memory_kernel *vm = NULL; int vhostfd; unsigned int queue_sel; + unsigned int features; PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); @@ -234,17 +240,20 @@ vhost_kernel_ioctl(struct virtio_user_dev *dev, } if (!ret && req_kernel == VHOST_GET_FEATURES) { + features = tap_support_features(); /* with tap as the backend, all these features are supported * but not claimed by vhost-net, so we add them back when * reporting to upper layer. */ - *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; - *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; + if (features & IFF_VNET_HDR) { + *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; + *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; + } /* vhost_kernel will not declare this feature, but it does * support multi-queue. */ - if (tap_supporte_mq()) + if (features & IFF_MULTI_QUEUE) *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); } @@ -339,7 +348,7 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, hdr_size = sizeof(struct virtio_net_hdr); tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq, - (char *)dev->mac_addr); + (char *)dev->mac_addr, dev->features); if (tapfd < 0) { PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); return -1; @@ -355,7 +364,7 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, return 0; } -struct virtio_user_backend_ops ops_kernel = { +struct virtio_user_backend_ops virtio_ops_kernel = { .setup = vhost_kernel_setup, .send_request = vhost_kernel_ioctl, .enable_qp = vhost_kernel_enable_queue_pair diff --git a/drivers/net/virtio/virtio_user/vhost_kernel_tap.c b/drivers/net/virtio/virtio_user/vhost_kernel_tap.c index 9ea7ade7..a3faf1d0 100644 --- a/drivers/net/virtio/virtio_user/vhost_kernel_tap.c +++ b/drivers/net/virtio/virtio_user/vhost_kernel_tap.c @@ -16,21 +16,55 @@ #include "vhost_kernel_tap.h" #include "../virtio_logs.h" +#include "../virtio_pci.h" + +static int +vhost_kernel_tap_set_offload(int fd, uint64_t features) +{ + unsigned int offload = 0; + + if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) { + offload |= TUN_F_CSUM; + if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4)) + offload |= TUN_F_TSO4; + if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) + offload |= TUN_F_TSO6; + if (features & ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | + (1ULL << VIRTIO_NET_F_GUEST_TSO6)) && + (features & (1ULL << VIRTIO_NET_F_GUEST_ECN))) + offload |= TUN_F_TSO_ECN; + if (features & (1ULL << VIRTIO_NET_F_GUEST_UFO)) + offload |= TUN_F_UFO; + } + + if (offload != 0) { + /* Check if our kernel supports TUNSETOFFLOAD */ + if (ioctl(fd, TUNSETOFFLOAD, 0) != 0 && errno == EINVAL) { + PMD_DRV_LOG(ERR, "Kernel does't support TUNSETOFFLOAD\n"); + return -ENOTSUP; + } + + if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) { + offload &= ~TUN_F_UFO; + if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) { + PMD_DRV_LOG(ERR, "TUNSETOFFLOAD ioctl() failed: %s\n", + strerror(errno)); + return -1; + } + } + } + + return 0; +} int vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq, - const char *mac) + const char *mac, uint64_t features) { unsigned int tap_features; int sndbuf = INT_MAX; struct ifreq ifr; int tapfd; - unsigned int offload = - TUN_F_CSUM | - TUN_F_TSO4 | - TUN_F_TSO6 | - TUN_F_TSO_ECN | - TUN_F_UFO; /* TODO: * 1. verify we can get/set vnet_hdr_len, tap_probe_vnet_hdr_len @@ -90,13 +124,7 @@ vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq, goto error; } - /* TODO: before set the offload capabilities, we'd better (1) check - * negotiated features to see if necessary to offload; (2) query tap - * to see if it supports the offload capabilities. - */ - if (ioctl(tapfd, TUNSETOFFLOAD, offload) != 0) - PMD_DRV_LOG(ERR, "TUNSETOFFLOAD ioctl() failed: %s", - strerror(errno)); + vhost_kernel_tap_set_offload(tapfd, features); memset(&ifr, 0, sizeof(ifr)); ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; diff --git a/drivers/net/virtio/virtio_user/vhost_kernel_tap.h b/drivers/net/virtio/virtio_user/vhost_kernel_tap.h index 01a026f5..e0e95b4f 100644 --- a/drivers/net/virtio/virtio_user/vhost_kernel_tap.h +++ b/drivers/net/virtio/virtio_user/vhost_kernel_tap.h @@ -36,4 +36,4 @@ #define PATH_NET_TUN "/dev/net/tun" int vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq, - const char *mac); + const char *mac, uint64_t features); diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c index ef6e43df..2c6eba0a 100644 --- a/drivers/net/virtio/virtio_user/vhost_user.c +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -11,6 +11,9 @@ #include <string.h> #include <errno.h> +#include <rte_fbarray.h> +#include <rte_eal_memconfig.h> + #include "vhost.h" #include "virtio_user_dev.h" @@ -121,133 +124,103 @@ fail: return -1; } -struct hugepage_file_info { - uint64_t addr; /**< virtual addr */ - size_t size; /**< the file size */ - char path[PATH_MAX]; /**< path to backing file */ +struct walk_arg { + struct vhost_memory *vm; + int *fds; + int region_nr; }; -/* Two possible options: - * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file - * array. This is simple but cannot be used in secondary process because - * secondary process will close and munmap that file. - * 2. Match HUGEFILE_FMT to find hugepage files directly. - * - * We choose option 2. - */ static int -get_hugepage_file_info(struct hugepage_file_info huges[], int max) +update_memory_region(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, void *arg) { - int idx, k, exist; - FILE *f; - char buf[BUFSIZ], *tmp, *tail; - char *str_underline, *str_start; - int huge_index; - uint64_t v_start, v_end; - struct stat stats; - - f = fopen("/proc/self/maps", "r"); - if (!f) { - PMD_DRV_LOG(ERR, "cannot open /proc/self/maps"); + struct walk_arg *wa = arg; + struct vhost_memory_region *mr; + uint64_t start_addr, end_addr; + size_t offset; + int i, fd; + + fd = rte_memseg_get_fd_thread_unsafe(ms); + if (fd < 0) { + PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p rte_errno=%d", + ms, rte_errno); return -1; } - idx = 0; - while (fgets(buf, sizeof(buf), f) != NULL) { - if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) { - PMD_DRV_LOG(ERR, "Failed to parse address"); - goto error; - } + if (rte_memseg_get_fd_offset_thread_unsafe(ms, &offset) < 0) { + PMD_DRV_LOG(ERR, "Failed to get offset, ms=%p rte_errno=%d", + ms, rte_errno); + return -1; + } - tmp = strchr(buf, ' ') + 1; /** skip address */ - tmp = strchr(tmp, ' ') + 1; /** skip perm */ - tmp = strchr(tmp, ' ') + 1; /** skip offset */ - tmp = strchr(tmp, ' ') + 1; /** skip dev */ - tmp = strchr(tmp, ' ') + 1; /** skip inode */ - while (*tmp == ' ') /** skip spaces */ - tmp++; - tail = strrchr(tmp, '\n'); /** remove newline if exists */ - if (tail) - *tail = '\0'; - - /* Match HUGEFILE_FMT, aka "%s/%smap_%d", - * which is defined in eal_filesystem.h - */ - str_underline = strrchr(tmp, '_'); - if (!str_underline) - continue; + start_addr = (uint64_t)(uintptr_t)ms->addr; + end_addr = start_addr + ms->len; - str_start = str_underline - strlen("map"); - if (str_start < tmp) + for (i = 0; i < wa->region_nr; i++) { + if (wa->fds[i] != fd) continue; - if (sscanf(str_start, "map_%d", &huge_index) != 1) - continue; + mr = &wa->vm->regions[i]; - /* skip duplicated file which is mapped to different regions */ - for (k = 0, exist = -1; k < idx; ++k) { - if (!strcmp(huges[k].path, tmp)) { - exist = k; - break; - } - } - if (exist >= 0) - continue; + if (mr->userspace_addr + mr->memory_size < end_addr) + mr->memory_size = end_addr - mr->userspace_addr; - if (idx >= max) { - PMD_DRV_LOG(ERR, "Exceed maximum of %d", max); - goto error; + if (mr->userspace_addr > start_addr) { + mr->userspace_addr = start_addr; + mr->guest_phys_addr = start_addr; } - huges[idx].addr = v_start; - huges[idx].size = v_end - v_start; /* To be corrected later */ - snprintf(huges[idx].path, PATH_MAX, "%s", tmp); - idx++; + if (mr->mmap_offset > offset) + mr->mmap_offset = offset; + + PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64 + " addr=0x%" PRIx64 " len=%" PRIu64, i, fd, + mr->mmap_offset, mr->userspace_addr, + mr->memory_size); + + return 0; } - /* correct the size for files who have many regions */ - for (k = 0; k < idx; ++k) { - if (stat(huges[k].path, &stats) < 0) { - PMD_DRV_LOG(ERR, "Failed to stat %s, %s\n", - huges[k].path, strerror(errno)); - continue; - } - huges[k].size = stats.st_size; - PMD_DRV_LOG(INFO, "file %s, size %zx\n", - huges[k].path, huges[k].size); + if (i >= VHOST_MEMORY_MAX_NREGIONS) { + PMD_DRV_LOG(ERR, "Too many memory regions"); + return -1; } - fclose(f); - return idx; + mr = &wa->vm->regions[i]; + wa->fds[i] = fd; -error: - fclose(f); - return -1; + mr->guest_phys_addr = start_addr; + mr->userspace_addr = start_addr; + mr->memory_size = ms->len; + mr->mmap_offset = offset; + + PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64 + " addr=0x%" PRIx64 " len=%" PRIu64, i, fd, + mr->mmap_offset, mr->userspace_addr, + mr->memory_size); + + wa->region_nr++; + + return 0; } static int prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) { - int i, num; - struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS]; - struct vhost_memory_region *mr; + struct walk_arg wa; - num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS); - if (num < 0) { - PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user"); - return -1; - } + wa.region_nr = 0; + wa.vm = &msg->payload.memory; + wa.fds = fds; - for (i = 0; i < num; ++i) { - mr = &msg->payload.memory.regions[i]; - mr->guest_phys_addr = huges[i].addr; /* use vaddr! */ - mr->userspace_addr = huges[i].addr; - mr->memory_size = huges[i].size; - mr->mmap_offset = 0; - fds[i] = open(huges[i].path, O_RDWR); - } + /* + * The memory lock has already been taken by memory subsystem + * or virtio_user_start_device(). + */ + if (rte_memseg_walk_thread_unsafe(update_memory_region, &wa) < 0) + return -1; - msg->payload.memory.nregions = num; + msg->payload.memory.nregions = wa.region_nr; msg->payload.memory.padding = 0; return 0; @@ -280,7 +253,7 @@ vhost_user_sock(struct virtio_user_dev *dev, int need_reply = 0; int fds[VHOST_MEMORY_MAX_NREGIONS]; int fd_num = 0; - int i, len; + int len; int vhostfd = dev->vhostfd; RTE_SET_USED(m); @@ -364,10 +337,6 @@ vhost_user_sock(struct virtio_user_dev *dev, return -1; } - if (req == VHOST_USER_SET_MEM_TABLE) - for (i = 0; i < fd_num; ++i) - close(fds[i]); - if (need_reply) { if (vhost_user_read(vhostfd, &msg) < 0) { PMD_DRV_LOG(ERR, "Received msg failed: %s", @@ -497,7 +466,7 @@ vhost_user_enable_queue_pair(struct virtio_user_dev *dev, return 0; } -struct virtio_user_backend_ops ops_user = { +struct virtio_user_backend_ops virtio_ops_user = { .setup = vhost_user_setup, .send_request = vhost_user_sock, .enable_qp = vhost_user_enable_queue_pair diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 7df600b0..b4997ee3 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -13,6 +13,8 @@ #include <sys/types.h> #include <sys/stat.h> +#include <rte_eal_memconfig.h> + #include "vhost.h" #include "virtio_user_dev.h" #include "../virtio_ethdev.h" @@ -109,9 +111,24 @@ is_vhost_user_by_type(const char *path) int virtio_user_start_device(struct virtio_user_dev *dev) { + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; uint64_t features; int ret; + /* + * XXX workaround! + * + * We need to make sure that the locks will be + * taken in the correct order to avoid deadlocks. + * + * Before releasing this lock, this thread should + * not trigger any memory hotplug events. + * + * This is a temporary workaround, and should be + * replaced when we get proper supports from the + * memory subsystem in the future. + */ + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); pthread_mutex_lock(&dev->mutex); if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0) @@ -152,10 +169,12 @@ virtio_user_start_device(struct virtio_user_dev *dev) dev->started = true; pthread_mutex_unlock(&dev->mutex); + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); return 0; error: pthread_mutex_unlock(&dev->mutex); + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); /* TODO: free resource here or caller to check */ return -1; } @@ -282,8 +301,14 @@ virtio_user_mem_event_cb(enum rte_mem_event type __rte_unused, void *arg) { struct virtio_user_dev *dev = arg; + struct rte_memseg_list *msl; uint16_t i; + /* ignore externally allocated memory */ + msl = rte_mem_virt2memseg_list(addr); + if (msl->external) + return; + pthread_mutex_lock(&dev->mutex); if (dev->started == false) @@ -319,12 +344,12 @@ virtio_user_dev_setup(struct virtio_user_dev *dev) PMD_DRV_LOG(ERR, "Server mode doesn't support vhost-kernel!"); return -1; } - dev->ops = &ops_user; + dev->ops = &virtio_ops_user; } else { if (is_vhost_user_by_type(dev->path)) { - dev->ops = &ops_user; + dev->ops = &virtio_ops_user; } else { - dev->ops = &ops_kernel; + dev->ops = &virtio_ops_kernel; dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int)); @@ -530,13 +555,11 @@ virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) /* Server mode can't enable queue pairs if vhostfd is invalid, * always return 0 in this case. */ - if (dev->vhostfd >= 0) { + if (!dev->is_server || dev->vhostfd >= 0) { for (i = 0; i < q_pairs; ++i) ret |= dev->ops->enable_qp(dev, i, 1); for (i = q_pairs; i < dev->max_queue_pairs; ++i) ret |= dev->ops->enable_qp(dev, i, 0); - } else if (!dev->is_server) { - ret = ~0; } dev->queue_pairs = q_pairs; |