From 5129044dce1f85ce4950f31bcf90f3886466f06a Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Tue, 14 Jun 2016 07:54:47 -0700 Subject: Imported upstream release 16.04 * gbp import-orig ../dpdk-16.04.tar.xz Change-Id: Iac2196db782ba322f6974d8a752acc34ce5024c3 Signed-off-by: C.J. Collier --- lib/librte_vhost/vhost_cuse/eventfd_copy.c | 104 ++++++ lib/librte_vhost/vhost_cuse/eventfd_copy.h | 45 +++ lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 426 +++++++++++++++++++++++++ lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 435 ++++++++++++++++++++++++++ lib/librte_vhost/vhost_cuse/virtio-net-cdev.h | 48 +++ 5 files changed, 1058 insertions(+) create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.c create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.h create mode 100644 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h (limited to 'lib/librte_vhost/vhost_cuse') diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.c b/lib/librte_vhost/vhost_cuse/eventfd_copy.c new file mode 100644 index 00000000..154b32a4 --- /dev/null +++ b/lib/librte_vhost/vhost_cuse/eventfd_copy.c @@ -0,0 +1,104 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "eventfd_link/eventfd_link.h" +#include "eventfd_copy.h" +#include "vhost-net.h" + +static const char eventfd_cdev[] = "/dev/eventfd-link"; + +static int eventfd_link = -1; + +int +eventfd_init(void) +{ + if (eventfd_link >= 0) + return 0; + + eventfd_link = open(eventfd_cdev, O_RDWR); + if (eventfd_link < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "eventfd_link module is not loaded\n"); + return -1; + } + + return 0; +} + +int +eventfd_free(void) +{ + if (eventfd_link >= 0) + close(eventfd_link); + return 0; +} + +/* + * This function uses the eventfd_link kernel module to copy an eventfd file + * descriptor provided by QEMU in to our process space. + */ +int +eventfd_copy(int target_fd, int target_pid) +{ + int ret; + struct eventfd_copy2 eventfd_copy2; + + + /* Open the character device to the kernel module. */ + /* TODO: check this earlier rather than fail until VM boots! */ + if (eventfd_init() < 0) + return -1; + + eventfd_copy2.fd = target_fd; + eventfd_copy2.pid = target_pid; + eventfd_copy2.flags = O_NONBLOCK | O_CLOEXEC; + /* Call the IOCTL to copy the eventfd. */ + ret = ioctl(eventfd_link, EVENTFD_COPY2, &eventfd_copy2); + + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "EVENTFD_COPY2 ioctl failed\n"); + return -1; + } + + return ret; +} diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.h b/lib/librte_vhost/vhost_cuse/eventfd_copy.h new file mode 100644 index 00000000..5f446ca0 --- /dev/null +++ b/lib/librte_vhost/vhost_cuse/eventfd_copy.h @@ -0,0 +1,45 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _EVENTFD_H +#define _EVENTFD_H + +int +eventfd_init(void); + +int +eventfd_free(void); + +int +eventfd_copy(int target_fd, int target_pid); + +#endif diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c new file mode 100644 index 00000000..c613e68e --- /dev/null +++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c @@ -0,0 +1,426 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "virtio-net-cdev.h" +#include "vhost-net.h" +#include "eventfd_copy.h" + +#define FUSE_OPT_DUMMY "\0\0" +#define FUSE_OPT_FORE "-f\0\0" +#define FUSE_OPT_NOMULTI "-s\0\0" + +static const uint32_t default_major = 231; +static const uint32_t default_minor = 1; +static const char cuse_device_name[] = "/dev/cuse"; +static const char default_cdev[] = "vhost-net"; + +static struct fuse_session *session; + +/* + * Returns vhost_device_ctx from given fuse_req_t. The index is populated later + * when the device is added to the device linked list. + */ +static struct vhost_device_ctx +fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi) +{ + struct vhost_device_ctx ctx; + struct fuse_ctx const *const req_ctx = fuse_req_ctx(req); + + ctx.pid = req_ctx->pid; + ctx.fh = fi->fh; + + return ctx; +} + +/* + * When the device is created in QEMU it gets initialised here and + * added to the device linked list. + */ +static void +vhost_net_open(fuse_req_t req, struct fuse_file_info *fi) +{ + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); + int err = 0; + + err = vhost_new_device(ctx); + if (err == -1) { + fuse_reply_err(req, EPERM); + return; + } + + fi->fh = err; + + RTE_LOG(INFO, VHOST_CONFIG, + "(%"PRIu64") Device configuration started\n", fi->fh); + fuse_reply_open(req, fi); +} + +/* + * When QEMU is shutdown or killed the device gets released. + */ +static void +vhost_net_release(fuse_req_t req, struct fuse_file_info *fi) +{ + int err = 0; + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); + + vhost_destroy_device(ctx); + RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh); + fuse_reply_err(req, err); +} + +/* + * Boilerplate code for CUSE IOCTL + * Implicit arguments: ctx, req, result. + */ +#define VHOST_IOCTL(func) do { \ + result = (func)(ctx); \ + fuse_reply_ioctl(req, result, NULL, 0); \ +} while (0) + +/* + * Boilerplate IOCTL RETRY + * Implicit arguments: req. + */ +#define VHOST_IOCTL_RETRY(size_r, size_w) do { \ + struct iovec iov_r = { arg, (size_r) }; \ + struct iovec iov_w = { arg, (size_w) }; \ + fuse_reply_ioctl_retry(req, &iov_r, \ + (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\ +} while (0) + +/* + * Boilerplate code for CUSE Read IOCTL + * Implicit arguments: ctx, req, result, in_bufsz, in_buf. + */ +#define VHOST_IOCTL_R(type, var, func) do { \ + if (!in_bufsz) { \ + VHOST_IOCTL_RETRY(sizeof(type), 0);\ + } else { \ + (var) = *(const type*)in_buf; \ + result = func(ctx, &(var)); \ + fuse_reply_ioctl(req, result, NULL, 0);\ + } \ +} while (0) + +/* + * Boilerplate code for CUSE Write IOCTL + * Implicit arguments: ctx, req, result, out_bufsz. + */ +#define VHOST_IOCTL_W(type, var, func) do { \ + if (!out_bufsz) { \ + VHOST_IOCTL_RETRY(0, sizeof(type));\ + } else { \ + result = (func)(ctx, &(var));\ + fuse_reply_ioctl(req, result, &(var), sizeof(type));\ + } \ +} while (0) + +/* + * Boilerplate code for CUSE Read/Write IOCTL + * Implicit arguments: ctx, req, result, in_bufsz, in_buf. + */ +#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \ + if (!in_bufsz) { \ + VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\ + } else { \ + (var1) = *(const type1*) (in_buf); \ + result = (func)(ctx, (var1), &(var2)); \ + fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\ + } \ +} while (0) + +/* + * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type + * of IOCTL a buffer is requested to read or to write. This request is handled + * by FUSE and the buffer is then given to CUSE. + */ +static void +vhost_net_ioctl(fuse_req_t req, int cmd, void *arg, + struct fuse_file_info *fi, __rte_unused unsigned flags, + const void *in_buf, size_t in_bufsz, size_t out_bufsz) +{ + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + uint64_t features; + uint32_t index; + int result = 0; + + switch (cmd) { + case VHOST_NET_SET_BACKEND: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh); + if (!in_buf) { + VHOST_IOCTL_RETRY(sizeof(file), 0); + break; + } + file = *(const struct vhost_vring_file *)in_buf; + result = cuse_set_backend(ctx, &file); + fuse_reply_ioctl(req, result, NULL, 0); + break; + + case VHOST_GET_FEATURES: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh); + VHOST_IOCTL_W(uint64_t, features, vhost_get_features); + break; + + case VHOST_SET_FEATURES: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh); + VHOST_IOCTL_R(uint64_t, features, vhost_set_features); + break; + + case VHOST_RESET_OWNER: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh); + VHOST_IOCTL(vhost_reset_owner); + break; + + case VHOST_SET_OWNER: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh); + VHOST_IOCTL(vhost_set_owner); + break; + + case VHOST_SET_MEM_TABLE: + /*TODO fix race condition.*/ + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh); + static struct vhost_memory mem_temp; + + switch (in_bufsz) { + case 0: + VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0); + break; + + case sizeof(struct vhost_memory): + mem_temp = *(const struct vhost_memory *) in_buf; + + if (mem_temp.nregions > 0) { + VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) + + (sizeof(struct vhost_memory_region) * + mem_temp.nregions), 0); + } else { + result = -1; + fuse_reply_ioctl(req, result, NULL, 0); + } + break; + + default: + result = cuse_set_mem_table(ctx, in_buf, + mem_temp.nregions); + if (result) + fuse_reply_err(req, EINVAL); + else + fuse_reply_ioctl(req, result, NULL, 0); + } + break; + + case VHOST_SET_VRING_NUM: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh); + VHOST_IOCTL_R(struct vhost_vring_state, state, + vhost_set_vring_num); + break; + + case VHOST_SET_VRING_BASE: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh); + VHOST_IOCTL_R(struct vhost_vring_state, state, + vhost_set_vring_base); + break; + + case VHOST_GET_VRING_BASE: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh); + VHOST_IOCTL_RW(uint32_t, index, + struct vhost_vring_state, state, vhost_get_vring_base); + break; + + case VHOST_SET_VRING_ADDR: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh); + VHOST_IOCTL_R(struct vhost_vring_addr, addr, + vhost_set_vring_addr); + break; + + case VHOST_SET_VRING_KICK: + case VHOST_SET_VRING_CALL: + if (cmd == VHOST_SET_VRING_KICK) + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", + ctx.fh); + else + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", + ctx.fh); + if (!in_buf) + VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0); + else { + int fd; + file = *(const struct vhost_vring_file *)in_buf; + LOG_DEBUG(VHOST_CONFIG, + "idx:%d fd:%d\n", file.index, file.fd); + fd = eventfd_copy(file.fd, ctx.pid); + if (fd < 0) { + fuse_reply_ioctl(req, -1, NULL, 0); + result = -1; + break; + } + file.fd = fd; + if (cmd == VHOST_SET_VRING_KICK) { + result = vhost_set_vring_kick(ctx, &file); + fuse_reply_ioctl(req, result, NULL, 0); + } else { + result = vhost_set_vring_call(ctx, &file); + fuse_reply_ioctl(req, result, NULL, 0); + } + } + break; + + default: + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh); + result = -1; + fuse_reply_ioctl(req, result, NULL, 0); + } + + if (result < 0) + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: FAIL\n", ctx.fh); + else + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh); +} + +/* + * Structure handling open, release and ioctl function pointers is populated. + */ +static const struct cuse_lowlevel_ops vhost_net_ops = { + .open = vhost_net_open, + .release = vhost_net_release, + .ioctl = vhost_net_ioctl, +}; + +/* + * cuse_info is populated and used to register the cuse device. + * vhost_net_device_ops are also passed when the device is registered in app. + */ +int +rte_vhost_driver_register(const char *dev_name) +{ + struct cuse_info cuse_info; + char device_name[PATH_MAX] = ""; + char char_device_name[PATH_MAX] = ""; + const char *device_argv[] = { device_name }; + + char fuse_opt_dummy[] = FUSE_OPT_DUMMY; + char fuse_opt_fore[] = FUSE_OPT_FORE; + char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI; + char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti}; + + if (access(cuse_device_name, R_OK | W_OK) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "char device %s can't be accessed, maybe not exist\n", + cuse_device_name); + return -1; + } + + if (eventfd_init() < 0) + return -1; + + /* + * The device name is created. This is passed to QEMU so that it can + * register the device with our application. + */ + snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name); + snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name); + + /* Check if device already exists. */ + if (access(char_device_name, F_OK) != -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "char device %s already exists\n", char_device_name); + return -1; + } + + memset(&cuse_info, 0, sizeof(cuse_info)); + cuse_info.dev_major = default_major; + cuse_info.dev_minor = default_minor; + cuse_info.dev_info_argc = 1; + cuse_info.dev_info_argv = device_argv; + cuse_info.flags = CUSE_UNRESTRICTED_IOCTL; + + session = cuse_lowlevel_setup(3, fuse_argv, + &cuse_info, &vhost_net_ops, 0, NULL); + if (session == NULL) + return -1; + + return 0; +} + +/** + * An empty function for unregister + */ +int +rte_vhost_driver_unregister(const char *dev_name __rte_unused) +{ + return 0; +} + +/** + * The CUSE session is launched allowing the application to receive open, + * release and ioctl calls. + */ +int +rte_vhost_driver_session_start(void) +{ + fuse_session_loop(session); + + return 0; +} diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c new file mode 100644 index 00000000..a68a8bd4 --- /dev/null +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c @@ -0,0 +1,435 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "rte_virtio_net.h" +#include "vhost-net.h" +#include "virtio-net-cdev.h" +#include "virtio-net.h" +#include "eventfd_copy.h" + +/* Line size for reading maps file. */ +static const uint32_t BUFSIZE = PATH_MAX; + +/* Size of prot char array in procmap. */ +#define PROT_SZ 5 + +/* Number of elements in procmap struct. */ +#define PROCMAP_SZ 8 + +/* Structure containing information gathered from maps file. */ +struct procmap { + uint64_t va_start; /* Start virtual address in file. */ + uint64_t len; /* Size of file. */ + uint64_t pgoff; /* Not used. */ + uint32_t maj; /* Not used. */ + uint32_t min; /* Not used. */ + uint32_t ino; /* Not used. */ + char prot[PROT_SZ]; /* Not used. */ + char fname[PATH_MAX]; /* File name. */ +}; + +/* + * Locate the file containing QEMU's memory space and + * map it to our address space. + */ +static int +host_memory_map(pid_t pid, uint64_t addr, + uint64_t *mapped_address, uint64_t *mapped_size) +{ + struct dirent *dptr = NULL; + struct procmap procmap; + DIR *dp = NULL; + int fd; + int i; + char memfile[PATH_MAX]; + char mapfile[PATH_MAX]; + char procdir[PATH_MAX]; + char resolved_path[PATH_MAX]; + char *path = NULL; + FILE *fmap; + void *map; + uint8_t found = 0; + char line[BUFSIZE]; + char dlm[] = "- : "; + char *str, *sp, *in[PROCMAP_SZ]; + char *end = NULL; + + /* Path where mem files are located. */ + snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid); + /* Maps file used to locate mem file. */ + snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid); + + fmap = fopen(mapfile, "r"); + if (fmap == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to open maps file for pid %d\n", + pid); + return -1; + } + + /* Read through maps file until we find out base_address. */ + while (fgets(line, BUFSIZE, fmap) != 0) { + str = line; + errno = 0; + /* Split line into fields. */ + for (i = 0; i < PROCMAP_SZ; i++) { + in[i] = strtok_r(str, &dlm[i], &sp); + if ((in[i] == NULL) || (errno != 0)) { + fclose(fmap); + return -1; + } + str = NULL; + } + + /* Convert/Copy each field as needed. */ + procmap.va_start = strtoull(in[0], &end, 16); + if ((in[0] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + procmap.len = strtoull(in[1], &end, 16); + if ((in[1] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + procmap.pgoff = strtoull(in[3], &end, 16); + if ((in[3] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + procmap.maj = strtoul(in[4], &end, 16); + if ((in[4] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + procmap.min = strtoul(in[5], &end, 16); + if ((in[5] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + procmap.ino = strtoul(in[6], &end, 16); + if ((in[6] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + memcpy(&procmap.prot, in[2], PROT_SZ); + memcpy(&procmap.fname, in[7], PATH_MAX); + + if (procmap.va_start == addr) { + procmap.len = procmap.len - procmap.va_start; + found = 1; + break; + } + } + fclose(fmap); + + if (!found) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to find memory file in pid %d maps file\n", + pid); + return -1; + } + + /* Find the guest memory file among the process fds. */ + dp = opendir(procdir); + if (dp == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Cannot open pid %d process directory\n", + pid); + return -1; + } + + found = 0; + + /* Read the fd directory contents. */ + while (NULL != (dptr = readdir(dp))) { + snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s", + pid, dptr->d_name); + path = realpath(memfile, resolved_path); + if ((path == NULL) && (strlen(resolved_path) == 0)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to resolve fd directory\n"); + closedir(dp); + return -1; + } + if (strncmp(resolved_path, procmap.fname, + strnlen(procmap.fname, PATH_MAX)) == 0) { + found = 1; + break; + } + } + + closedir(dp); + + if (found == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to find memory file for pid %d\n", + pid); + return -1; + } + /* Open the shared memory file and map the memory into this process. */ + fd = open(memfile, O_RDWR); + + if (fd == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to open %s for pid %d\n", + memfile, pid); + return -1; + } + + map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE, + MAP_POPULATE|MAP_SHARED, fd, 0); + close(fd); + + if (map == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "Error mapping the file %s for pid %d\n", + memfile, pid); + return -1; + } + + /* Store the memory address and size in the device data structure */ + *mapped_address = (uint64_t)(uintptr_t)map; + *mapped_size = procmap.len; + + LOG_DEBUG(VHOST_CONFIG, + "Mem File: %s->%s - Size: %llu - VA: %p\n", + memfile, resolved_path, + (unsigned long long)*mapped_size, map); + + return 0; +} + +int +cuse_set_mem_table(struct vhost_device_ctx ctx, + const struct vhost_memory *mem_regions_addr, uint32_t nregions) +{ + uint64_t size = offsetof(struct vhost_memory, regions); + uint32_t idx, valid_regions; + struct virtio_memory_regions *pregion; + struct vhost_memory_region *mem_regions = (void *)(uintptr_t) + ((uint64_t)(uintptr_t)mem_regions_addr + size); + uint64_t base_address = 0, mapped_address, mapped_size; + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + if (dev->mem && dev->mem->mapped_address) { + munmap((void *)(uintptr_t)dev->mem->mapped_address, + (size_t)dev->mem->mapped_size); + free(dev->mem); + dev->mem = NULL; + } + + dev->mem = calloc(1, sizeof(struct virtio_memory) + + sizeof(struct virtio_memory_regions) * nregions); + if (dev->mem == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") Failed to allocate memory for dev->mem\n", + dev->device_fh); + return -1; + } + + pregion = &dev->mem->regions[0]; + + for (idx = 0; idx < nregions; idx++) { + pregion[idx].guest_phys_address = + mem_regions[idx].guest_phys_addr; + pregion[idx].guest_phys_address_end = + pregion[idx].guest_phys_address + + mem_regions[idx].memory_size; + pregion[idx].memory_size = + mem_regions[idx].memory_size; + pregion[idx].userspace_address = + mem_regions[idx].userspace_addr; + + LOG_DEBUG(VHOST_CONFIG, + "REGION: %u - GPA: %p - QVA: %p - SIZE (%"PRIu64")\n", + idx, + (void *)(uintptr_t)pregion[idx].guest_phys_address, + (void *)(uintptr_t)pregion[idx].userspace_address, + pregion[idx].memory_size); + + /*set the base address mapping*/ + if (pregion[idx].guest_phys_address == 0x0) { + base_address = + pregion[idx].userspace_address; + /* Map VM memory file */ + if (host_memory_map(ctx.pid, base_address, + &mapped_address, &mapped_size) != 0) { + free(dev->mem); + dev->mem = NULL; + return -1; + } + dev->mem->mapped_address = mapped_address; + dev->mem->base_address = base_address; + dev->mem->mapped_size = mapped_size; + } + } + + /* Check that we have a valid base address. */ + if (base_address == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to find base address of qemu memory file.\n"); + free(dev->mem); + dev->mem = NULL; + return -1; + } + + valid_regions = nregions; + for (idx = 0; idx < nregions; idx++) { + if ((pregion[idx].userspace_address < base_address) || + (pregion[idx].userspace_address > + (base_address + mapped_size))) + valid_regions--; + } + + + if (valid_regions != nregions) { + valid_regions = 0; + for (idx = nregions; 0 != idx--; ) { + if ((pregion[idx].userspace_address < base_address) || + (pregion[idx].userspace_address > + (base_address + mapped_size))) { + memmove(&pregion[idx], &pregion[idx + 1], + sizeof(struct virtio_memory_regions) * + valid_regions); + } else + valid_regions++; + } + } + + for (idx = 0; idx < valid_regions; idx++) { + pregion[idx].address_offset = + mapped_address - base_address + + pregion[idx].userspace_address - + pregion[idx].guest_phys_address; + } + dev->mem->nregions = valid_regions; + + return 0; +} + +/* + * Function to get the tap device name from the provided file descriptor and + * save it in the device structure. + */ +static int +get_ifname(struct vhost_device_ctx ctx, struct virtio_net *dev, int tap_fd, int pid) +{ + int fd_tap; + struct ifreq ifr; + uint32_t ifr_size; + int ret; + + fd_tap = eventfd_copy(tap_fd, pid); + if (fd_tap < 0) + return -1; + + ret = ioctl(fd_tap, TUNGETIFF, &ifr); + + if (close(fd_tap) < 0) + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") fd close failed\n", + dev->device_fh); + + if (ret >= 0) { + ifr_size = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name)); + vhost_set_ifname(ctx, ifr.ifr_name, ifr_size); + } else + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") TUNGETIFF ioctl failed\n", + dev->device_fh); + + return 0; +} + +int cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file) +{ + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + if (!(dev->flags & VIRTIO_DEV_RUNNING) && file->fd != VIRTIO_DEV_STOPPED) + get_ifname(ctx, dev, file->fd, ctx.pid); + + return vhost_set_backend(ctx, file); +} + +void +vhost_backend_cleanup(struct virtio_net *dev) +{ + /* Unmap QEMU memory file if mapped. */ + if (dev->mem) { + munmap((void *)(uintptr_t)dev->mem->mapped_address, + (size_t)dev->mem->mapped_size); + free(dev->mem); + dev->mem = NULL; + } +} diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h new file mode 100644 index 00000000..eb6b0bab --- /dev/null +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h @@ -0,0 +1,48 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _VIRTIO_NET_CDEV_H +#define _VIRTIO_NET_CDEV_H + +#include +#include + +#include "vhost-net.h" + +int +cuse_set_mem_table(struct vhost_device_ctx ctx, + const struct vhost_memory *mem_regions_addr, uint32_t nregions); + +int +cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *); + +#endif -- cgit 1.2.3-korg