Diffstat (limited to 'lib/librte_vhost/vhost_user')
-rw-r--r-- | lib/librte_vhost/vhost_user/fd_man.c          | 289
-rw-r--r-- | lib/librte_vhost/vhost_user/fd_man.h          |  67
-rw-r--r-- | lib/librte_vhost/vhost_user/vhost-net-user.c  | 531
-rw-r--r-- | lib/librte_vhost/vhost_user/vhost-net-user.h  | 117
-rw-r--r-- | lib/librte_vhost/vhost_user/virtio-net-user.c | 446
-rw-r--r-- | lib/librte_vhost/vhost_user/virtio-net-user.h |  64
6 files changed, 1514 insertions, 0 deletions
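fd_man.c and fd_man.h, the first files in the diff below, provide the small select()-based event loop the vhost-user socket layer is built on: fdset_init() clears the fd table, fdset_add() registers an fd with read/write callbacks and a context pointer, fdset_event_dispatch() loops forever invoking those callbacks, and fdset_del() unregisters an fd (waiting while a callback on it is still running). The following sketch is illustrative only and is not part of the patch; it uses STDIN_FILENO as a stand-in for the unix-domain sockets the real code registers.

/* Illustrative sketch of the fd_man API defined in the diff below
 * (not part of the patch). Registers stdin as a readable fd and
 * echoes whatever it reads. */
#include <stdio.h>
#include <unistd.h>

#include "fd_man.h"

/* Mirror the patch's g_vhost_server: the mutex is initialised statically,
 * since fdset_init() only resets the fd entries. */
static struct fdset demo_fdset = {
	.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
};

static void
stdin_readable(int fd, void *dat, int *remove)
{
	char buf[128];
	ssize_t n = read(fd, buf, sizeof(buf));

	if (n <= 0) {
		/* Ask the dispatcher to drop this slot. Callbacks must not
		 * call fdset_del() on their own fd; see the comment in
		 * fdset_event_dispatch(). */
		*remove = 1;
		return;
	}
	fwrite(buf, 1, (size_t)n, stdout);
	(void)dat;
}

int
main(void)
{
	fdset_init(&demo_fdset);
	fdset_add(&demo_fdset, STDIN_FILENO, stdin_readable, NULL, NULL);
	fdset_event_dispatch(&demo_fdset);	/* blocks forever */
	return 0;
}

vhost-net-user.c follows the same pattern: the listen socket's read callback (vserver_new_vq_conn) accepts a connection and registers the new fd with vserver_message_handler as its read callback.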
diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c new file mode 100644 index 00000000..087aaed6 --- /dev/null +++ b/lib/librte_vhost/vhost_user/fd_man.c @@ -0,0 +1,289 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <sys/select.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#include <rte_common.h> +#include <rte_log.h> + +#include "fd_man.h" + +/** + * Returns the index in the fdset for a given fd. + * If fd is -1, it means to search for a free entry. + * @return + * index for the fd, or -1 if fd isn't in the fdset. + */ +static int +fdset_find_fd(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL) + return -1; + + for (i = 0; i < MAX_FDS && pfdset->fd[i].fd != fd; i++) + ; + + return i == MAX_FDS ? -1 : i; +} + +static int +fdset_find_free_slot(struct fdset *pfdset) +{ + return fdset_find_fd(pfdset, -1); +} + +static void +fdset_add_fd(struct fdset *pfdset, int idx, int fd, + fd_cb rcb, fd_cb wcb, void *dat) +{ + struct fdentry *pfdentry; + + if (pfdset == NULL || idx >= MAX_FDS) + return; + + pfdentry = &pfdset->fd[idx]; + pfdentry->fd = fd; + pfdentry->rcb = rcb; + pfdentry->wcb = wcb; + pfdentry->dat = dat; +} + +/** + * Fill the read/write fd_set with the fds in the fdset. + * @return + * the maximum fds filled in the read/write fd_set. + */ +static int +fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset) +{ + struct fdentry *pfdentry; + int i, maxfds = -1; + int num = MAX_FDS; + + if (pfdset == NULL) + return -1; + + for (i = 0; i < num; i++) { + pfdentry = &pfdset->fd[i]; + if (pfdentry->fd != -1) { + int added = 0; + if (pfdentry->rcb && rfset) { + FD_SET(pfdentry->fd, rfset); + added = 1; + } + if (pfdentry->wcb && wfset) { + FD_SET(pfdentry->fd, wfset); + added = 1; + } + if (added) + maxfds = pfdentry->fd < maxfds ? 
+ maxfds : pfdentry->fd; + } + } + return maxfds; +} + +void +fdset_init(struct fdset *pfdset) +{ + int i; + + if (pfdset == NULL) + return; + + for (i = 0; i < MAX_FDS; i++) + pfdset->fd[i].fd = -1; + pfdset->num = 0; +} + +/** + * Register the fd in the fdset with read/write handler and context. + */ +int +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat) +{ + int i; + + if (pfdset == NULL || fd == -1) + return -1; + + pthread_mutex_lock(&pfdset->fd_mutex); + + /* Find a free slot in the list. */ + i = fdset_find_free_slot(pfdset); + if (i == -1) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -2; + } + + fdset_add_fd(pfdset, i, fd, rcb, wcb, dat); + pfdset->num++; + + pthread_mutex_unlock(&pfdset->fd_mutex); + + return 0; +} + +/** + * Unregister the fd from the fdset. + */ +void +fdset_del(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL || fd == -1) + return; + + do { + pthread_mutex_lock(&pfdset->fd_mutex); + + i = fdset_find_fd(pfdset, fd); + if (i != -1 && pfdset->fd[i].busy == 0) { + /* busy indicates r/wcb is executing! */ + pfdset->fd[i].fd = -1; + pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; + pfdset->num--; + i = -1; + } + pthread_mutex_unlock(&pfdset->fd_mutex); + } while (i != -1); +} + +/** + * Unregister the fd at the specified slot from the fdset. + */ +static void +fdset_del_slot(struct fdset *pfdset, int index) +{ + if (pfdset == NULL || index < 0 || index >= MAX_FDS) + return; + + pthread_mutex_lock(&pfdset->fd_mutex); + + pfdset->fd[index].fd = -1; + pfdset->fd[index].rcb = pfdset->fd[index].wcb = NULL; + pfdset->num--; + + pthread_mutex_unlock(&pfdset->fd_mutex); +} + +/** + * This functions runs in infinite blocking loop until there is no fd in + * pfdset. It calls corresponding r/w handler if there is event on the fd. + * + * Before the callback is called, we set the flag to busy status; If other + * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it + * will wait until the flag is reset to zero(which indicates the callback is + * finished), then it could free the context after fdset_del. + */ +void +fdset_event_dispatch(struct fdset *pfdset) +{ + fd_set rfds, wfds; + int i, maxfds; + struct fdentry *pfdentry; + int num = MAX_FDS; + fd_cb rcb, wcb; + void *dat; + int fd; + int remove1, remove2; + int ret; + + if (pfdset == NULL) + return; + + while (1) { + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + FD_ZERO(&rfds); + FD_ZERO(&wfds); + pthread_mutex_lock(&pfdset->fd_mutex); + + maxfds = fdset_fill(&rfds, &wfds, pfdset); + + pthread_mutex_unlock(&pfdset->fd_mutex); + + /* + * When select is blocked, other threads might unregister + * listenfds from and register new listenfds into fdset. + * When select returns, the entries for listenfds in the fdset + * might have been updated. It is ok if there is unwanted call + * for new listenfds. + */ + ret = select(maxfds + 1, &rfds, &wfds, NULL, &tv); + if (ret <= 0) + continue; + + for (i = 0; i < num; i++) { + remove1 = remove2 = 0; + pthread_mutex_lock(&pfdset->fd_mutex); + pfdentry = &pfdset->fd[i]; + fd = pfdentry->fd; + rcb = pfdentry->rcb; + wcb = pfdentry->wcb; + dat = pfdentry->dat; + pfdentry->busy = 1; + pthread_mutex_unlock(&pfdset->fd_mutex); + if (fd >= 0 && FD_ISSET(fd, &rfds) && rcb) + rcb(fd, dat, &remove1); + if (fd >= 0 && FD_ISSET(fd, &wfds) && wcb) + wcb(fd, dat, &remove2); + pfdentry->busy = 0; + /* + * fdset_del needs to check busy flag. + * We don't allow fdset_del to be called in callback + * directly. 
+ */ + /* + * When we are to clean up the fd from fdset, + * because the fd is closed in the cb, + * the old fd val could be reused by when creates new + * listen fd in another thread, we couldn't call + * fd_set_del. + */ + if (remove1 || remove2) + fdset_del_slot(pfdset, i); + } + } +} diff --git a/lib/librte_vhost/vhost_user/fd_man.h b/lib/librte_vhost/vhost_user/fd_man.h new file mode 100644 index 00000000..74ecde2c --- /dev/null +++ b/lib/librte_vhost/vhost_user/fd_man.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _FD_MAN_H_ +#define _FD_MAN_H_ +#include <stdint.h> +#include <pthread.h> + +#define MAX_FDS 1024 + +typedef void (*fd_cb)(int fd, void *dat, int *remove); + +struct fdentry { + int fd; /* -1 indicates this entry is empty */ + fd_cb rcb; /* callback when this fd is readable. */ + fd_cb wcb; /* callback when this fd is writeable.*/ + void *dat; /* fd context */ + int busy; /* whether this entry is being used in cb. */ +}; + +struct fdset { + struct fdentry fd[MAX_FDS]; + pthread_mutex_t fd_mutex; + int num; /* current fd number of this fdset */ +}; + + +void fdset_init(struct fdset *pfdset); + +int fdset_add(struct fdset *pfdset, int fd, + fd_cb rcb, fd_cb wcb, void *dat); + +void fdset_del(struct fdset *pfdset, int fd); + +void fdset_event_dispatch(struct fdset *pfdset); + +#endif diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c new file mode 100644 index 00000000..df2bd648 --- /dev/null +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c @@ -0,0 +1,531 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <stdio.h> +#include <limits.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <errno.h> +#include <pthread.h> + +#include <rte_log.h> +#include <rte_virtio_net.h> + +#include "fd_man.h" +#include "vhost-net-user.h" +#include "vhost-net.h" +#include "virtio-net-user.h" + +#define MAX_VIRTIO_BACKLOG 128 + +static void vserver_new_vq_conn(int fd, void *data, int *remove); +static void vserver_message_handler(int fd, void *dat, int *remove); + +struct connfd_ctx { + struct vhost_server *vserver; + uint32_t fh; +}; + +#define MAX_VHOST_SERVER 1024 +struct _vhost_server { + struct vhost_server *server[MAX_VHOST_SERVER]; + struct fdset fdset; + int vserver_cnt; + pthread_mutex_t server_mutex; +}; + +static struct _vhost_server g_vhost_server = { + .fdset = { + .fd = { [0 ... 
MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} }, + .fd_mutex = PTHREAD_MUTEX_INITIALIZER, + .num = 0 + }, + .vserver_cnt = 0, + .server_mutex = PTHREAD_MUTEX_INITIALIZER, +}; + +static const char *vhost_message_str[VHOST_USER_MAX] = { + [VHOST_USER_NONE] = "VHOST_USER_NONE", + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", + [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", + [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", + [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", + [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", +}; + +/** + * Create a unix domain socket, bind to path and listen for connection. + * @return + * socket fd or -1 on failure + */ +static int +uds_socket(const char *path) +{ + struct sockaddr_un un; + int sockfd; + int ret; + + if (path == NULL) + return -1; + + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd < 0) + return -1; + RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd); + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un)); + if (ret == -1) { + RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try again.\n", + sockfd, path); + goto err; + } + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); + + ret = listen(sockfd, MAX_VIRTIO_BACKLOG); + if (ret == -1) + goto err; + + return sockfd; + +err: + close(sockfd); + return -1; +} + +/* return bytes# of read on success or negative val on failure. */ +static int +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) +{ + struct iovec iov; + struct msghdr msgh; + size_t fdsize = fd_num * sizeof(int); + char control[CMSG_SPACE(fdsize)]; + struct cmsghdr *cmsg; + int ret; + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = buf; + iov.iov_len = buflen; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + ret = recvmsg(sockfd, &msgh, 0); + if (ret <= 0) { + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); + return ret; + } + + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n"); + return -1; + } + + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msgh, cmsg)) { + if ((cmsg->cmsg_level == SOL_SOCKET) && + (cmsg->cmsg_type == SCM_RIGHTS)) { + memcpy(fds, CMSG_DATA(cmsg), fdsize); + break; + } + } + + return ret; +} + +/* return bytes# of read on success or negative val on failure. 
*/ +static int +read_vhost_message(int sockfd, struct VhostUserMsg *msg) +{ + int ret; + + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, + msg->fds, VHOST_MEMORY_MAX_NREGIONS); + if (ret <= 0) + return ret; + + if (msg && msg->size) { + if (msg->size > sizeof(msg->payload)) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid msg size: %d\n", msg->size); + return -1; + } + ret = read(sockfd, &msg->payload, msg->size); + if (ret <= 0) + return ret; + if (ret != (int)msg->size) { + RTE_LOG(ERR, VHOST_CONFIG, + "read control message failed\n"); + return -1; + } + } + + return ret; +} + +static int +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) +{ + + struct iovec iov; + struct msghdr msgh; + size_t fdsize = fd_num * sizeof(int); + char control[CMSG_SPACE(fdsize)]; + struct cmsghdr *cmsg; + int ret; + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = buf; + iov.iov_len = buflen; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + + if (fds && fd_num > 0) { + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_len = CMSG_LEN(fdsize); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } else { + msgh.msg_control = NULL; + msgh.msg_controllen = 0; + } + + do { + ret = sendmsg(sockfd, &msgh, 0); + } while (ret < 0 && errno == EINTR); + + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n"); + return ret; + } + + return ret; +} + +static int +send_vhost_message(int sockfd, struct VhostUserMsg *msg) +{ + int ret; + + if (!msg) + return 0; + + msg->flags &= ~VHOST_USER_VERSION_MASK; + msg->flags |= VHOST_USER_VERSION; + msg->flags |= VHOST_USER_REPLY_MASK; + + ret = send_fd_message(sockfd, (char *)msg, + VHOST_USER_HDR_SIZE + msg->size, NULL, 0); + + return ret; +} + +/* call back when there is new virtio connection. 
*/ +static void +vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove) +{ + struct vhost_server *vserver = (struct vhost_server *)dat; + int conn_fd; + struct connfd_ctx *ctx; + int fh; + struct vhost_device_ctx vdev_ctx = { (pid_t)0, 0 }; + unsigned int size; + + conn_fd = accept(fd, NULL, NULL); + RTE_LOG(INFO, VHOST_CONFIG, + "new virtio connection is %d\n", conn_fd); + if (conn_fd < 0) + return; + + ctx = calloc(1, sizeof(*ctx)); + if (ctx == NULL) { + close(conn_fd); + return; + } + + fh = vhost_new_device(vdev_ctx); + if (fh == -1) { + free(ctx); + close(conn_fd); + return; + } + + vdev_ctx.fh = fh; + size = strnlen(vserver->path, PATH_MAX); + vhost_set_ifname(vdev_ctx, vserver->path, + size); + + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh); + + ctx->vserver = vserver; + ctx->fh = fh; + fdset_add(&g_vhost_server.fdset, + conn_fd, vserver_message_handler, NULL, ctx); +} + +/* callback when there is message on the connfd */ +static void +vserver_message_handler(int connfd, void *dat, int *remove) +{ + struct vhost_device_ctx ctx; + struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat; + struct VhostUserMsg msg; + uint64_t features; + int ret; + + ctx.fh = cfd_ctx->fh; + ret = read_vhost_message(connfd, &msg); + if (ret <= 0 || msg.request >= VHOST_USER_MAX) { + if (ret < 0) + RTE_LOG(ERR, VHOST_CONFIG, + "vhost read message failed\n"); + else if (ret == 0) + RTE_LOG(INFO, VHOST_CONFIG, + "vhost peer closed\n"); + else + RTE_LOG(ERR, VHOST_CONFIG, + "vhost read incorrect message\n"); + + close(connfd); + *remove = 1; + free(cfd_ctx); + vhost_destroy_device(ctx); + + return; + } + + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n", + vhost_message_str[msg.request]); + switch (msg.request) { + case VHOST_USER_GET_FEATURES: + ret = vhost_get_features(ctx, &features); + msg.payload.u64 = features; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(connfd, &msg); + break; + case VHOST_USER_SET_FEATURES: + features = msg.payload.u64; + vhost_set_features(ctx, &features); + break; + + case VHOST_USER_GET_PROTOCOL_FEATURES: + msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(connfd, &msg); + break; + case VHOST_USER_SET_PROTOCOL_FEATURES: + user_set_protocol_features(ctx, msg.payload.u64); + break; + + case VHOST_USER_SET_OWNER: + vhost_set_owner(ctx); + break; + case VHOST_USER_RESET_OWNER: + vhost_reset_owner(ctx); + break; + + case VHOST_USER_SET_MEM_TABLE: + user_set_mem_table(ctx, &msg); + break; + + case VHOST_USER_SET_LOG_BASE: + user_set_log_base(ctx, &msg); + + /* it needs a reply */ + msg.size = sizeof(msg.payload.u64); + send_vhost_message(connfd, &msg); + break; + case VHOST_USER_SET_LOG_FD: + close(msg.fds[0]); + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); + break; + + case VHOST_USER_SET_VRING_NUM: + vhost_set_vring_num(ctx, &msg.payload.state); + break; + case VHOST_USER_SET_VRING_ADDR: + vhost_set_vring_addr(ctx, &msg.payload.addr); + break; + case VHOST_USER_SET_VRING_BASE: + vhost_set_vring_base(ctx, &msg.payload.state); + break; + + case VHOST_USER_GET_VRING_BASE: + ret = user_get_vring_base(ctx, &msg.payload.state); + msg.size = sizeof(msg.payload.state); + send_vhost_message(connfd, &msg); + break; + + case VHOST_USER_SET_VRING_KICK: + user_set_vring_kick(ctx, &msg); + break; + case VHOST_USER_SET_VRING_CALL: + user_set_vring_call(ctx, &msg); + break; + + case VHOST_USER_SET_VRING_ERR: + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) + close(msg.fds[0]); + 
RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n"); + break; + + case VHOST_USER_GET_QUEUE_NUM: + msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(connfd, &msg); + break; + + case VHOST_USER_SET_VRING_ENABLE: + user_set_vring_enable(ctx, &msg.payload.state); + break; + case VHOST_USER_SEND_RARP: + user_send_rarp(ctx, &msg); + break; + + default: + break; + + } +} + +/** + * Creates and initialise the vhost server. + */ +int +rte_vhost_driver_register(const char *path) +{ + struct vhost_server *vserver; + + pthread_mutex_lock(&g_vhost_server.server_mutex); + + if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) { + RTE_LOG(ERR, VHOST_CONFIG, + "error: the number of servers reaches maximum\n"); + pthread_mutex_unlock(&g_vhost_server.server_mutex); + return -1; + } + + vserver = calloc(sizeof(struct vhost_server), 1); + if (vserver == NULL) { + pthread_mutex_unlock(&g_vhost_server.server_mutex); + return -1; + } + + vserver->listenfd = uds_socket(path); + if (vserver->listenfd < 0) { + free(vserver); + pthread_mutex_unlock(&g_vhost_server.server_mutex); + return -1; + } + + vserver->path = strdup(path); + + fdset_add(&g_vhost_server.fdset, vserver->listenfd, + vserver_new_vq_conn, NULL, vserver); + + g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver; + pthread_mutex_unlock(&g_vhost_server.server_mutex); + + return 0; +} + + +/** + * Unregister the specified vhost server + */ +int +rte_vhost_driver_unregister(const char *path) +{ + int i; + int count; + + pthread_mutex_lock(&g_vhost_server.server_mutex); + + for (i = 0; i < g_vhost_server.vserver_cnt; i++) { + if (!strcmp(g_vhost_server.server[i]->path, path)) { + fdset_del(&g_vhost_server.fdset, + g_vhost_server.server[i]->listenfd); + + close(g_vhost_server.server[i]->listenfd); + free(g_vhost_server.server[i]->path); + free(g_vhost_server.server[i]); + + unlink(path); + + count = --g_vhost_server.vserver_cnt; + g_vhost_server.server[i] = g_vhost_server.server[count]; + g_vhost_server.server[count] = NULL; + pthread_mutex_unlock(&g_vhost_server.server_mutex); + + return 0; + } + } + pthread_mutex_unlock(&g_vhost_server.server_mutex); + + return -1; +} + +int +rte_vhost_driver_session_start(void) +{ + fdset_event_dispatch(&g_vhost_server.fdset); + return 0; +} diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h new file mode 100644 index 00000000..e3bb4138 --- /dev/null +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h @@ -0,0 +1,117 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include <stdint.h> +#include <linux/vhost.h> + +#include "rte_virtio_net.h" +#include "fd_man.h" + +struct vhost_server { + char *path; /**< The path the uds is bind to. */ + int listenfd; /**< The listener sockfd. */ +}; + +/* refer to hw/virtio/vhost-user.c */ + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_MAX +} VhostUserRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; + uint64_t mmap_offset; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + +typedef struct VhostUserMsg { + VhostUserRequest request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + VhostUserLog log; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)) VhostUserMsg; + +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + +/*****************************************************************************/ +#endif diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c new file mode 100644 index 00000000..f5248bc4 --- /dev/null +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c @@ -0,0 +1,446 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <rte_common.h> +#include <rte_log.h> + +#include "virtio-net.h" +#include "virtio-net-user.h" +#include "vhost-net-user.h" +#include "vhost-net.h" + +struct orig_region_map { + int fd; + uint64_t mapped_address; + uint64_t mapped_size; + uint64_t blksz; +}; + +#define orig_region(ptr, nregions) \ + ((struct orig_region_map *)RTE_PTR_ADD((ptr), \ + sizeof(struct virtio_memory) + \ + sizeof(struct virtio_memory_regions) * (nregions))) + +static uint64_t +get_blk_size(int fd) +{ + struct stat stat; + + fstat(fd, &stat); + return (uint64_t)stat.st_blksize; +} + +static void +free_mem_region(struct virtio_net *dev) +{ + struct orig_region_map *region; + unsigned int idx; + + if (!dev || !dev->mem) + return; + + region = orig_region(dev->mem, dev->mem->nregions); + for (idx = 0; idx < dev->mem->nregions; idx++) { + if (region[idx].mapped_address) { + munmap((void *)(uintptr_t)region[idx].mapped_address, + region[idx].mapped_size); + close(region[idx].fd); + } + } +} + +void +vhost_backend_cleanup(struct virtio_net *dev) +{ + if (dev->mem) { + free_mem_region(dev); + free(dev->mem); + dev->mem = NULL; + } +} + +int +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) +{ + struct VhostUserMemory memory = pmsg->payload.memory; + struct virtio_memory_regions *pregion; + uint64_t mapped_address, mapped_size; + struct virtio_net *dev; + unsigned int idx = 0; + struct orig_region_map *pregion_orig; + uint64_t alignment; + + /* unmap old memory regions one by one*/ + dev = get_device(ctx); + if (dev == NULL) + return -1; + + /* Remove from the data plane. 
*/ + if (dev->flags & VIRTIO_DEV_RUNNING) + notify_ops->destroy_device(dev); + + if (dev->mem) { + free_mem_region(dev); + free(dev->mem); + dev->mem = NULL; + } + + dev->mem = calloc(1, + sizeof(struct virtio_memory) + + sizeof(struct virtio_memory_regions) * memory.nregions + + sizeof(struct orig_region_map) * memory.nregions); + if (dev->mem == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") Failed to allocate memory for dev->mem\n", + dev->device_fh); + return -1; + } + dev->mem->nregions = memory.nregions; + + pregion_orig = orig_region(dev->mem, memory.nregions); + for (idx = 0; idx < memory.nregions; idx++) { + pregion = &dev->mem->regions[idx]; + pregion->guest_phys_address = + memory.regions[idx].guest_phys_addr; + pregion->guest_phys_address_end = + memory.regions[idx].guest_phys_addr + + memory.regions[idx].memory_size; + pregion->memory_size = + memory.regions[idx].memory_size; + pregion->userspace_address = + memory.regions[idx].userspace_addr; + + /* This is ugly */ + mapped_size = memory.regions[idx].memory_size + + memory.regions[idx].mmap_offset; + + /* mmap() without flag of MAP_ANONYMOUS, should be called + * with length argument aligned with hugepagesz at older + * longterm version Linux, like 2.6.32 and 3.2.72, or + * mmap() will fail with EINVAL. + * + * to avoid failure, make sure in caller to keep length + * aligned. + */ + alignment = get_blk_size(pmsg->fds[idx]); + mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment); + + mapped_address = (uint64_t)(uintptr_t)mmap(NULL, + mapped_size, + PROT_READ | PROT_WRITE, MAP_SHARED, + pmsg->fds[idx], + 0); + + RTE_LOG(INFO, VHOST_CONFIG, + "mapped region %d fd:%d to:%p sz:0x%"PRIx64" " + "off:0x%"PRIx64" align:0x%"PRIx64"\n", + idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address, + mapped_size, memory.regions[idx].mmap_offset, + alignment); + + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap qemu guest failed.\n"); + goto err_mmap; + } + + pregion_orig[idx].mapped_address = mapped_address; + pregion_orig[idx].mapped_size = mapped_size; + pregion_orig[idx].blksz = alignment; + pregion_orig[idx].fd = pmsg->fds[idx]; + + mapped_address += memory.regions[idx].mmap_offset; + + pregion->address_offset = mapped_address - + pregion->guest_phys_address; + + if (memory.regions[idx].guest_phys_addr == 0) { + dev->mem->base_address = + memory.regions[idx].userspace_addr; + dev->mem->mapped_address = + pregion->address_offset; + } + + LOG_DEBUG(VHOST_CONFIG, + "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n", + idx, + (void *)(uintptr_t)pregion->guest_phys_address, + (void *)(uintptr_t)pregion->userspace_address, + pregion->memory_size); + } + + return 0; + +err_mmap: + while (idx--) { + munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address, + pregion_orig[idx].mapped_size); + close(pregion_orig[idx].fd); + } + free(dev->mem); + dev->mem = NULL; + return -1; +} + +static int +vq_is_ready(struct vhost_virtqueue *vq) +{ + return vq && vq->desc && + vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD && + vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD; +} + +static int +virtio_is_ready(struct virtio_net *dev) +{ + struct vhost_virtqueue *rvq, *tvq; + uint32_t i; + + for (i = 0; i < dev->virt_qp_nb; i++) { + rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ]; + tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ]; + + if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) { + RTE_LOG(INFO, VHOST_CONFIG, + "virtio is not ready for processing.\n"); + return 0; + } + } + + RTE_LOG(INFO, VHOST_CONFIG, + "virtio 
is now ready for processing.\n"); + return 1; +} + +void +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) +{ + struct vhost_vring_file file; + + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.fd = VIRTIO_INVALID_EVENTFD; + else + file.fd = pmsg->fds[0]; + RTE_LOG(INFO, VHOST_CONFIG, + "vring call idx:%d file:%d\n", file.index, file.fd); + vhost_set_vring_call(ctx, &file); +} + + +/* + * In vhost-user, when we receive kick message, will test whether virtio + * device is ready for packet processing. + */ +void +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) +{ + struct vhost_vring_file file; + struct virtio_net *dev = get_device(ctx); + + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.fd = VIRTIO_INVALID_EVENTFD; + else + file.fd = pmsg->fds[0]; + RTE_LOG(INFO, VHOST_CONFIG, + "vring kick idx:%d file:%d\n", file.index, file.fd); + vhost_set_vring_kick(ctx, &file); + + if (virtio_is_ready(dev) && + !(dev->flags & VIRTIO_DEV_RUNNING)) + notify_ops->new_device(dev); +} + +/* + * when virtio is stopped, qemu will send us the GET_VRING_BASE message. + */ +int +user_get_vring_base(struct vhost_device_ctx ctx, + struct vhost_vring_state *state) +{ + struct virtio_net *dev = get_device(ctx); + + if (dev == NULL) + return -1; + /* We have to stop the queue (virtio) if it is running. */ + if (dev->flags & VIRTIO_DEV_RUNNING) + notify_ops->destroy_device(dev); + + /* Here we are safe to get the last used index */ + vhost_get_vring_base(ctx, state->index, state); + + RTE_LOG(INFO, VHOST_CONFIG, + "vring base idx:%d file:%d\n", state->index, state->num); + /* + * Based on current qemu vhost-user implementation, this message is + * sent and only sent in vhost_vring_stop. + * TODO: cleanup the vring, it isn't usable since here. + */ + if (dev->virtqueue[state->index]->kickfd >= 0) + close(dev->virtqueue[state->index]->kickfd); + + dev->virtqueue[state->index]->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + + return 0; +} + +/* + * when virtio queues are ready to work, qemu will send us to + * enable the virtio queue pair. 
+ */ +int +user_set_vring_enable(struct vhost_device_ctx ctx, + struct vhost_vring_state *state) +{ + struct virtio_net *dev = get_device(ctx); + int enable = (int)state->num; + + RTE_LOG(INFO, VHOST_CONFIG, + "set queue enable: %d to qp idx: %d\n", + enable, state->index); + + if (notify_ops->vring_state_changed) { + notify_ops->vring_state_changed(dev, state->index, enable); + } + + dev->virtqueue[state->index]->enabled = enable; + + return 0; +} + +void +user_set_protocol_features(struct vhost_device_ctx ctx, + uint64_t protocol_features) +{ + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES) + return; + + dev->protocol_features = protocol_features; +} + +int +user_set_log_base(struct vhost_device_ctx ctx, + struct VhostUserMsg *msg) +{ + struct virtio_net *dev; + int fd = msg->fds[0]; + uint64_t size, off; + void *addr; + + dev = get_device(ctx); + if (!dev) + return -1; + + if (fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd); + return -1; + } + + if (msg->size != sizeof(VhostUserLog)) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid log base msg size: %"PRId32" != %d\n", + msg->size, (int)sizeof(VhostUserLog)); + return -1; + } + + size = msg->payload.log.mmap_size; + off = msg->payload.log.mmap_offset; + RTE_LOG(INFO, VHOST_CONFIG, + "log mmap size: %"PRId64", offset: %"PRId64"\n", + size, off); + + /* + * mmap from 0 to workaround a hugepage mmap bug: mmap will + * fail when offset is not page size aligned. + */ + addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); + return -1; + } + + /* TODO: unmap on stop */ + dev->log_base = (uint64_t)(uintptr_t)addr + off; + dev->log_size = size; + + return 0; +} + +/* + * An rarp packet is constructed and broadcasted to notify switches about + * the new location of the migrated VM, so that packets from outside will + * not be lost after migration. + * + * However, we don't actually "send" a rarp packet here, instead, we set + * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it. + */ +int +user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *msg) +{ + struct virtio_net *dev; + uint8_t *mac = (uint8_t *)&msg->payload.u64; + + dev = get_device(ctx); + if (!dev) + return -1; + + RTE_LOG(DEBUG, VHOST_CONFIG, + ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + memcpy(dev->mac.addr_bytes, mac, 6); + + /* + * Set the flag to inject a RARP broadcast packet at + * rte_vhost_dequeue_burst(). + * + * rte_smp_wmb() is for making sure the mac is copied + * before the flag is set. + */ + rte_smp_wmb(); + rte_atomic16_set(&dev->broadcast_rarp, 1); + + return 0; +} diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h new file mode 100644 index 00000000..cefec162 --- /dev/null +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h @@ -0,0 +1,64 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_NET_USER_H +#define _VIRTIO_NET_USER_H + +#include "vhost-net.h" +#include "vhost-net-user.h" + +#define VHOST_USER_PROTOCOL_F_MQ 0 +#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#define VHOST_USER_PROTOCOL_F_RARP 2 + +#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ + (1ULL << VHOST_USER_PROTOCOL_F_RARP)) + +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *); + +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *); + +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *); + +void user_set_protocol_features(struct vhost_device_ctx ctx, + uint64_t protocol_features); +int user_set_log_base(struct vhost_device_ctx ctx, struct VhostUserMsg *); +int user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *); + +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *); + +int user_set_vring_enable(struct vhost_device_ctx ctx, + struct vhost_vring_state *state); + +#endif |
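Taken together, the externally visible entry points added by this patch are rte_vhost_driver_register(), rte_vhost_driver_unregister() and rte_vhost_driver_session_start(). The sketch below shows how an application might drive them; it is not part of the patch. The device callbacks (struct virtio_net_device_ops and rte_vhost_driver_callback_register()) are declared in rte_virtio_net.h rather than in this diff, so treat those names and signatures as assumptions, and note that EAL initialisation and error handling are omitted.

/* Usage sketch only -- not part of the patch. Assumes the callback
 * registration API from rte_virtio_net.h; EAL setup is omitted. */
#include <inttypes.h>
#include <stdio.h>

#include <rte_virtio_net.h>

static int
new_device(struct virtio_net *dev)
{
	/* Invoked by the message handler once every vring has been kicked
	 * and virtio_is_ready() reports the device usable. */
	printf("vhost device %" PRIu64 " ready\n", dev->device_fh);
	return 0;
}

static void
destroy_device(volatile struct virtio_net *dev)
{
	printf("vhost device %" PRIu64 " removed\n", dev->device_fh);
}

static const struct virtio_net_device_ops ops = {
	.new_device     = new_device,
	.destroy_device = destroy_device,
};

int
main(void)
{
	/* Creates the unix-domain listen socket and adds it to the global
	 * fdset; QEMU connects to this path via its vhost-user chardev. */
	if (rte_vhost_driver_register("/tmp/vhost-user.sock") < 0)
		return -1;

	rte_vhost_driver_callback_register(&ops);

	/* Blocking loop: accepts connections and dispatches vhost-user
	 * messages; it does not return in normal operation. */
	rte_vhost_driver_session_start();

	return 0;
}

Because rte_vhost_driver_session_start() wraps fdset_event_dispatch(), which loops indefinitely, rte_vhost_driver_unregister() is normally called from a separate control thread or a shutdown path rather than after the session loop.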