aboutsummaryrefslogtreecommitdiffstats
path: root/lib/librte_vhost/vhost_user
diff options
context:
space:
mode:
Diffstat (limited to 'lib/librte_vhost/vhost_user')
-rw-r--r--lib/librte_vhost/vhost_user/fd_man.c289
-rw-r--r--lib/librte_vhost/vhost_user/fd_man.h67
-rw-r--r--lib/librte_vhost/vhost_user/vhost-net-user.c531
-rw-r--r--lib/librte_vhost/vhost_user/vhost-net-user.h117
-rw-r--r--lib/librte_vhost/vhost_user/virtio-net-user.c446
-rw-r--r--lib/librte_vhost/vhost_user/virtio-net-user.h64
6 files changed, 1514 insertions, 0 deletions
diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
new file mode 100644
index 00000000..087aaed6
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -0,0 +1,289 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+
+#include "fd_man.h"
+
+/**
+ * Look up the slot that currently holds the given fd.
+ * Empty slots store -1 in their fd field, so passing -1 locates the first
+ * unused slot.
+ * @return
+ *  slot index on a match, or -1 if pfdset is NULL or no slot matches.
+ */
+static int
+fdset_find_fd(struct fdset *pfdset, int fd)
+{
+	int slot;
+
+	if (!pfdset)
+		return -1;
+
+	for (slot = 0; slot < MAX_FDS; slot++) {
+		if (pfdset->fd[slot].fd == fd)
+			return slot;
+	}
+
+	return -1;
+}
+
+/* Return the index of the first unused slot, or -1 when there is none. */
+static int
+fdset_find_free_slot(struct fdset *pfdset)
+{
+	/* unused entries carry -1 in their fd field */
+	const int unused_fd = -1;
+
+	return fdset_find_fd(pfdset, unused_fd);
+}
+
+/**
+ * Store an fd, its read/write callbacks and its context in slot idx.
+ * The call is a no-op when pfdset is NULL or idx is outside [0, MAX_FDS).
+ * No locking is done here; the caller (fdset_add) holds fd_mutex.
+ */
+static void
+fdset_add_fd(struct fdset *pfdset, int idx, int fd,
+	fd_cb rcb, fd_cb wcb, void *dat)
+{
+	struct fdentry *pfdentry;
+
+	if (pfdset == NULL || idx < 0 || idx >= MAX_FDS)
+		return;
+
+	pfdentry = &pfdset->fd[idx];
+	pfdentry->fd = fd;
+	pfdentry->rcb = rcb;
+	pfdentry->wcb = wcb;
+	pfdentry->dat = dat;
+	/*
+	 * A freshly registered fd has no callback in flight. Clearing the
+	 * flag keeps a stale value from making fdset_del() wait forever.
+	 */
+	pfdentry->busy = 0;
+}
+
+/**
+ * Populate the read/write fd_set from the registered entries.
+ * An fd goes into rfset when it has a read callback and rfset is given,
+ * and into wfset when it has a write callback and wfset is given.
+ * @return
+ *  the highest fd placed into either set, or -1 if none was added
+ *  (also -1 when pfdset is NULL).
+ */
+static int
+fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
+{
+	int highest = -1;
+	int slot;
+
+	if (!pfdset)
+		return -1;
+
+	for (slot = 0; slot < MAX_FDS; slot++) {
+		struct fdentry *entry = &pfdset->fd[slot];
+		int want_read, want_write;
+
+		/* skip unused slots */
+		if (entry->fd == -1)
+			continue;
+
+		want_read = entry->rcb != NULL && rfset != NULL;
+		want_write = entry->wcb != NULL && wfset != NULL;
+
+		if (want_read)
+			FD_SET(entry->fd, rfset);
+		if (want_write)
+			FD_SET(entry->fd, wfset);
+
+		if ((want_read || want_write) && entry->fd > highest)
+			highest = entry->fd;
+	}
+
+	return highest;
+}
+
+/**
+ * Reset every slot of the fdset to the empty state and zero the counter.
+ * All fields of each entry are cleared so that no stale callback, context
+ * or busy flag survives when the fdset memory was not zeroed beforehand.
+ * fd_mutex is not touched here; the owner must initialise it separately.
+ */
+void
+fdset_init(struct fdset *pfdset)
+{
+	int i;
+
+	if (pfdset == NULL)
+		return;
+
+	for (i = 0; i < MAX_FDS; i++) {
+		pfdset->fd[i].fd = -1;
+		pfdset->fd[i].rcb = NULL;
+		pfdset->fd[i].wcb = NULL;
+		pfdset->fd[i].dat = NULL;
+		pfdset->fd[i].busy = 0;
+	}
+	pfdset->num = 0;
+}
+
+/**
+ * Register the fd in the fdset with read/write handler and context.
+ *
+ * The dispatcher polls with select(), so only descriptors in the range
+ * [0, FD_SETSIZE) can be monitored; anything else is rejected here rather
+ * than overflowing the fd_set later.
+ *
+ * @return
+ *  0 on success, -1 on invalid arguments (NULL fdset or fd out of range),
+ *  -2 if no free slot is left.
+ */
+int
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
+{
+	int i;
+
+	if (pfdset == NULL || fd < 0 || fd >= FD_SETSIZE)
+		return -1;
+
+	pthread_mutex_lock(&pfdset->fd_mutex);
+
+	/* Find a free slot in the list. */
+	i = fdset_find_free_slot(pfdset);
+	if (i == -1) {
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+		return -2;
+	}
+
+	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
+	pfdset->num++;
+
+	pthread_mutex_unlock(&pfdset->fd_mutex);
+
+	return 0;
+}
+
+/**
+ * Unregister the fd from the fdset.
+ *
+ * If the entry is marked busy (one of its callbacks is executing), the
+ * mutex is released and the lookup is retried until the entry is idle or
+ * no longer present.
+ */
+void
+fdset_del(struct fdset *pfdset, int fd)
+{
+	int retry;
+
+	if (pfdset == NULL || fd == -1)
+		return;
+
+	do {
+		int slot;
+
+		pthread_mutex_lock(&pfdset->fd_mutex);
+
+		slot = fdset_find_fd(pfdset, fd);
+		retry = (slot != -1);
+		if (retry && pfdset->fd[slot].busy == 0) {
+			struct fdentry *entry = &pfdset->fd[slot];
+
+			/* idle entry: release the slot */
+			entry->fd = -1;
+			entry->wcb = NULL;
+			entry->rcb = NULL;
+			pfdset->num--;
+			retry = 0;
+		}
+
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+	} while (retry);
+}
+
+/**
+ * Release the entry stored at the given slot index, whatever fd it holds.
+ * Does nothing when pfdset is NULL or index is outside [0, MAX_FDS).
+ */
+static void
+fdset_del_slot(struct fdset *pfdset, int index)
+{
+	struct fdentry *entry;
+
+	if (pfdset == NULL)
+		return;
+	if (index < 0 || index >= MAX_FDS)
+		return;
+
+	entry = &pfdset->fd[index];
+
+	pthread_mutex_lock(&pfdset->fd_mutex);
+	entry->fd = -1;
+	entry->wcb = NULL;
+	entry->rcb = NULL;
+	pfdset->num--;
+	pthread_mutex_unlock(&pfdset->fd_mutex);
+}
+
+/**
+ * Run an endless select() loop over the fds registered in pfdset and call
+ * the matching read/write handler whenever an fd becomes ready. The
+ * function returns only when pfdset is NULL.
+ *
+ * While the callbacks of an entry run, the entry is flagged busy. A thread
+ * calling fdset_del() concurrently (currently rte_vhost_driver_unregister)
+ * waits until the flag drops back to zero, i.e. until the callback has
+ * finished, and may free the callback context only after fdset_del()
+ * returns. For that reason a callback must not call fdset_del() itself; it
+ * requests removal through its "remove" output argument instead.
+ */
+void
+fdset_event_dispatch(struct fdset *pfdset)
+{
+	fd_set rfds, wfds;
+	struct fdentry *pfdentry;
+	struct timeval tv;
+	fd_cb rcb, wcb;
+	void *dat;
+	int i, maxfds, fd, ret;
+	int remove1, remove2;
+
+	if (pfdset == NULL)
+		return;
+
+	while (1) {
+		/* select() may modify the timeout, so re-arm it every pass. */
+		tv.tv_sec = 1;
+		tv.tv_usec = 0;
+		FD_ZERO(&rfds);
+		FD_ZERO(&wfds);
+
+		pthread_mutex_lock(&pfdset->fd_mutex);
+		maxfds = fdset_fill(&rfds, &wfds, pfdset);
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+
+		/*
+		 * When select is blocked, other threads might unregister
+		 * listenfds from and register new listenfds into fdset.
+		 * When select returns, the entries for listenfds in the fdset
+		 * might have been updated. It is ok if there is unwanted call
+		 * for new listenfds.
+		 */
+		ret = select(maxfds + 1, &rfds, &wfds, NULL, &tv);
+		if (ret <= 0)
+			continue;
+
+		for (i = 0; i < MAX_FDS; i++) {
+			remove1 = remove2 = 0;
+
+			pthread_mutex_lock(&pfdset->fd_mutex);
+			pfdentry = &pfdset->fd[i];
+			fd = pfdentry->fd;
+			if (fd < 0 || fd >= FD_SETSIZE) {
+				/*
+				 * Empty slot, or an fd that cannot be
+				 * represented in an fd_set: nothing to run,
+				 * so do not flag the slot busy.
+				 */
+				pthread_mutex_unlock(&pfdset->fd_mutex);
+				continue;
+			}
+			rcb = pfdentry->rcb;
+			wcb = pfdentry->wcb;
+			dat = pfdentry->dat;
+			pfdentry->busy = 1;
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+
+			if (FD_ISSET(fd, &rfds) && rcb)
+				rcb(fd, dat, &remove1);
+			if (FD_ISSET(fd, &wfds) && wcb)
+				wcb(fd, dat, &remove2);
+
+			/*
+			 * The callback closed the fd and asked for removal.
+			 * The fd number may already have been handed out
+			 * again to another thread, so the entry is removed by
+			 * slot index rather than by fd value. This is done
+			 * while the slot is still flagged busy, so a
+			 * concurrent fdset_del() cannot release the slot and
+			 * let it be reused before we clear it.
+			 */
+			if (remove1 || remove2)
+				fdset_del_slot(pfdset, i);
+
+			/*
+			 * fdset_del() reads the busy flag under fd_mutex, so
+			 * clear it under the same mutex.
+			 */
+			pthread_mutex_lock(&pfdset->fd_mutex);
+			pfdentry->busy = 0;
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+		}
+	}
+}
diff --git a/lib/librte_vhost/vhost_user/fd_man.h b/lib/librte_vhost/vhost_user/fd_man.h
new file mode 100644
index 00000000..74ecde2c
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/fd_man.h
@@ -0,0 +1,67 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FD_MAN_H_
+#define _FD_MAN_H_
+#include <stdint.h>
+#include <pthread.h>
+
+/* Number of entries in the fd array of a struct fdset. */
+#define MAX_FDS 1024
+
+/*
+ * Event handler invoked by fdset_event_dispatch().
+ * fd is the ready descriptor, dat the context given to fdset_add().
+ * Setting *remove to a non-zero value makes the dispatcher drop the entry
+ * once the handler returns.
+ */
+typedef void (*fd_cb)(int fd, void *dat, int *remove);
+
+/* One registered descriptor together with its handlers and context. */
+struct fdentry {
+ int fd; /* -1 indicates this entry is empty */
+ fd_cb rcb; /* callback when this fd is readable. */
+ fd_cb wcb; /* callback when this fd is writable. */
+ void *dat; /* fd context, handed to rcb/wcb */
+ int busy; /* non-zero while the dispatcher runs rcb/wcb; fdset_del waits for 0 */
+};
+
+/* Fixed-size table of registered descriptors, guarded by fd_mutex. */
+struct fdset {
+ struct fdentry fd[MAX_FDS];
+ pthread_mutex_t fd_mutex; /* taken by fdset_add/fdset_del and the dispatcher */
+ int num; /* current fd number of this fdset */
+};
+
+
+/* Mark every slot of pfdset as empty and reset its counter. */
+void fdset_init(struct fdset *pfdset);
+
+/*
+ * Register fd with read callback rcb, write callback wcb and context dat.
+ * Returns 0 on success, -1 on invalid arguments, -2 when no slot is free.
+ */
+int fdset_add(struct fdset *pfdset, int fd,
+ fd_cb rcb, fd_cb wcb, void *dat);
+
+/* Unregister fd; waits while a callback of that entry is executing. */
+void fdset_del(struct fdset *pfdset, int fd);
+
+/* Poll the registered fds in an endless loop and invoke their callbacks. */
+void fdset_event_dispatch(struct fdset *pfdset);
+
+#endif
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
new file mode 100644
index 00000000..df2bd648
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -0,0 +1,531 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include <rte_log.h>
+#include <rte_virtio_net.h>
+
+#include "fd_man.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+#include "virtio-net-user.h"
+
+#define MAX_VIRTIO_BACKLOG 128
+
+/* fd callbacks: the first handles listen sockets, the second connections. */
+static void vserver_new_vq_conn(int fd, void *data, int *remove);
+static void vserver_message_handler(int fd, void *dat, int *remove);
+
+/* Per-connection context attached to each accepted socket in the fdset. */
+struct connfd_ctx {
+ struct vhost_server *vserver; /* server whose listen socket accepted it */
+ uint32_t fh; /* device handle returned by vhost_new_device() */
+};
+
+/* Upper bound on the number of simultaneously registered servers. */
+#define MAX_VHOST_SERVER 1024
+/* Registry of all vhost-user servers plus the fdset they share. */
+struct _vhost_server {
+ struct vhost_server *server[MAX_VHOST_SERVER];
+ struct fdset fdset; /* listen and connection fds of every server */
+ int vserver_cnt; /* number of used entries at the front of server[] */
+ pthread_mutex_t server_mutex; /* guards server[] and vserver_cnt */
+};
+
+/*
+ * The single global registry. Every fdset slot starts out empty (fd == -1)
+ * and both mutexes are statically initialised.
+ */
+static struct _vhost_server g_vhost_server = {
+ .fdset = {
+ .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
+ .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
+ .num = 0
+ },
+ .vserver_cnt = 0,
+ .server_mutex = PTHREAD_MUTEX_INITIALIZER,
+};
+
+/* Printable name of each request code, indexed by VhostUserRequest value. */
+static const char *vhost_message_str[VHOST_USER_MAX] = {
+ [VHOST_USER_NONE] = "VHOST_USER_NONE",
+ [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
+ [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
+ [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
+ [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
+ [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
+ [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
+ [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
+ [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
+ [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
+ [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
+ [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
+ [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
+ [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
+ [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
+ [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
+ [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
+ [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
+ [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
+ [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
+};
+
+/**
+ * Create a unix domain stream socket, bind it to path and start listening
+ * with a backlog of MAX_VIRTIO_BACKLOG.
+ *
+ * A path that does not fit into sockaddr_un.sun_path (including the
+ * terminating NUL) is rejected instead of being silently truncated, which
+ * would bind the socket to a different file than the caller asked for.
+ *
+ * @return
+ *  socket fd or -1 on failure
+ */
+static int
+uds_socket(const char *path)
+{
+	struct sockaddr_un un;
+	int sockfd;
+	int ret;
+
+	if (path == NULL)
+		return -1;
+
+	if (strlen(path) >= sizeof(un.sun_path)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"socket path is too long: %s\n", path);
+		return -1;
+	}
+
+	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (sockfd < 0)
+		return -1;
+	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
+
+	memset(&un, 0, sizeof(un));
+	un.sun_family = AF_UNIX;
+	snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
+	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
+	if (ret == -1) {
+		RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try again.\n",
+			sockfd, path);
+		goto err;
+	}
+	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
+
+	ret = listen(sockfd, MAX_VIRTIO_BACKLOG);
+	if (ret == -1)
+		goto err;
+
+	return sockfd;
+
+err:
+	close(sockfd);
+	return -1;
+}
+
+/*
+ * Receive up to buflen bytes from sockfd into buf, together with up to
+ * fd_num file descriptors passed as SCM_RIGHTS ancillary data.
+ *
+ * Every element of fds[] that is not filled by a received descriptor is
+ * set to -1, so callers never see (or close) stale stack contents.
+ *
+ * Returns the number of bytes read on success, 0 when the peer closed the
+ * connection, or a negative value on failure (including truncated data or
+ * truncated ancillary data).
+ */
+static int
+read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+	struct iovec iov;
+	struct msghdr msgh;
+	size_t fdsize = fd_num * sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	size_t got;
+	int ret;
+	int i;
+
+	/* nothing received yet */
+	for (i = 0; i < fd_num; i++)
+		fds[i] = -1;
+
+	memset(&msgh, 0, sizeof(msgh));
+	iov.iov_base = buf;
+	iov.iov_len = buflen;
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+	msgh.msg_control = control;
+	msgh.msg_controllen = sizeof(control);
+
+	ret = recvmsg(sockfd, &msgh, 0);
+	if (ret <= 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
+		return ret;
+	}
+
+	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+		RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
+		return -1;
+	}
+
+	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+		if ((cmsg->cmsg_level == SOL_SOCKET) &&
+			(cmsg->cmsg_type == SCM_RIGHTS)) {
+			/*
+			 * Copy only the descriptors that were really sent:
+			 * the payload length comes from cmsg_len, capped to
+			 * the room in fds[] and rounded down to whole ints.
+			 */
+			got = 0;
+			if (cmsg->cmsg_len > CMSG_LEN(0))
+				got = cmsg->cmsg_len - CMSG_LEN(0);
+			if (got > fdsize)
+				got = fdsize;
+			got -= got % sizeof(int);
+			memcpy(fds, CMSG_DATA(cmsg), got);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Read one vhost-user message from sockfd into msg: first the fixed-size
+ * header (with any passed file descriptors landing in msg->fds), then
+ * msg->size bytes of payload if the header announces any.
+ *
+ * Returns a positive byte count on success (the payload length when there
+ * is a payload, otherwise the header length), 0 when the peer closed the
+ * connection, or a negative value on failure. A header that arrives
+ * incomplete is treated as a failure, because its size/request fields
+ * would otherwise be used uninitialised.
+ */
+static int
+read_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	int ret;
+
+	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
+		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
+	if (ret <= 0)
+		return ret;
+
+	if ((size_t)ret != VHOST_USER_HDR_SIZE) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"incomplete vhost message header\n");
+		return -1;
+	}
+
+	if (msg->size) {
+		if (msg->size > sizeof(msg->payload)) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"invalid msg size: %d\n", msg->size);
+			return -1;
+		}
+		ret = read(sockfd, &msg->payload, msg->size);
+		if (ret <= 0)
+			return ret;
+		if (ret != (int)msg->size) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"read control message failed\n");
+			return -1;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Send buflen bytes from buf over sockfd. When fds is non-NULL and fd_num
+ * is positive, the descriptors are attached as SCM_RIGHTS ancillary data.
+ * sendmsg() is retried while it fails with EINTR.
+ * Returns the sendmsg() result: bytes sent, or a negative value on error.
+ */
+static int
+send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+	size_t fdsize = fd_num * sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct msghdr msgh;
+	struct iovec iov;
+	int ret;
+
+	iov.iov_base = buf;
+	iov.iov_len = buflen;
+
+	memset(&msgh, 0, sizeof(msgh));
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (fds && fd_num > 0) {
+		struct cmsghdr *cmsg;
+
+		msgh.msg_control = control;
+		msgh.msg_controllen = sizeof(control);
+
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), fds, fdsize);
+	} else {
+		/* plain data, no ancillary part */
+		msgh.msg_control = NULL;
+		msgh.msg_controllen = 0;
+	}
+
+	for (;;) {
+		ret = sendmsg(sockfd, &msgh, 0);
+		if (ret >= 0 || errno != EINTR)
+			break;
+	}
+
+	if (ret < 0)
+		RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
+
+	return ret;
+}
+
+/*
+ * Send msg back to the peer as a reply: the version bits of the flags are
+ * replaced with VHOST_USER_VERSION and the reply bit is set, then header
+ * plus msg->size payload bytes are sent without any file descriptor.
+ * Returns 0 when msg is NULL, otherwise the send_fd_message() result.
+ */
+static int
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	uint32_t flags;
+
+	if (msg == NULL)
+		return 0;
+
+	flags = msg->flags & ~VHOST_USER_VERSION_MASK;
+	flags |= VHOST_USER_VERSION;
+	flags |= VHOST_USER_REPLY_MASK;
+	msg->flags = flags;
+
+	return send_fd_message(sockfd, (char *)msg,
+		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+}
+
+/*
+ * Callback run when a listen socket becomes readable: accept the new
+ * virtio connection, create a vhost device for it, name the device after
+ * the server's socket path and register the connection fd with
+ * vserver_message_handler.
+ *
+ * On any failure everything acquired so far (connection fd, context,
+ * device) is released again. The listen fd itself is never removed.
+ */
+static void
+vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
+{
+	struct vhost_server *vserver = (struct vhost_server *)dat;
+	int conn_fd;
+	struct connfd_ctx *ctx;
+	int fh;
+	struct vhost_device_ctx vdev_ctx = { (pid_t)0, 0 };
+	unsigned int size;
+
+	conn_fd = accept(fd, NULL, NULL);
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"new virtio connection is %d\n", conn_fd);
+	if (conn_fd < 0)
+		return;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (ctx == NULL) {
+		close(conn_fd);
+		return;
+	}
+
+	fh = vhost_new_device(vdev_ctx);
+	if (fh == -1) {
+		free(ctx);
+		close(conn_fd);
+		return;
+	}
+
+	vdev_ctx.fh = fh;
+	size = strnlen(vserver->path, PATH_MAX);
+	vhost_set_ifname(vdev_ctx, vserver->path,
+		size);
+
+	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
+
+	ctx->vserver = vserver;
+	ctx->fh = fh;
+	if (fdset_add(&g_vhost_server.fdset,
+		conn_fd, vserver_message_handler, NULL, ctx) < 0) {
+		/* nobody would ever service or free this connection */
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"failed to add fd %d into vhost server fdset\n",
+			conn_fd);
+		vhost_destroy_device(vdev_ctx);
+		free(ctx);
+		close(conn_fd);
+	}
+}
+
+/*
+ * Callback run when a connection socket becomes readable: read one
+ * vhost-user message and dispatch it by request code.
+ *
+ * If reading fails, the peer closed the socket or the request code is out
+ * of range, the connection is torn down: the fd is closed, *remove is set
+ * so the dispatcher drops the fdset entry, the context is freed and the
+ * device is destroyed.
+ */
+static void
+vserver_message_handler(int connfd, void *dat, int *remove)
+{
+	/* fully initialised: the struct is passed by value to the handlers */
+	struct vhost_device_ctx ctx = { (pid_t)0, 0 };
+	struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat;
+	struct VhostUserMsg msg;
+	/* stays 0 if vhost_get_features() fails, so no stack data is sent */
+	uint64_t features = 0;
+	int ret;
+
+	ctx.fh = cfd_ctx->fh;
+	ret = read_vhost_message(connfd, &msg);
+	if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
+		if (ret < 0)
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"vhost read message failed\n");
+		else if (ret == 0)
+			RTE_LOG(INFO, VHOST_CONFIG,
+				"vhost peer closed\n");
+		else
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"vhost read incorrect message\n");
+
+		close(connfd);
+		*remove = 1;
+		free(cfd_ctx);
+		vhost_destroy_device(ctx);
+
+		return;
+	}
+
+	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+		vhost_message_str[msg.request]);
+	switch (msg.request) {
+	case VHOST_USER_GET_FEATURES:
+		ret = vhost_get_features(ctx, &features);
+		msg.payload.u64 = features;
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(connfd, &msg);
+		break;
+	case VHOST_USER_SET_FEATURES:
+		features = msg.payload.u64;
+		vhost_set_features(ctx, &features);
+		break;
+
+	case VHOST_USER_GET_PROTOCOL_FEATURES:
+		msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(connfd, &msg);
+		break;
+	case VHOST_USER_SET_PROTOCOL_FEATURES:
+		user_set_protocol_features(ctx, msg.payload.u64);
+		break;
+
+	case VHOST_USER_SET_OWNER:
+		vhost_set_owner(ctx);
+		break;
+	case VHOST_USER_RESET_OWNER:
+		vhost_reset_owner(ctx);
+		break;
+
+	case VHOST_USER_SET_MEM_TABLE:
+		user_set_mem_table(ctx, &msg);
+		break;
+
+	case VHOST_USER_SET_LOG_BASE:
+		user_set_log_base(ctx, &msg);
+
+		/* it needs a reply */
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(connfd, &msg);
+		break;
+	case VHOST_USER_SET_LOG_FD:
+		/* unsupported: just release the descriptor we were handed */
+		close(msg.fds[0]);
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+		break;
+
+	case VHOST_USER_SET_VRING_NUM:
+		vhost_set_vring_num(ctx, &msg.payload.state);
+		break;
+	case VHOST_USER_SET_VRING_ADDR:
+		vhost_set_vring_addr(ctx, &msg.payload.addr);
+		break;
+	case VHOST_USER_SET_VRING_BASE:
+		vhost_set_vring_base(ctx, &msg.payload.state);
+		break;
+
+	case VHOST_USER_GET_VRING_BASE:
+		ret = user_get_vring_base(ctx, &msg.payload.state);
+		msg.size = sizeof(msg.payload.state);
+		send_vhost_message(connfd, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_KICK:
+		user_set_vring_kick(ctx, &msg);
+		break;
+	case VHOST_USER_SET_VRING_CALL:
+		user_set_vring_call(ctx, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_ERR:
+		/* a descriptor accompanies the message unless NOFD is set */
+		if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
+			close(msg.fds[0]);
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
+		break;
+
+	case VHOST_USER_GET_QUEUE_NUM:
+		msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(connfd, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_ENABLE:
+		user_set_vring_enable(ctx, &msg.payload.state);
+		break;
+	case VHOST_USER_SEND_RARP:
+		user_send_rarp(ctx, &msg);
+		break;
+
+	default:
+		break;
+
+	}
+}
+
+/**
+ * Create and initialise a vhost server listening on the unix domain
+ * socket at path, and register its listen fd with the global fdset.
+ *
+ * Every step is checked; on failure all resources acquired so far are
+ * released, including the socket file created by bind().
+ *
+ * @return
+ *  0 on success, -1 on failure (server table full, out of memory, socket
+ *  setup failure or fdset registration failure).
+ */
+int
+rte_vhost_driver_register(const char *path)
+{
+	struct vhost_server *vserver;
+
+	pthread_mutex_lock(&g_vhost_server.server_mutex);
+
+	if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"error: the number of servers reaches maximum\n");
+		goto err;
+	}
+
+	vserver = calloc(1, sizeof(*vserver));
+	if (vserver == NULL)
+		goto err;
+
+	/* uds_socket() rejects a NULL path itself */
+	vserver->listenfd = uds_socket(path);
+	if (vserver->listenfd < 0)
+		goto err_free;
+
+	vserver->path = strdup(path);
+	if (vserver->path == NULL)
+		goto err_close;
+
+	if (fdset_add(&g_vhost_server.fdset, vserver->listenfd,
+		vserver_new_vq_conn, NULL, vserver) < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"failed to add listen fd %d into vhost server fdset\n",
+			vserver->listenfd);
+		goto err_free_path;
+	}
+
+	g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
+	pthread_mutex_unlock(&g_vhost_server.server_mutex);
+
+	return 0;
+
+err_free_path:
+	free(vserver->path);
+err_close:
+	close(vserver->listenfd);
+	/* remove the socket file bind() created, so a retry can bind again */
+	unlink(path);
+err_free:
+	free(vserver);
+err:
+	pthread_mutex_unlock(&g_vhost_server.server_mutex);
+	return -1;
+}
+
+
+/**
+ * Unregister the vhost server that was registered for path: remove its
+ * listen fd from the fdset (waiting for a running accept callback to
+ * finish), close it, free the server and unlink the socket file. The last
+ * table entry is moved into the freed position to keep the table dense.
+ *
+ * @return
+ *  0 on success, -1 if path is NULL or no server is registered for it.
+ */
+int
+rte_vhost_driver_unregister(const char *path)
+{
+	int i;
+	int count;
+
+	if (path == NULL)
+		return -1;
+
+	pthread_mutex_lock(&g_vhost_server.server_mutex);
+
+	for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
+		if (!strcmp(g_vhost_server.server[i]->path, path)) {
+			fdset_del(&g_vhost_server.fdset,
+				g_vhost_server.server[i]->listenfd);
+
+			close(g_vhost_server.server[i]->listenfd);
+			free(g_vhost_server.server[i]->path);
+			free(g_vhost_server.server[i]);
+
+			unlink(path);
+
+			count = --g_vhost_server.vserver_cnt;
+			g_vhost_server.server[i] = g_vhost_server.server[count];
+			g_vhost_server.server[count] = NULL;
+			pthread_mutex_unlock(&g_vhost_server.server_mutex);
+
+			return 0;
+		}
+	}
+	pthread_mutex_unlock(&g_vhost_server.server_mutex);
+
+	return -1;
+}
+
+/*
+ * Run the event dispatcher on the global vhost server fdset.
+ * Returns 0 once the dispatcher returns.
+ */
+int
+rte_vhost_driver_session_start(void)
+{
+	struct fdset *server_fds = &g_vhost_server.fdset;
+
+	fdset_event_dispatch(server_fds);
+
+	return 0;
+}
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
new file mode 100644
index 00000000..e3bb4138
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -0,0 +1,117 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "rte_virtio_net.h"
+#include "fd_man.h"
+
+/* One vhost-user server: a listening unix domain socket and its path. */
+struct vhost_server {
+ char *path; /**< The path the uds is bound to. */
+ int listenfd; /**< The listener sockfd. */
+};
+
+/*
+ * Request codes carried in the "request" field of a VhostUserMsg.
+ * Refer to hw/virtio/vhost-user.c (presumably in the QEMU source tree -
+ * TODO confirm). VHOST_USER_MAX is one past the last valid code and is
+ * used for range checks and table sizing.
+ */
+
+typedef enum VhostUserRequest {
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+ VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
+ VHOST_USER_SET_VRING_ENABLE = 18,
+ VHOST_USER_SEND_RARP = 19,
+ VHOST_USER_MAX
+} VhostUserRequest;
+
+/*
+ * One memory region entry of a VhostUserMemory table.
+ * NOTE(review): field meanings are inferred from their names only -
+ * confirm against the vhost-user protocol specification.
+ */
+typedef struct VhostUserMemoryRegion {
+ uint64_t guest_phys_addr;
+ uint64_t memory_size;
+ uint64_t userspace_addr;
+ uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+/* Memory table payload: a region array with room for VHOST_MEMORY_MAX_NREGIONS entries. */
+typedef struct VhostUserMemory {
+ uint32_t nregions; /* presumably the number of used entries in regions[] - confirm */
+ uint32_t padding;
+ VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+/*
+ * Log payload variant of VhostUserMsg.
+ * NOTE(review): presumably describes the mmap size/offset of the log area
+ * sent with VHOST_USER_SET_LOG_BASE - confirm against user_set_log_base().
+ */
+typedef struct VhostUserLog {
+ uint64_t mmap_size;
+ uint64_t mmap_offset;
+} VhostUserLog;
+
+/*
+ * A vhost-user message: a header (request, flags, size), a payload union
+ * selected by the request code, and the file descriptors received
+ * alongside the message. The struct is packed; do not reorder its fields.
+ */
+typedef struct VhostUserMsg {
+ VhostUserRequest request;
+
+/* bits of "flags": protocol version and the reply marker */
+#define VHOST_USER_VERSION_MASK 0x3
+#define VHOST_USER_REPLY_MASK (0x1 << 2)
+ uint32_t flags;
+ uint32_t size; /* the following payload size */
+ union {
+/* bits of "u64" for vring messages: ring index, and "no fd attached" marker */
+#define VHOST_USER_VRING_IDX_MASK 0xff
+#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ VhostUserMemory memory;
+ VhostUserLog log;
+ } payload;
+ int fds[VHOST_MEMORY_MAX_NREGIONS]; /* filled from SCM_RIGHTS data, not part of the wire header */
+} __attribute((packed)) VhostUserMsg;
+
+/* Size of the message header: everything that precedes the payload. */
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION 0x1
+
+/*****************************************************************************/
+#endif
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
new file mode 100644
index 00000000..f5248bc4
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -0,0 +1,446 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+
+#include "virtio-net.h"
+#include "virtio-net-user.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+
+struct orig_region_map { /* Bookkeeping for one mmap()ed guest region, kept so the mapping can be undone later. */
+ int fd; /* fd the region was mapped from; closed by free_mem_region() */
+ uint64_t mapped_address; /* address returned by mmap(), before mmap_offset is added */
+ uint64_t mapped_size; /* length passed to mmap() and later to munmap() */
+ uint64_t blksz; /* block size of fd that mapped_size was aligned up to */
+};
+
+/*
+ * Locate the orig_region_map array inside a memory table allocation: it is
+ * stored after the struct virtio_memory header and the 'nregions'
+ * struct virtio_memory_regions entries.
+ */
+#define orig_region(ptr, nregions) \
+	((struct orig_region_map *)RTE_PTR_ADD((ptr), \
+		sizeof(struct virtio_memory_regions) * (nregions) + \
+		sizeof(struct virtio_memory)))
+
+/*
+ * Return the preferred I/O block size (st_blksize) of the file behind 'fd'.
+ *
+ * user_set_mem_table() uses the value as the alignment of the mmap()
+ * length. Returns 0 when fstat() fails, so the caller never works with
+ * the contents of an uninitialized struct stat.
+ */
+static uint64_t
+get_blk_size(int fd)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) < 0)
+		return 0;
+
+	return (uint64_t)st.st_blksize;
+}
+
+/*
+ * Unmap every guest memory region recorded for 'dev' and close its fd.
+ *
+ * Entries whose mapped_address is zero are skipped. dev->mem itself is not
+ * freed here. A NULL device, or a device without a memory table, is a no-op.
+ */
+static void
+free_mem_region(struct virtio_net *dev)
+{
+	struct orig_region_map *maps;
+	unsigned int i;
+
+	if (dev == NULL || dev->mem == NULL)
+		return;
+
+	maps = orig_region(dev->mem, dev->mem->nregions);
+	for (i = 0; i < dev->mem->nregions; i++) {
+		struct orig_region_map *map = &maps[i];
+
+		if (!map->mapped_address)
+			continue;
+
+		munmap((void *)(uintptr_t)map->mapped_address,
+			map->mapped_size);
+		close(map->fd);
+	}
+}
+
+/*
+ * Release the guest memory table of 'dev': unmap its regions, free the
+ * table and reset dev->mem to NULL. Does nothing when no table is set.
+ */
+void
+vhost_backend_cleanup(struct virtio_net *dev)
+{
+	if (!dev->mem)
+		return;
+
+	free_mem_region(dev);
+	free(dev->mem);
+	dev->mem = NULL;
+}
+
+/*
+ * Handle a memory table message: replace the guest memory map of the device.
+ *
+ * The device is removed from the data plane if it is running, the previous
+ * regions are unmapped, and every region described by the message is
+ * mmap()ed from its file descriptor in pmsg->fds[].
+ *
+ * Returns 0 on success. Returns -1 when the device cannot be found, the
+ * region count exceeds VHOST_MEMORY_MAX_NREGIONS, the table cannot be
+ * allocated or a region cannot be mapped. After a mapping failure the
+ * regions mapped so far are unmapped again and dev->mem is NULL.
+ */
+int
+user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct VhostUserMemory memory = pmsg->payload.memory;
+	struct virtio_memory_regions *pregion;
+	uint64_t mapped_address, mapped_size;
+	struct virtio_net *dev;
+	unsigned int idx = 0;
+	struct orig_region_map *pregion_orig;
+	uint64_t alignment;
+
+	dev = get_device(ctx);
+	if (dev == NULL)
+		return -1;
+
+	/*
+	 * nregions is taken straight from the message, while regions[] and
+	 * fds[] only hold VHOST_MEMORY_MAX_NREGIONS entries. Reject larger
+	 * counts before anything is indexed with it, and before the running
+	 * device is torn down.
+	 */
+	if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"too many memory regions (%"PRIu32")\n",
+			memory.nregions);
+		return -1;
+	}
+
+	/* Remove from the data plane. */
+	if (dev->flags & VIRTIO_DEV_RUNNING)
+		notify_ops->destroy_device(dev);
+
+	/* Unmap the old memory regions and drop the old table. */
+	if (dev->mem) {
+		free_mem_region(dev);
+		free(dev->mem);
+		dev->mem = NULL;
+	}
+
+	/*
+	 * One zeroed allocation holds the table header, the per-region
+	 * entries and the orig_region_map bookkeeping array.
+	 */
+	dev->mem = calloc(1,
+		sizeof(struct virtio_memory) +
+		sizeof(struct virtio_memory_regions) * memory.nregions +
+		sizeof(struct orig_region_map) * memory.nregions);
+	if (dev->mem == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to allocate memory for dev->mem\n",
+			dev->device_fh);
+		return -1;
+	}
+	dev->mem->nregions = memory.nregions;
+
+	pregion_orig = orig_region(dev->mem, memory.nregions);
+	for (idx = 0; idx < memory.nregions; idx++) {
+		pregion = &dev->mem->regions[idx];
+		pregion->guest_phys_address =
+			memory.regions[idx].guest_phys_addr;
+		pregion->guest_phys_address_end =
+			memory.regions[idx].guest_phys_addr +
+			memory.regions[idx].memory_size;
+		pregion->memory_size =
+			memory.regions[idx].memory_size;
+		pregion->userspace_address =
+			memory.regions[idx].userspace_addr;
+
+		/*
+		 * The fd is mapped from file offset 0, so the mapping has to
+		 * cover mmap_offset plus the region itself.
+		 */
+		mapped_size = memory.regions[idx].memory_size +
+			memory.regions[idx].mmap_offset;
+
+		/* mmap() without flag of MAP_ANONYMOUS, should be called
+		 * with length argument aligned with hugepagesz at older
+		 * longterm version Linux, like 2.6.32 and 3.2.72, or
+		 * mmap() will fail with EINVAL.
+		 *
+		 * to avoid failure, make sure in caller to keep length
+		 * aligned.
+		 */
+		alignment = get_blk_size(pmsg->fds[idx]);
+		mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
+
+		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
+			mapped_size,
+			PROT_READ | PROT_WRITE, MAP_SHARED,
+			pmsg->fds[idx],
+			0);
+
+		/* Logged before the failure check so a failed attempt
+		 * still reports the size/offset/alignment that was used.
+		 */
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"mapped region %u fd:%d to:%p sz:0x%"PRIx64" "
+			"off:0x%"PRIx64" align:0x%"PRIx64"\n",
+			idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
+			mapped_size, memory.regions[idx].mmap_offset,
+			alignment);
+
+		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"mmap qemu guest failed.\n");
+			goto err_mmap;
+		}
+
+		/* Remember the raw mapping so it can be unmapped later. */
+		pregion_orig[idx].mapped_address = mapped_address;
+		pregion_orig[idx].mapped_size = mapped_size;
+		pregion_orig[idx].blksz = alignment;
+		pregion_orig[idx].fd = pmsg->fds[idx];
+
+		mapped_address += memory.regions[idx].mmap_offset;
+
+		/* Adding address_offset to a guest-physical address of this
+		 * region yields the matching address in our mapping.
+		 */
+		pregion->address_offset = mapped_address -
+			pregion->guest_phys_address;
+
+		if (memory.regions[idx].guest_phys_addr == 0) {
+			dev->mem->base_address =
+				memory.regions[idx].userspace_addr;
+			dev->mem->mapped_address =
+				pregion->address_offset;
+		}
+
+		LOG_DEBUG(VHOST_CONFIG,
+			"REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
+			idx,
+			(void *)(uintptr_t)pregion->guest_phys_address,
+			(void *)(uintptr_t)pregion->userspace_address,
+			pregion->memory_size);
+	}
+
+	return 0;
+
+err_mmap:
+	/*
+	 * Undo the regions mapped before the failing one.
+	 * NOTE(review): the fds of the failing and the not-yet-processed
+	 * regions are not closed here -- confirm who owns them.
+	 */
+	while (idx--) {
+		munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address,
+			pregion_orig[idx].mapped_size);
+		close(pregion_orig[idx].fd);
+	}
+	free(dev->mem);
+	dev->mem = NULL;
+	return -1;
+}
+
+/*
+ * A virtqueue counts as ready once it exists, has a descriptor table and
+ * both its kick and call eventfds have been initialized.
+ * Returns 1 when ready, 0 otherwise.
+ */
+static int
+vq_is_ready(struct vhost_virtqueue *vq)
+{
+	if (!vq || !vq->desc)
+		return 0;
+
+	if (vq->kickfd == VIRTIO_UNINITIALIZED_EVENTFD)
+		return 0;
+
+	return vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
+}
+
+/*
+ * Check whether the RX and TX virtqueue of every queue pair of 'dev' is
+ * ready. Logs the outcome and returns 1 when all are ready, 0 as soon as
+ * one queue is not.
+ */
+static int
+virtio_is_ready(struct virtio_net *dev)
+{
+	uint32_t qp;
+
+	for (qp = 0; qp < dev->virt_qp_nb; qp++) {
+		uint32_t first = qp * VIRTIO_QNUM;
+
+		if (vq_is_ready(dev->virtqueue[first + VIRTIO_RXQ]) &&
+			vq_is_ready(dev->virtqueue[first + VIRTIO_TXQ]))
+			continue;
+
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"virtio is not ready for processing.\n");
+		return 0;
+	}
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"virtio is now ready for processing.\n");
+	return 1;
+}
+
+/*
+ * Handle a vring call message: decode the vring index and the call fd from
+ * the message and pass them to vhost_set_vring_call(). When the "no fd"
+ * bit is set in the payload, VIRTIO_INVALID_EVENTFD is used instead of
+ * fds[0].
+ */
+void
+user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+	uint64_t arg = pmsg->payload.u64;
+
+	file.index = arg & VHOST_USER_VRING_IDX_MASK;
+	file.fd = (arg & VHOST_USER_VRING_NOFD_MASK) ?
+		VIRTIO_INVALID_EVENTFD : pmsg->fds[0];
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring call idx:%d file:%d\n", file.index, file.fd);
+	vhost_set_vring_call(ctx, &file);
+}
+
+
+/*
+ * Handle a vring kick message: decode the vring index and the kick fd and
+ * pass them to vhost_set_vring_kick(). When the "no fd" bit is set in the
+ * payload, VIRTIO_INVALID_EVENTFD is used instead of fds[0].
+ *
+ * In vhost-user, the kick message is also the point where we test whether
+ * the virtio device is ready for packet processing; a ready device that is
+ * not running yet is handed to the new_device() callback.
+ */
+void
+user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+	struct virtio_net *dev = get_device(ctx);
+
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+		file.fd = VIRTIO_INVALID_EVENTFD;
+	else
+		file.fd = pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring kick idx:%d file:%d\n", file.index, file.fd);
+	vhost_set_vring_kick(ctx, &file);
+
+	/*
+	 * get_device() may fail (the other handlers check for it); without
+	 * a device there is nothing to test or start.
+	 */
+	if (dev == NULL)
+		return;
+
+	if (virtio_is_ready(dev) &&
+		!(dev->flags & VIRTIO_DEV_RUNNING))
+		notify_ops->new_device(dev);
+}
+
+/*
+ * Handle the GET_VRING_BASE message, which qemu sends when virtio is
+ * stopped.
+ *
+ * The device is removed from the data plane if it is running, the vring
+ * base is read into 'state' through vhost_get_vring_base(), and the kick
+ * fd of the queue is closed and marked uninitialized.
+ *
+ * Returns 0 on success, -1 when the device cannot be found or
+ * state->index does not name an existing virtqueue.
+ */
+int
+user_get_vring_base(struct vhost_device_ctx ctx,
+	struct vhost_vring_state *state)
+{
+	struct virtio_net *dev = get_device(ctx);
+	struct vhost_virtqueue *vq;
+
+	if (dev == NULL)
+		return -1;
+
+	/*
+	 * state->index comes from the message and is used to index
+	 * dev->virtqueue[] below. virtio_is_ready() treats
+	 * virt_qp_nb * VIRTIO_QNUM as the number of queues of the device,
+	 * so anything beyond that (or an empty slot) is rejected.
+	 */
+	if (state->index >= dev->virt_qp_nb * VIRTIO_QNUM ||
+		dev->virtqueue[state->index] == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"invalid vring index: %u\n", state->index);
+		return -1;
+	}
+	vq = dev->virtqueue[state->index];
+
+	/* We have to stop the queue (virtio) if it is running. */
+	if (dev->flags & VIRTIO_DEV_RUNNING)
+		notify_ops->destroy_device(dev);
+
+	/* Here we are safe to get the last used index */
+	vhost_get_vring_base(ctx, state->index, state);
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring base idx:%d file:%d\n", state->index, state->num);
+	/*
+	 * Based on current qemu vhost-user implementation, this message is
+	 * sent and only sent in vhost_vring_stop.
+	 * TODO: cleanup the vring, it isn't usable since here.
+	 */
+	if (vq->kickfd >= 0)
+		close(vq->kickfd);
+
+	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
+
+	return 0;
+}
+
+/*
+ * When the virtio queues are ready to work, qemu sends this message to
+ * enable (or disable) a virtio queue.
+ *
+ * The vring_state_changed() callback, when registered, is invoked with the
+ * new state before the 'enabled' field of the virtqueue is updated.
+ *
+ * Returns 0 on success, -1 when the device cannot be found.
+ */
+int
+user_set_vring_enable(struct vhost_device_ctx ctx,
+	struct vhost_vring_state *state)
+{
+	struct virtio_net *dev = get_device(ctx);
+	int enable = (int)state->num;
+
+	/*
+	 * get_device() may fail (the other handlers check for it); never
+	 * hand a NULL device to the callback or dereference it.
+	 */
+	if (dev == NULL)
+		return -1;
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"set queue enable: %d to qp idx: %d\n",
+		enable, state->index);
+
+	if (notify_ops->vring_state_changed) {
+		notify_ops->vring_state_changed(dev, state->index, enable);
+	}
+
+	dev->virtqueue[state->index]->enabled = enable;
+
+	return 0;
+}
+
+/*
+ * Store the negotiated protocol features on the device. The request is
+ * silently ignored when the device cannot be found or when it contains a
+ * bit outside VHOST_USER_PROTOCOL_FEATURES.
+ */
+void
+user_set_protocol_features(struct vhost_device_ctx ctx,
+	uint64_t protocol_features)
+{
+	struct virtio_net *dev = get_device(ctx);
+
+	if (dev == NULL)
+		return;
+
+	if (protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
+		return;
+
+	dev->protocol_features = protocol_features;
+}
+
+/*
+ * Handle the log base message: mmap() the log area passed in fds[0] and
+ * record its base address and size on the device.
+ *
+ * The file is mapped from offset 0 and the payload's mmap_offset is added
+ * to the resulting address, so the mapping covers mmap_offset + mmap_size
+ * bytes. The fd is closed once mmap() has been attempted; an established
+ * mapping stays valid after that.
+ *
+ * Returns 0 on success, -1 when the device cannot be found, the fd or the
+ * payload size is invalid, offset + size overflows or mmap() fails.
+ */
+int
+user_set_log_base(struct vhost_device_ctx ctx,
+	struct VhostUserMsg *msg)
+{
+	struct virtio_net *dev;
+	int fd = msg->fds[0];
+	uint64_t size, off;
+	void *addr;
+
+	dev = get_device(ctx);
+	if (!dev)
+		return -1;
+
+	if (fd < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
+		return -1;
+	}
+
+	if (msg->size != sizeof(VhostUserLog)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"invalid log base msg size: %"PRId32" != %d\n",
+			msg->size, (int)sizeof(VhostUserLog));
+		close(fd);
+		return -1;
+	}
+
+	size = msg->payload.log.mmap_size;
+	off = msg->payload.log.mmap_offset;
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"log mmap size: %"PRId64", offset: %"PRId64"\n",
+		size, off);
+
+	/* Both values come from the message; refuse a wrapping length. */
+	if (off > UINT64_MAX - size) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"log mmap size and offset overflow\n");
+		close(fd);
+		return -1;
+	}
+
+	/*
+	 * mmap from 0 to workaround a hugepage mmap bug: mmap will
+	 * fail when offset is not page size aligned.
+	 *
+	 * Because the mapping starts at 0, its length must include the
+	 * offset, otherwise the tail of the log lies outside the mapping.
+	 */
+	addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	/* The fd is not needed any more, whether or not mmap() succeeded. */
+	close(fd);
+	if (addr == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
+		return -1;
+	}
+
+	/* TODO: unmap on stop */
+	dev->log_base = (uint64_t)(uintptr_t)addr + off;
+	dev->log_size = size;
+
+	return 0;
+}
+
+/*
+ * A RARP packet is broadcast to tell switches about the new location of a
+ * migrated VM, so that packets from outside are not lost after migration.
+ *
+ * Nothing is actually sent from here: the MAC address carried in the
+ * message is saved on the device and the 'broadcast_rarp' flag is raised,
+ * which lets rte_vhost_dequeue_burst() inject the packet.
+ *
+ * Returns 0 on success, -1 when the device cannot be found.
+ */
+int
+user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *msg)
+{
+	uint8_t *hwaddr = (uint8_t *)&msg->payload.u64;
+	struct virtio_net *dev = get_device(ctx);
+
+	if (dev == NULL)
+		return -1;
+
+	RTE_LOG(DEBUG, VHOST_CONFIG,
+		":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
+		hwaddr[0], hwaddr[1], hwaddr[2],
+		hwaddr[3], hwaddr[4], hwaddr[5]);
+	memcpy(dev->mac.addr_bytes, hwaddr, 6);
+
+	/*
+	 * The write barrier makes sure the MAC address is stored before the
+	 * flag that asks rte_vhost_dequeue_burst() to inject the RARP
+	 * broadcast becomes visible.
+	 */
+	rte_smp_wmb();
+	rte_atomic16_set(&dev->broadcast_rarp, 1);
+
+	return 0;
+}
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
new file mode 100644
index 00000000..cefec162
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
@@ -0,0 +1,64 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_NET_USER_H
+#define _VIRTIO_NET_USER_H
+
+#include "vhost-net.h"
+#include "vhost-net-user.h"
+
+#define VHOST_USER_PROTOCOL_F_MQ 0 /* bit position used to build VHOST_USER_PROTOCOL_FEATURES */
+#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 /* bit position used to build VHOST_USER_PROTOCOL_FEATURES */
+#define VHOST_USER_PROTOCOL_F_RARP 2 /* bit position used to build VHOST_USER_PROTOCOL_FEATURES */
+
+#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+ (1ULL << VHOST_USER_PROTOCOL_F_RARP)) /* mask of accepted bits; user_set_protocol_features() ignores a request containing any other bit */
+
+int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *); /* mmap()s the regions of a memory table message; 0 on success, -1 on failure */
+
+void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *); /* decodes vring index and call fd, then calls vhost_set_vring_call() */
+
+void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *); /* decodes vring index and kick fd; may start the device via new_device() once it is ready */
+
+void user_set_protocol_features(struct vhost_device_ctx ctx,
+ uint64_t protocol_features); /* stores the features on the device unless unsupported bits are set */
+int user_set_log_base(struct vhost_device_ctx ctx, struct VhostUserMsg *); /* mmap()s the log area from fds[0]; 0 on success, -1 on failure */
+int user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *); /* saves the MAC from the message and raises the broadcast_rarp flag */
+
+int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *); /* stops a running device, reads the vring base and closes the queue's kick fd */
+
+int user_set_vring_enable(struct vhost_device_ctx ctx,
+ struct vhost_vring_state *state); /* records the enable state of a virtqueue and notifies vring_state_changed() when registered */
+
+#endif