diff options
Diffstat (limited to 'lib/librte_vhost')
-rw-r--r-- | lib/librte_vhost/Makefile | 13 | ||||
-rw-r--r-- | lib/librte_vhost/fd_man.c | 98 | ||||
-rw-r--r-- | lib/librte_vhost/fd_man.h | 17 | ||||
-rw-r--r-- | lib/librte_vhost/meson.build | 11 | ||||
-rw-r--r-- | lib/librte_vhost/rte_vdpa.h | 87 | ||||
-rw-r--r-- | lib/librte_vhost/rte_vhost.h | 158 | ||||
-rw-r--r-- | lib/librte_vhost/rte_vhost_crypto.h | 109 | ||||
-rw-r--r-- | lib/librte_vhost/rte_vhost_version.map | 24 | ||||
-rw-r--r-- | lib/librte_vhost/socket.c | 282 | ||||
-rw-r--r-- | lib/librte_vhost/vdpa.c | 115 | ||||
-rw-r--r-- | lib/librte_vhost/vhost.c | 134 | ||||
-rw-r--r-- | lib/librte_vhost/vhost.h | 211 | ||||
-rw-r--r-- | lib/librte_vhost/vhost_crypto.c | 1374 | ||||
-rw-r--r-- | lib/librte_vhost/vhost_user.c | 246 | ||||
-rw-r--r-- | lib/librte_vhost/vhost_user.h | 45 | ||||
-rw-r--r-- | lib/librte_vhost/virtio_crypto.h | 422 | ||||
-rw-r--r-- | lib/librte_vhost/virtio_net.c | 59 |
17 files changed, 3259 insertions, 146 deletions
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile index 5d6c6aba..de431fbb 100644 --- a/lib/librte_vhost/Makefile +++ b/lib/librte_vhost/Makefile @@ -18,13 +18,20 @@ LDLIBS += -lpthread ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y) LDLIBS += -lnuma endif -LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net \ + -lrte_cryptodev -lrte_hash # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \ - vhost_user.c virtio_net.c + vhost_user.c virtio_net.c vdpa.c # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h + +# only compile vhost crypto when cryptodev is enabled +ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y) +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_crypto.c +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost_crypto.h +endif include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c index 181711c2..38347ab1 100644 --- a/lib/librte_vhost/fd_man.c +++ b/lib/librte_vhost/fd_man.c @@ -16,6 +16,9 @@ #include "fd_man.h" + +#define RTE_LOGTYPE_VHOST_FDMAN RTE_LOGTYPE_USER1 + #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL) static int @@ -171,6 +174,38 @@ fdset_del(struct fdset *pfdset, int fd) return dat; } +/** + * Unregister the fd from the fdset. + * + * If parameters are invalid, return -2 directly. + * And check whether fd is busy, if yes, return -1. + * Otherwise, try to delete the fd from fdset and + * return 0. 
+ */ +int +fdset_try_del(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL || fd == -1) + return -2; + + pthread_mutex_lock(&pfdset->fd_mutex); + i = fdset_find_fd(pfdset, fd); + if (i != -1 && pfdset->fd[i].busy) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -1; + } + + if (i != -1) { + pfdset->fd[i].fd = -1; + pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; + pfdset->fd[i].dat = NULL; + } + + pthread_mutex_unlock(&pfdset->fd_mutex); + return 0; +} /** * This functions runs in infinite blocking loop until there is no fd in @@ -258,7 +293,7 @@ fdset_event_dispatch(void *arg) * because the fd is closed in the cb, * the old fd val could be reused by when creates new * listen fd in another thread, we couldn't call - * fd_set_del. + * fdset_del. */ if (remove1 || remove2) { pfdentry->fd = -1; @@ -272,3 +307,64 @@ fdset_event_dispatch(void *arg) return NULL; } + +static void +fdset_pipe_read_cb(int readfd, void *dat __rte_unused, + int *remove __rte_unused) +{ + char charbuf[16]; + int r = read(readfd, charbuf, sizeof(charbuf)); + /* + * Just an optimization, we don't care if read() failed + * so ignore explicitly its return value to make the + * compiler happy + */ + RTE_SET_USED(r); +} + +void +fdset_pipe_uninit(struct fdset *fdset) +{ + fdset_del(fdset, fdset->u.readfd); + close(fdset->u.readfd); + close(fdset->u.writefd); +} + +int +fdset_pipe_init(struct fdset *fdset) +{ + int ret; + + if (pipe(fdset->u.pipefd) < 0) { + RTE_LOG(ERR, VHOST_FDMAN, + "failed to create pipe for vhost fdset\n"); + return -1; + } + + ret = fdset_add(fdset, fdset->u.readfd, + fdset_pipe_read_cb, NULL, NULL); + + if (ret < 0) { + RTE_LOG(ERR, VHOST_FDMAN, + "failed to add pipe readfd %d into vhost server fdset\n", + fdset->u.readfd); + + fdset_pipe_uninit(fdset); + return -1; + } + + return 0; +} + +void +fdset_pipe_notify(struct fdset *fdset) +{ + int r = write(fdset->u.writefd, "1", 1); + /* + * Just an optimization, we don't care if write() failed + * so ignore 
explicitly its return value to make the + * compiler happy + */ + RTE_SET_USED(r); + +} diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h index 3a9276c3..3331bcd9 100644 --- a/lib/librte_vhost/fd_man.h +++ b/lib/librte_vhost/fd_man.h @@ -25,6 +25,16 @@ struct fdset { struct fdentry fd[MAX_FDS]; pthread_mutex_t fd_mutex; int num; /* current fd number of this fdset */ + + union pipefds { + struct { + int pipefd[2]; + }; + struct { + int readfd; + int writefd; + }; + } u; }; @@ -34,7 +44,14 @@ int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat); void *fdset_del(struct fdset *pfdset, int fd); +int fdset_try_del(struct fdset *pfdset, int fd); void *fdset_event_dispatch(void *arg); +int fdset_pipe_init(struct fdset *fdset); + +void fdset_pipe_uninit(struct fdset *fdset); + +void fdset_pipe_notify(struct fdset *fdset); + #endif diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build index 9e8c0e76..bd62e0e3 100644 --- a/lib/librte_vhost/meson.build +++ b/lib/librte_vhost/meson.build @@ -1,5 +1,5 @@ # SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2017 Intel Corporation +# Copyright(c) 2017-2018 Intel Corporation if host_machine.system() != 'linux' build = false @@ -9,7 +9,8 @@ if has_libnuma == 1 endif version = 4 allow_experimental_apis = true -sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vhost.c', 'vhost_user.c', - 'virtio_net.c') -headers = files('rte_vhost.h') -deps += ['ethdev'] +sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vdpa.c', + 'vhost.c', 'vhost_user.c', + 'virtio_net.c', 'vhost_crypto.c') +headers = files('rte_vhost.h', 'rte_vdpa.h', 'rte_vhost_crypto.h') +deps += ['ethdev', 'cryptodev', 'hash', 'pci'] diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h new file mode 100644 index 00000000..90465ca2 --- /dev/null +++ b/lib/librte_vhost/rte_vdpa.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef 
_RTE_VDPA_H_ +#define _RTE_VDPA_H_ + +/** + * @file + * + * Device specific vhost lib + */ + +#include <rte_pci.h> +#include "rte_vhost.h" + +#define MAX_VDPA_NAME_LEN 128 + +enum vdpa_addr_type { + PCI_ADDR, + VDPA_ADDR_MAX +}; + +struct rte_vdpa_dev_addr { + enum vdpa_addr_type type; + union { + uint8_t __dummy[64]; + struct rte_pci_addr pci_addr; + }; +}; + +struct rte_vdpa_dev_ops { + /* Get capabilities of this device */ + int (*get_queue_num)(int did, uint32_t *queue_num); + int (*get_features)(int did, uint64_t *features); + int (*get_protocol_features)(int did, uint64_t *protocol_features); + + /* Driver configure/close the device */ + int (*dev_conf)(int vid); + int (*dev_close)(int vid); + + /* Enable/disable this vring */ + int (*set_vring_state)(int vid, int vring, int state); + + /* Set features when changed */ + int (*set_features)(int vid); + + /* Destination operations when migration done */ + int (*migration_done)(int vid); + + /* Get the vfio group fd */ + int (*get_vfio_group_fd)(int vid); + + /* Get the vfio device fd */ + int (*get_vfio_device_fd)(int vid); + + /* Get the notify area info of the queue */ + int (*get_notify_area)(int vid, int qid, + uint64_t *offset, uint64_t *size); + + /* Reserved for future extension */ + void *reserved[5]; +}; + +struct rte_vdpa_device { + struct rte_vdpa_dev_addr addr; + struct rte_vdpa_dev_ops *ops; +} __rte_cache_aligned; + +/* Register a vdpa device, return did if successful, -1 on failure */ +int __rte_experimental +rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr, + struct rte_vdpa_dev_ops *ops); + +/* Unregister a vdpa device, return -1 on failure */ +int __rte_experimental +rte_vdpa_unregister_device(int did); + +/* Find did of a vdpa device, return -1 on failure */ +int __rte_experimental +rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr); + +/* Find a vdpa device based on did */ +struct rte_vdpa_device * __rte_experimental +rte_vdpa_get_device(int did); + +#endif /* _RTE_VDPA_H_ */ 
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index 1179f97d..7f0cb9bc 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -29,6 +29,40 @@ extern "C" { #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) #define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3) +/** Protocol features. */ +#ifndef VHOST_USER_PROTOCOL_F_MQ +#define VHOST_USER_PROTOCOL_F_MQ 0 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_LOG_SHMFD +#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_RARP +#define VHOST_USER_PROTOCOL_F_RARP 2 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK +#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_NET_MTU +#define VHOST_USER_PROTOCOL_F_NET_MTU 4 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_SLAVE_REQ +#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_CRYPTO_SESSION +#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7 +#endif + +/** Indicate whether protocol features negotiation is supported. */ +#ifndef VHOST_USER_F_PROTOCOL_FEATURES +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#endif + /** * Information relating to memory regions including offsets to * addresses in QEMUs memory file. 
@@ -216,6 +250,41 @@ int rte_vhost_driver_register(const char *path, uint64_t flags); int rte_vhost_driver_unregister(const char *path); /** + * Set the vdpa device id, enforce single connection per socket + * + * @param path + * The vhost-user socket file path + * @param did + * Device id + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_attach_vdpa_device(const char *path, int did); + +/** + * Unset the vdpa device id + * + * @param path + * The vhost-user socket file path + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_detach_vdpa_device(const char *path); + +/** + * Get the device id + * + * @param path + * The vhost-user socket file path + * @return + * Device id, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_get_vdpa_device_id(const char *path); + +/** * Set the feature bits the vhost-user driver supports. * * @param path @@ -271,6 +340,33 @@ int rte_vhost_driver_disable_features(const char *path, uint64_t features); int rte_vhost_driver_get_features(const char *path, uint64_t *features); /** + * Get the protocol feature bits before feature negotiation. + * + * @param path + * The vhost-user socket file path + * @param protocol_features + * A pointer to store the queried protocol feature bits + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_get_protocol_features(const char *path, + uint64_t *protocol_features); + +/** + * Get the queue number bits before feature negotiation. 
+ * + * @param path + * The vhost-user socket file path + * @param queue_num + * A pointer to store the queried queue number bits + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num); + +/** * Get the feature bits after negotiation * * @param vid @@ -480,6 +576,68 @@ int rte_vhost_vring_call(int vid, uint16_t vring_idx); */ uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid); +/** + * Get log base and log size of the vhost device + * + * @param vid + * vhost device ID + * @param log_base + * vhost log base + * @param log_size + * vhost log size + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size); + +/** + * Get last_avail/used_idx of the vhost virtqueue + * + * @param vid + * vhost device ID + * @param queue_id + * vhost queue index + * @param last_avail_idx + * vhost last_avail_idx to get + * @param last_used_idx + * vhost last_used_idx to get + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_vring_base(int vid, uint16_t queue_id, + uint16_t *last_avail_idx, uint16_t *last_used_idx); + +/** + * Set last_avail/used_idx of the vhost virtqueue + * + * @param vid + * vhost device ID + * @param queue_id + * vhost queue index + * @param last_avail_idx + * last_avail_idx to set + * @param last_used_idx + * last_used_idx to set + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_vring_base(int vid, uint16_t queue_id, + uint16_t last_avail_idx, uint16_t last_used_idx); + +/** + * Get vdpa device id for vhost device. 
+ * + * @param vid + * vhost device id + * @return + * device id + */ +int __rte_experimental +rte_vhost_get_vdpa_device_id(int vid); + #ifdef __cplusplus } #endif diff --git a/lib/librte_vhost/rte_vhost_crypto.h b/lib/librte_vhost/rte_vhost_crypto.h new file mode 100644 index 00000000..f9fbc054 --- /dev/null +++ b/lib/librte_vhost/rte_vhost_crypto.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#ifndef _VHOST_CRYPTO_H_ +#define _VHOST_CRYPTO_H_ + +#define VHOST_CRYPTO_MBUF_POOL_SIZE (8192) +#define VHOST_CRYPTO_MAX_BURST_SIZE (64) +#define VHOST_CRYPTO_SESSION_MAP_ENTRIES (1024) /**< Max nb sessions */ +/** max nb virtual queues in a burst for finalizing */ +#define VIRTIO_CRYPTO_MAX_NUM_BURST_VQS (64) + +enum rte_vhost_crypto_zero_copy { + RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE = 0, + RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE = 1, + RTE_VHOST_CRYPTO_MAX_ZERO_COPY_OPTIONS +}; + +/** + * Create Vhost-crypto instance + * + * @param vid + * The identifier of the vhost device. + * @param cryptodev_id + * The identifier of DPDK Cryptodev, the same cryptodev_id can be assigned to + * multiple Vhost-crypto devices. + * @param sess_pool + * The pointer to the created cryptodev session pool whose private data size + * matches the target DPDK Cryptodev. + * @param socket_id + * NUMA Socket ID to allocate resources on. + * @return + * 0 if the Vhost Crypto Instance is created successfully. + * Negative integer if otherwise + */ +int __rte_experimental +rte_vhost_crypto_create(int vid, uint8_t cryptodev_id, + struct rte_mempool *sess_pool, int socket_id); + +/** + * Free the Vhost-crypto instance + * + * @param vid + * The identifier of the vhost device. + * @return + * 0 if the Vhost Crypto Instance is freed successfully. + * Negative integer if otherwise. 
+ */ +int __rte_experimental +rte_vhost_crypto_free(int vid); + +/** + * Enable or disable zero copy feature + * + * @param vid + * The identifier of the vhost device. + * @param option + * Flag of zero copy feature. + * @return + * 0 if completed successfully. + * Negative integer if otherwise. + */ +int __rte_experimental +rte_vhost_crypto_set_zero_copy(int vid, enum rte_vhost_crypto_zero_copy option); + +/** + * Fetch a number of vring descriptors from virt-queue and translate to DPDK + * crypto operations. After this function is executed, the user can enqueue + * the processed ops to the target cryptodev. + * + * @param vid + * The identifier of the vhost device. + * @param qid + * Virtio queue index. + * @param ops + * The address of an array of pointers to *rte_crypto_op* structures that must + * be large enough to store *nb_ops* pointers in it. + * @param nb_ops + * The maximum number of operations to be fetched and translated. + * @return + * The number of fetched and processed vhost crypto request operations. + */ +uint16_t __rte_experimental +rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, + struct rte_crypto_op **ops, uint16_t nb_ops); +/** + * Finalize the dequeued crypto ops. After the translated crypto ops are + * dequeued from the cryptodev, this function shall be called to write the + * processed data back to the vring descriptor (if zero-copy is turned off). + * + * @param ops + * The address of an array of pointers to *rte_crypto_op* structures that were + * dequeued from cryptodev. + * @param nb_ops + * The number of operations contained in the array. + * @param callfds + * The callfd number(s) contained in this burst, this shall be an array with + * no less than VIRTIO_CRYPTO_MAX_NUM_BURST_VQS elements. + * @param nb_callfds + * The number of callfd numbers stored in callfds. + * @return + * The number of ops processed. 
+ */ +uint16_t __rte_experimental +rte_vhost_crypto_finalize_requests(struct rte_crypto_op **ops, + uint16_t nb_ops, int *callfds, uint16_t *nb_callfds); + +#endif /**< _VHOST_CRYPTO_H_ */ diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index dc6f5165..da220dd0 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -60,8 +60,26 @@ DPDK_18.02 { } DPDK_17.08; -DPDK_18.02.1 { - global; +EXPERIMENTAL { + global: + rte_vdpa_register_device; + rte_vdpa_unregister_device; + rte_vdpa_find_device_id; + rte_vdpa_get_device; + rte_vhost_driver_attach_vdpa_device; + rte_vhost_driver_detach_vdpa_device; + rte_vhost_driver_get_vdpa_device_id; + rte_vhost_get_vdpa_device_id; + rte_vhost_driver_get_protocol_features; + rte_vhost_driver_get_queue_num; + rte_vhost_get_log_base; + rte_vhost_get_vring_base; + rte_vhost_set_vring_base; + rte_vhost_crypto_create; + rte_vhost_crypto_free; + rte_vhost_crypto_fetch_requests; + rte_vhost_crypto_finalize_requests; + rte_vhost_crypto_set_zero_copy; rte_vhost_va_from_guest_pa; -} DPDK_18.02; +}; diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index 83befdce..0399c37b 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -4,7 +4,6 @@ #include <stdint.h> #include <stdio.h> -#include <stdbool.h> #include <limits.h> #include <stdlib.h> #include <unistd.h> @@ -52,6 +51,13 @@ struct vhost_user_socket { uint64_t supported_features; uint64_t features; + /* + * Device id to identify a specific backend device. + * It's set to -1 for the default software implementation. + * If valid, one socket can have 1 connection only. 
+ */ + int vdpa_dev_id; + struct vhost_device_ops const *notify_ops; }; @@ -97,6 +103,7 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) size_t fdsize = fd_num * sizeof(int); char control[CMSG_SPACE(fdsize)]; struct cmsghdr *cmsg; + int got_fds = 0; int ret; memset(&msgh, 0, sizeof(msgh)); @@ -123,11 +130,16 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) cmsg = CMSG_NXTHDR(&msgh, cmsg)) { if ((cmsg->cmsg_level == SOL_SOCKET) && (cmsg->cmsg_type == SCM_RIGHTS)) { - memcpy(fds, CMSG_DATA(cmsg), fdsize); + got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int)); break; } } + /* Clear out unused file descriptors */ + while (got_fds < fd_num) + fds[got_fds++] = -1; + return ret; } @@ -153,6 +165,11 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) msgh.msg_control = control; msgh.msg_controllen = sizeof(control); cmsg = CMSG_FIRSTHDR(&msgh); + if (cmsg == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n"); + errno = EINVAL; + return -1; + } cmsg->cmsg_len = CMSG_LEN(fdsize); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; @@ -163,7 +180,7 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) } do { - ret = sendmsg(sockfd, &msgh, 0); + ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL); } while (ret < 0 && errno == EINTR); if (ret < 0) { @@ -182,6 +199,9 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) struct vhost_user_connection *conn; int ret; + if (vsocket == NULL) + return; + conn = malloc(sizeof(*conn)); if (conn == NULL) { close(fd); @@ -198,6 +218,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net); + vhost_attach_vdpa_device(vid, vsocket->vdpa_dev_id); + if (vsocket->dequeue_zero_copy) vhost_enable_dequeue_zero_copy(vid); @@ -232,6 +254,8 @@ vhost_user_add_connection(int fd, struct 
vhost_user_socket *vsocket) pthread_mutex_lock(&vsocket->conn_mutex); TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); pthread_mutex_unlock(&vsocket->conn_mutex); + + fdset_pipe_notify(&vhost_user.fdset); return; err: @@ -318,6 +342,16 @@ vhost_user_start_server(struct vhost_user_socket *vsocket) int fd = vsocket->socket_fd; const char *path = vsocket->path; + /* + * bind () may fail if the socket file with the same name already + * exists. But the library obviously should not delete the file + * provided by the user, since we can not be sure that it is not + * being used by other applications. Moreover, many applications form + * socket names based on user input, which is prone to errors. + * + * The user must ensure that the socket does not exist before + * registering the vhost driver in server mode. + */ ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, @@ -436,7 +470,6 @@ static int vhost_user_reconnect_init(void) { int ret; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; ret = pthread_mutex_init(&reconn_list.mutex, NULL); if (ret < 0) { @@ -445,7 +478,7 @@ vhost_user_reconnect_init(void) } TAILQ_INIT(&reconn_list.head); - ret = pthread_create(&reconn_tid, NULL, + ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL, vhost_user_client_reconnect, NULL); if (ret != 0) { RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); @@ -453,14 +486,6 @@ vhost_user_reconnect_init(void) RTE_LOG(ERR, VHOST_CONFIG, "failed to destroy reconnect mutex"); } - } else { - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, - "vhost-reconn"); - - if (rte_thread_setname(reconn_tid, thread_name)) { - RTE_LOG(DEBUG, VHOST_CONFIG, - "failed to set reconnect thread name"); - } } return ret; @@ -524,6 +549,52 @@ find_vhost_user_socket(const char *path) } int +rte_vhost_driver_attach_vdpa_device(const char *path, int did) +{ + struct vhost_user_socket *vsocket; + + if (rte_vdpa_get_device(did) == NULL) + return 
-1; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + vsocket->vdpa_dev_id = did; + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +int +rte_vhost_driver_detach_vdpa_device(const char *path) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + vsocket->vdpa_dev_id = -1; + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +int +rte_vhost_driver_get_vdpa_device_id(const char *path) +{ + struct vhost_user_socket *vsocket; + int did = -1; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + did = vsocket->vdpa_dev_id; + pthread_mutex_unlock(&vhost_user.mutex); + + return did; +} + +int rte_vhost_driver_disable_features(const char *path, uint64_t features) { struct vhost_user_socket *vsocket; @@ -591,19 +662,136 @@ int rte_vhost_driver_get_features(const char *path, uint64_t *features) { struct vhost_user_socket *vsocket; + uint64_t vdpa_features; + struct rte_vdpa_device *vdpa_dev; + int did = -1; + int ret = 0; pthread_mutex_lock(&vhost_user.mutex); vsocket = find_vhost_user_socket(path); - if (vsocket) + if (!vsocket) { + RTE_LOG(ERR, VHOST_CONFIG, + "socket file %s is not registered yet.\n", path); + ret = -1; + goto unlock_exit; + } + + did = vsocket->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (!vdpa_dev || !vdpa_dev->ops->get_features) { *features = vsocket->features; + goto unlock_exit; + } + + if (vdpa_dev->ops->get_features(did, &vdpa_features) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to get vdpa features " + "for socket file %s.\n", path); + ret = -1; + goto unlock_exit; + } + + *features = vsocket->features & vdpa_features; + +unlock_exit: pthread_mutex_unlock(&vhost_user.mutex); + return ret; +} +int +rte_vhost_driver_get_protocol_features(const char *path, + uint64_t *protocol_features) +{ + struct 
vhost_user_socket *vsocket; + uint64_t vdpa_protocol_features; + struct rte_vdpa_device *vdpa_dev; + int did = -1; + int ret = 0; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); if (!vsocket) { RTE_LOG(ERR, VHOST_CONFIG, "socket file %s is not registered yet.\n", path); - return -1; - } else { - return 0; + ret = -1; + goto unlock_exit; + } + + did = vsocket->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) { + *protocol_features = VHOST_USER_PROTOCOL_FEATURES; + goto unlock_exit; + } + + if (vdpa_dev->ops->get_protocol_features(did, + &vdpa_protocol_features) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to get vdpa protocol features " + "for socket file %s.\n", path); + ret = -1; + goto unlock_exit; + } + + *protocol_features = VHOST_USER_PROTOCOL_FEATURES + & vdpa_protocol_features; + +unlock_exit: + pthread_mutex_unlock(&vhost_user.mutex); + return ret; +} + +int +rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num) +{ + struct vhost_user_socket *vsocket; + uint32_t vdpa_queue_num; + struct rte_vdpa_device *vdpa_dev; + int did = -1; + int ret = 0; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (!vsocket) { + RTE_LOG(ERR, VHOST_CONFIG, + "socket file %s is not registered yet.\n", path); + ret = -1; + goto unlock_exit; + } + + did = vsocket->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (!vdpa_dev || !vdpa_dev->ops->get_queue_num) { + *queue_num = VHOST_MAX_QUEUE_PAIRS; + goto unlock_exit; + } + + if (vdpa_dev->ops->get_queue_num(did, &vdpa_queue_num) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to get vdpa queue number " + "for socket file %s.\n", path); + ret = -1; + goto unlock_exit; + } + + *queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num); + +unlock_exit: + pthread_mutex_unlock(&vhost_user.mutex); + return ret; +} + +static void +vhost_user_socket_mem_free(struct 
vhost_user_socket *vsocket) +{ + if (vsocket && vsocket->path) { + free(vsocket->path); + vsocket->path = NULL; + } + + if (vsocket) { + free(vsocket); + vsocket = NULL; } } @@ -637,7 +825,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags) if (vsocket->path == NULL) { RTE_LOG(ERR, VHOST_CONFIG, "error: failed to copy socket path string\n"); - free(vsocket); + vhost_user_socket_mem_free(vsocket); goto out; } TAILQ_INIT(&vsocket->conn_list); @@ -695,8 +883,7 @@ out_mutex: "error: failed to destroy connection mutex\n"); } out_free: - free(vsocket->path); - free(vsocket); + vhost_user_socket_mem_free(vsocket); out: pthread_mutex_unlock(&vhost_user.mutex); @@ -743,21 +930,25 @@ rte_vhost_driver_unregister(const char *path) struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; if (!strcmp(vsocket->path, path)) { - if (vsocket->is_server) { - fdset_del(&vhost_user.fdset, vsocket->socket_fd); - close(vsocket->socket_fd); - unlink(path); - } else if (vsocket->reconnect) { - vhost_user_remove_reconnect(vsocket); - } - +again: pthread_mutex_lock(&vsocket->conn_mutex); for (conn = TAILQ_FIRST(&vsocket->conn_list); conn != NULL; conn = next) { next = TAILQ_NEXT(conn, next); - fdset_del(&vhost_user.fdset, conn->connfd); + /* + * If r/wcb is executing, release the + * conn_mutex lock, and try again since + * the r/wcb may use the conn_mutex lock. 
+ */ + if (fdset_try_del(&vhost_user.fdset, + conn->connfd) == -1) { + pthread_mutex_unlock( + &vsocket->conn_mutex); + goto again; + } + RTE_LOG(INFO, VHOST_CONFIG, "free connfd = %d for device '%s'\n", conn->connfd, path); @@ -768,9 +959,17 @@ rte_vhost_driver_unregister(const char *path) } pthread_mutex_unlock(&vsocket->conn_mutex); + if (vsocket->is_server) { + fdset_del(&vhost_user.fdset, + vsocket->socket_fd); + close(vsocket->socket_fd); + unlink(path); + } else if (vsocket->reconnect) { + vhost_user_remove_reconnect(vsocket); + } + pthread_mutex_destroy(&vsocket->conn_mutex); - free(vsocket->path); - free(vsocket); + vhost_user_socket_mem_free(vsocket); count = --vhost_user.vsocket_cnt; vhost_user.vsockets[i] = vhost_user.vsockets[count]; @@ -829,11 +1028,26 @@ rte_vhost_driver_start(const char *path) return -1; if (fdset_tid == 0) { - int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch, - &vhost_user.fdset); - if (ret != 0) + /** + * create a pipe which will be waited by poll and notified to + * rebuild the wait list of poll. 
+ */ + if (fdset_pipe_init(&vhost_user.fdset) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to create pipe for vhost fdset\n"); + return -1; + } + + int ret = rte_ctrl_thread_create(&fdset_tid, + "vhost-events", NULL, fdset_event_dispatch, + &vhost_user.fdset); + if (ret != 0) { RTE_LOG(ERR, VHOST_CONFIG, "failed to create fdset handling thread"); + + fdset_pipe_uninit(&vhost_user.fdset); + return -1; + } } if (vsocket->is_server) diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c new file mode 100644 index 00000000..c82fd437 --- /dev/null +++ b/lib/librte_vhost/vdpa.c @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +/** + * @file + * + * Device specific vhost lib + */ + +#include <stdbool.h> + +#include <rte_malloc.h> +#include "rte_vdpa.h" +#include "vhost.h" + +static struct rte_vdpa_device *vdpa_devices[MAX_VHOST_DEVICE]; +static uint32_t vdpa_device_num; + +static bool +is_same_vdpa_device(struct rte_vdpa_dev_addr *a, + struct rte_vdpa_dev_addr *b) +{ + bool ret = true; + + if (a->type != b->type) + return false; + + switch (a->type) { + case PCI_ADDR: + if (a->pci_addr.domain != b->pci_addr.domain || + a->pci_addr.bus != b->pci_addr.bus || + a->pci_addr.devid != b->pci_addr.devid || + a->pci_addr.function != b->pci_addr.function) + ret = false; + break; + default: + break; + } + + return ret; +} + +int +rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr, + struct rte_vdpa_dev_ops *ops) +{ + struct rte_vdpa_device *dev; + char device_name[MAX_VDPA_NAME_LEN]; + int i; + + if (vdpa_device_num >= MAX_VHOST_DEVICE) + return -1; + + for (i = 0; i < MAX_VHOST_DEVICE; i++) { + dev = vdpa_devices[i]; + if (dev && is_same_vdpa_device(&dev->addr, addr)) + return -1; + } + + for (i = 0; i < MAX_VHOST_DEVICE; i++) { + if (vdpa_devices[i] == NULL) + break; + } + + sprintf(device_name, "vdpa-dev-%d", i); + dev = rte_zmalloc(device_name, sizeof(struct rte_vdpa_device), + RTE_CACHE_LINE_SIZE); + if 
(!dev) + return -1; + + memcpy(&dev->addr, addr, sizeof(struct rte_vdpa_dev_addr)); + dev->ops = ops; + vdpa_devices[i] = dev; + vdpa_device_num++; + + return i; +} + +int +rte_vdpa_unregister_device(int did) +{ + if (did < 0 || did >= MAX_VHOST_DEVICE || vdpa_devices[did] == NULL) + return -1; + + rte_free(vdpa_devices[did]); + vdpa_devices[did] = NULL; + vdpa_device_num--; + + return did; +} + +int +rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr) +{ + struct rte_vdpa_device *dev; + int i; + + for (i = 0; i < MAX_VHOST_DEVICE; ++i) { + dev = vdpa_devices[i]; + if (dev && is_same_vdpa_device(&dev->addr, addr)) + return i; + } + + return -1; +} + +struct rte_vdpa_device * +rte_vdpa_get_device(int did) +{ + if (did < 0 || did >= MAX_VHOST_DEVICE) + return NULL; + + return vdpa_devices[did]; +} diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 8f5104a3..afded495 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2016 Intel Corporation + * Copyright(c) 2010-2017 Intel Corporation */ #include <linux/vhost.h> @@ -68,19 +68,6 @@ __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, return 0; } -struct virtio_net * -get_device(int vid) -{ - struct virtio_net *dev = vhost_devices[vid]; - - if (unlikely(!dev)) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) device not found.\n", vid); - } - - return dev; -} - void cleanup_vq(struct vhost_virtqueue *vq, int destroy) { @@ -303,6 +290,7 @@ vhost_new_device(void) dev->vid = i; dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET; dev->slave_req_fd = -1; + dev->vdpa_dev_id = -1; return i; } @@ -315,11 +303,17 @@ void vhost_destroy_device(int vid) { struct virtio_net *dev = get_device(vid); + struct rte_vdpa_device *vdpa_dev; + int did = -1; if (dev == NULL) return; if (dev->flags & VIRTIO_DEV_RUNNING) { + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && 
vdpa_dev->ops->dev_close) + vdpa_dev->ops->dev_close(dev->vid); dev->flags &= ~VIRTIO_DEV_RUNNING; dev->notify_ops->destroy_device(vid); } @@ -331,6 +325,31 @@ vhost_destroy_device(int vid) } void +vhost_attach_vdpa_device(int vid, int did) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + if (rte_vdpa_get_device(did) == NULL) + return; + + dev->vdpa_dev_id = did; +} + +void +vhost_detach_vdpa_device(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + dev->vdpa_dev_id = -1; +} + +void vhost_set_ifname(int vid, const char *if_name, unsigned int if_len) { struct virtio_net *dev; @@ -565,16 +584,14 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) { struct virtio_net *dev = get_device(vid); - if (dev == NULL) - return -1; - - if (enable) { - RTE_LOG(ERR, VHOST_CONFIG, - "guest notification isn't supported.\n"); + if (!dev) return -1; - } - dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY; + if (enable) + dev->virtqueue[queue_id]->used->flags &= + ~VRING_USED_F_NO_NOTIFY; + else + dev->virtqueue[queue_id]->used->flags |= VRING_USED_F_NO_NOTIFY; return 0; } @@ -634,3 +651,76 @@ rte_vhost_rx_queue_count(int vid, uint16_t qid) return *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx; } + +int rte_vhost_get_vdpa_device_id(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return -1; + + return dev->vdpa_dev_id; +} + +int rte_vhost_get_log_base(int vid, uint64_t *log_base, + uint64_t *log_size) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return -1; + + if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { + RTE_LOG(ERR, VHOST_DATA, + "(%d) %s: built-in vhost net backend is disabled.\n", + dev->vid, __func__); + return -1; + } + + *log_base = dev->log_base; + *log_size = dev->log_size; + + return 0; +} + +int rte_vhost_get_vring_base(int vid, uint16_t queue_id, + uint16_t *last_avail_idx, uint16_t 
*last_used_idx) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return -1; + + if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { + RTE_LOG(ERR, VHOST_DATA, + "(%d) %s: built-in vhost net backend is disabled.\n", + dev->vid, __func__); + return -1; + } + + *last_avail_idx = dev->virtqueue[queue_id]->last_avail_idx; + *last_used_idx = dev->virtqueue[queue_id]->last_used_idx; + + return 0; +} + +int rte_vhost_set_vring_base(int vid, uint16_t queue_id, + uint16_t last_avail_idx, uint16_t last_used_idx) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return -1; + + if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { + RTE_LOG(ERR, VHOST_DATA, + "(%d) %s: built-in vhost net backend is disabled.\n", + dev->vid, __func__); + return -1; + } + + dev->virtqueue[queue_id]->last_avail_idx = last_avail_idx; + dev->virtqueue[queue_id]->last_used_idx = last_used_idx; + + return 0; +} diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index c4749a20..58c425a5 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -1,11 +1,12 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #ifndef _VHOST_NET_CDEV_H_ #define _VHOST_NET_CDEV_H_ #include <stdint.h> #include <stdio.h> +#include <stdbool.h> #include <sys/types.h> #include <sys/queue.h> #include <unistd.h> @@ -19,6 +20,7 @@ #include <rte_rwlock.h> #include "rte_vhost.h" +#include "rte_vdpa.h" /* Used to indicate that the device is running on a data core */ #define VIRTIO_DEV_RUNNING 1 @@ -26,12 +28,16 @@ #define VIRTIO_DEV_READY 2 /* Used to indicate that the built-in vhost net device backend is enabled */ #define VIRTIO_DEV_BUILTIN_VIRTIO_NET 4 +/* Used to indicate that the device has its own data path and configured */ +#define VIRTIO_DEV_VDPA_CONFIGURED 8 /* Backend value set by guest. 
*/ #define VIRTIO_DEV_STOPPED -1 #define BUF_VECTOR_MAX 256 +#define VHOST_LOG_CACHE_NR 32 + /** * Structure contains buffer address, length and descriptor index * from vring to do scatter RX. @@ -65,6 +71,14 @@ struct batch_copy_elem { uint64_t log_addr; }; +/* + * Structure that contains the info for batched dirty logging. + */ +struct log_cache_entry { + uint32_t offset; + unsigned long val; +}; + /** * Structure contains variables relevant to RX/TX virtqueues. */ @@ -108,6 +122,9 @@ struct vhost_virtqueue { struct batch_copy_elem *batch_copy_elems; uint16_t batch_copy_nb_elems; + struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR]; + uint16_t log_cache_nb_elem; + rte_rwlock_t iotlb_lock; rte_rwlock_t iotlb_pending_lock; struct rte_mempool *iotlb_pool; @@ -174,8 +191,6 @@ struct vhost_msg { #define VIRTIO_F_VERSION_1 32 #endif -#define VHOST_USER_F_PROTOCOL_FEATURES 30 - /* Features supported by this builtin vhost-user net driver. */ #define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ (1ULL << VIRTIO_F_ANY_LAYOUT) | \ @@ -210,6 +225,51 @@ struct guest_page { }; /** + * function prototype for the vhost backend to handle specific vhost user + * messages prior to the master message handling + * + * @param vid + * vhost device id + * @param msg + * Message pointer. + * @param require_reply + * If the handler requires sending a reply, this variable shall be written 1, + * otherwise 0. + * @param skip_master + * If the handler requires skipping the master message handling, this variable + * shall be written 1, otherwise 0. + * @return + * 0 on success, -1 on failure + */ +typedef int (*vhost_msg_pre_handle)(int vid, void *msg, + uint32_t *require_reply, uint32_t *skip_master); + +/** + * function prototype for the vhost backend to handle specific vhost user + * messages after the master message handling is done + * + * @param vid + * vhost device id + * @param msg + * Message pointer.
+ * @param require_reply + * If the handler requires sending a reply, this variable shall be written 1, + * otherwise 0. + * @return + * 0 on success, -1 on failure + */ +typedef int (*vhost_msg_post_handle)(int vid, void *msg, + uint32_t *require_reply); + +/** + * pre and post vhost user message handlers + */ +struct vhost_user_extern_ops { + vhost_msg_pre_handle pre_msg_handle; + vhost_msg_post_handle post_msg_handle; +}; + +/** * Device structure contains all configuration information relating * to the device. */ @@ -241,8 +301,18 @@ struct virtio_net { struct guest_page *guest_pages; int slave_req_fd; -} __rte_cache_aligned; + /* + * Device id to identify a specific backend device. + * It's set to -1 for the default software implementation. + */ + int vdpa_dev_id; + + /* private data for virtio device */ + void *extern_data; + /* pre and post vhost user message handlers for the device */ + struct vhost_user_extern_ops extern_ops; +} __rte_cache_aligned; #define VHOST_LOG_PAGE 4096 @@ -252,7 +322,15 @@ struct virtio_net { static __rte_always_inline void vhost_set_bit(unsigned int nr, volatile uint8_t *addr) { - __sync_fetch_and_or_8(addr, (1U << nr)); +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) + /* + * __sync_ built-ins are deprecated, but __atomic_ ones + * are sub-optimized in older GCC versions. + */ + __sync_fetch_and_or_1(addr, (1U << nr)); +#else + __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED); +#endif } static __rte_always_inline void @@ -284,6 +362,102 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) } static __rte_always_inline void +vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + unsigned long *log_base; + int i; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base)) + return; + + log_base = (unsigned long *)(uintptr_t)dev->log_base; + + /* + * It is expected a write memory barrier has been issued + * before this function is called.
+ */ + + for (i = 0; i < vq->log_cache_nb_elem; i++) { + struct log_cache_entry *elem = vq->log_cache + i; + +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) + /* + * '__sync' builtins are deprecated, but '__atomic' ones + * are sub-optimized in older GCC versions. + */ + __sync_fetch_and_or(log_base + elem->offset, elem->val); +#else + __atomic_fetch_or(log_base + elem->offset, elem->val, + __ATOMIC_RELAXED); +#endif + } + + rte_smp_wmb(); + + vq->log_cache_nb_elem = 0; +} + +static __rte_always_inline void +vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t page) +{ + uint32_t bit_nr = page % (sizeof(unsigned long) << 3); + uint32_t offset = page / (sizeof(unsigned long) << 3); + int i; + + for (i = 0; i < vq->log_cache_nb_elem; i++) { + struct log_cache_entry *elem = vq->log_cache + i; + + if (elem->offset == offset) { + elem->val |= (1UL << bit_nr); + return; + } + } + + if (unlikely(i >= VHOST_LOG_CACHE_NR)) { + /* + * No more room for a new log cache entry, + * so write the dirty log map directly. 
+ */ + rte_smp_wmb(); + vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); + + return; + } + + vq->log_cache[i].offset = offset; + vq->log_cache[i].val = (1UL << bit_nr); +} + +static __rte_always_inline void +vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t addr, uint64_t len) +{ + uint64_t page; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base || !len)) + return; + + if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) + return; + + page = addr / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < addr + len) { + vhost_log_cache_page(dev, vq, page); + page += 1; + } +} + +static __rte_always_inline void +vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) +{ + vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, len); +} + +static __rte_always_inline void vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, uint64_t offset, uint64_t len) { @@ -296,8 +470,8 @@ vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, #ifdef RTE_LIBRTE_VHOST_DEBUG #define VHOST_MAX_PRINT_BUFF 6072 -#define LOG_LEVEL RTE_LOG_DEBUG -#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) +#define VHOST_LOG_DEBUG(log_type, fmt, args...) \ + RTE_LOG(DEBUG, log_type, fmt, ##args) #define PRINT_PACKET(device, addr, size, header) do { \ char *pkt_addr = (char *)(addr); \ unsigned int index; \ @@ -313,11 +487,10 @@ vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, } \ snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \ \ - LOG_DEBUG(VHOST_DATA, "%s", packet); \ + VHOST_LOG_DEBUG(VHOST_DATA, "%s", packet); \ } while (0) #else -#define LOG_LEVEL RTE_LOG_INFO -#define LOG_DEBUG(log_type, fmt, args...) do {} while (0) +#define VHOST_LOG_DEBUG(log_type, fmt, args...) 
do {} while (0) #define PRINT_PACKET(device, addr, size, header) do {} while (0) #endif @@ -345,7 +518,18 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) return 0; } -struct virtio_net *get_device(int vid); +static __rte_always_inline struct virtio_net * +get_device(int vid) +{ + struct virtio_net *dev = vhost_devices[vid]; + + if (unlikely(!dev)) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) device not found.\n", vid); + } + + return dev; +} int vhost_new_device(void); void cleanup_device(struct virtio_net *dev, int destroy); @@ -357,6 +541,9 @@ void free_vq(struct vhost_virtqueue *vq); int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); +void vhost_attach_vdpa_device(int vid, int did); +void vhost_detach_vdpa_device(int vid); + void vhost_set_ifname(int, const char *if_name, unsigned int if_len); void vhost_enable_dequeue_zero_copy(int vid); void vhost_set_builtin_virtio_net(int vid, bool enable); @@ -411,7 +598,7 @@ vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq) uint16_t old = vq->signalled_used; uint16_t new = vq->last_used_idx; - LOG_DEBUG(VHOST_DATA, "%s: used_event_idx=%d, old=%d, new=%d\n", + VHOST_LOG_DEBUG(VHOST_DATA, "%s: used_event_idx=%d, old=%d, new=%d\n", __func__, vhost_used_event(vq), old, new); diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c new file mode 100644 index 00000000..f1650738 --- /dev/null +++ b/lib/librte_vhost/vhost_crypto.c @@ -0,0 +1,1374 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ +#include <rte_malloc.h> +#include <rte_hash.h> +#include <rte_jhash.h> +#include <rte_mbuf.h> +#include <rte_cryptodev.h> + +#include "rte_vhost_crypto.h" +#include "vhost.h" +#include "vhost_user.h" +#include "virtio_crypto.h" + +#define INHDR_LEN (sizeof(struct virtio_crypto_inhdr)) +#define IV_OFFSET (sizeof(struct rte_crypto_op) + \ + sizeof(struct rte_crypto_sym_op)) + +#ifdef RTE_LIBRTE_VHOST_DEBUG +#define 
VC_LOG_ERR(fmt, args...) \ + RTE_LOG(ERR, USER1, "[%s] %s() line %u: " fmt "\n", \ + "Vhost-Crypto", __func__, __LINE__, ## args) +#define VC_LOG_INFO(fmt, args...) \ + RTE_LOG(INFO, USER1, "[%s] %s() line %u: " fmt "\n", \ + "Vhost-Crypto", __func__, __LINE__, ## args) + +#define VC_LOG_DBG(fmt, args...) \ + RTE_LOG(DEBUG, USER1, "[%s] %s() line %u: " fmt "\n", \ + "Vhost-Crypto", __func__, __LINE__, ## args) +#else +#define VC_LOG_ERR(fmt, args...) \ + RTE_LOG(ERR, USER1, "[VHOST-Crypto]: " fmt "\n", ## args) +#define VC_LOG_INFO(fmt, args...) \ + RTE_LOG(INFO, USER1, "[VHOST-Crypto]: " fmt "\n", ## args) +#define VC_LOG_DBG(fmt, args...) +#endif + +#define VIRTIO_CRYPTO_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) | \ + (1 << VIRTIO_RING_F_INDIRECT_DESC) | \ + (1 << VIRTIO_RING_F_EVENT_IDX) | \ + (1 << VIRTIO_CRYPTO_SERVICE_CIPHER) | \ + (1 << VIRTIO_CRYPTO_SERVICE_MAC) | \ + (1 << VIRTIO_NET_F_CTRL_VQ)) + +#define IOVA_TO_VVA(t, r, a, l, p) \ + ((t)(uintptr_t)vhost_iova_to_vva(r->dev, r->vq, a, l, p)) + +static int +cipher_algo_transform(uint32_t virtio_cipher_algo) +{ + int ret; + + switch (virtio_cipher_algo) { + case VIRTIO_CRYPTO_CIPHER_AES_CBC: + ret = RTE_CRYPTO_CIPHER_AES_CBC; + break; + case VIRTIO_CRYPTO_CIPHER_AES_CTR: + ret = RTE_CRYPTO_CIPHER_AES_CTR; + break; + case VIRTIO_CRYPTO_CIPHER_DES_ECB: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_CIPHER_DES_CBC: + ret = RTE_CRYPTO_CIPHER_DES_CBC; + break; + case VIRTIO_CRYPTO_CIPHER_3DES_ECB: + ret = RTE_CRYPTO_CIPHER_3DES_ECB; + break; + case VIRTIO_CRYPTO_CIPHER_3DES_CBC: + ret = RTE_CRYPTO_CIPHER_3DES_CBC; + break; + case VIRTIO_CRYPTO_CIPHER_3DES_CTR: + ret = RTE_CRYPTO_CIPHER_3DES_CTR; + break; + case VIRTIO_CRYPTO_CIPHER_KASUMI_F8: + ret = RTE_CRYPTO_CIPHER_KASUMI_F8; + break; + case VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2: + ret = RTE_CRYPTO_CIPHER_SNOW3G_UEA2; + break; + case VIRTIO_CRYPTO_CIPHER_AES_F8: + ret = RTE_CRYPTO_CIPHER_AES_F8; + break; + case VIRTIO_CRYPTO_CIPHER_AES_XTS: + ret = 
RTE_CRYPTO_CIPHER_AES_XTS; + break; + case VIRTIO_CRYPTO_CIPHER_ZUC_EEA3: + ret = RTE_CRYPTO_CIPHER_ZUC_EEA3; + break; + default: + ret = -VIRTIO_CRYPTO_BADMSG; + break; + } + + return ret; +} + +static int +auth_algo_transform(uint32_t virtio_auth_algo) +{ + int ret; + + switch (virtio_auth_algo) { + + case VIRTIO_CRYPTO_NO_MAC: + ret = RTE_CRYPTO_AUTH_NULL; + break; + case VIRTIO_CRYPTO_MAC_HMAC_MD5: + ret = RTE_CRYPTO_AUTH_MD5_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA1: + ret = RTE_CRYPTO_AUTH_SHA1_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_224: + ret = RTE_CRYPTO_AUTH_SHA224_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_256: + ret = RTE_CRYPTO_AUTH_SHA256_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_384: + ret = RTE_CRYPTO_AUTH_SHA384_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_512: + ret = RTE_CRYPTO_AUTH_SHA512_HMAC; + break; + case VIRTIO_CRYPTO_MAC_CMAC_3DES: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_MAC_CMAC_AES: + ret = RTE_CRYPTO_AUTH_AES_CMAC; + break; + case VIRTIO_CRYPTO_MAC_KASUMI_F9: + ret = RTE_CRYPTO_AUTH_KASUMI_F9; + break; + case VIRTIO_CRYPTO_MAC_SNOW3G_UIA2: + ret = RTE_CRYPTO_AUTH_SNOW3G_UIA2; + break; + case VIRTIO_CRYPTO_MAC_GMAC_AES: + ret = RTE_CRYPTO_AUTH_AES_GMAC; + break; + case VIRTIO_CRYPTO_MAC_GMAC_TWOFISH: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_MAC_CBCMAC_AES: + ret = RTE_CRYPTO_AUTH_AES_CBC_MAC; + break; + case VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_MAC_XCBC_AES: + ret = RTE_CRYPTO_AUTH_AES_XCBC_MAC; + break; + default: + ret = -VIRTIO_CRYPTO_BADMSG; + break; + } + + return ret; +} + +static int get_iv_len(enum rte_crypto_cipher_algorithm algo) +{ + int len; + + switch (algo) { + case RTE_CRYPTO_CIPHER_3DES_CBC: + len = 8; + break; + case RTE_CRYPTO_CIPHER_3DES_CTR: + len = 8; + break; + case RTE_CRYPTO_CIPHER_3DES_ECB: + len = 8; + break; + case RTE_CRYPTO_CIPHER_AES_CBC: + len = 16; + break; + + /* TODO: 
add common algos */ + + default: + len = -1; + break; + } + + return len; +} + +/** + * vhost_crypto struct is used to maintain a number of virtio_cryptos and + * one DPDK crypto device that deals with all crypto workloads. It is declared + * here and defined in vhost_crypto.c + */ +struct vhost_crypto { + /** Used to lookup DPDK Cryptodev Session based on VIRTIO crypto + * session ID. + */ + struct rte_hash *session_map; + struct rte_mempool *mbuf_pool; + struct rte_mempool *sess_pool; + + /** DPDK cryptodev ID */ + uint8_t cid; + uint16_t nb_qps; + + uint64_t last_session_id; + + uint64_t cache_session_id; + struct rte_cryptodev_sym_session *cache_session; + /** socket id for the device */ + int socket_id; + + struct virtio_net *dev; + + uint8_t option; +} __rte_cache_aligned; + +struct vhost_crypto_data_req { + struct vring_desc *head; + struct virtio_net *dev; + struct virtio_crypto_inhdr *inhdr; + struct vhost_virtqueue *vq; + struct vring_desc *wb_desc; + uint16_t wb_len; + uint16_t desc_idx; + uint16_t len; + uint16_t zero_copy; +}; + +static int +transform_cipher_param(struct rte_crypto_sym_xform *xform, + VhostUserCryptoSessionParam *param) +{ + int ret; + + ret = cipher_algo_transform(param->cipher_algo); + if (unlikely(ret < 0)) + return ret; + + xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + xform->cipher.algo = (uint32_t)ret; + xform->cipher.key.length = param->cipher_key_len; + if (xform->cipher.key.length > 0) + xform->cipher.key.data = param->cipher_key_buf; + if (param->dir == VIRTIO_CRYPTO_OP_ENCRYPT) + xform->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + else if (param->dir == VIRTIO_CRYPTO_OP_DECRYPT) + xform->cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + else { + VC_LOG_DBG("Bad operation type"); + return -VIRTIO_CRYPTO_BADMSG; + } + + ret = get_iv_len(xform->cipher.algo); + if (unlikely(ret < 0)) + return ret; + xform->cipher.iv.length = (uint16_t)ret; + xform->cipher.iv.offset = IV_OFFSET; + return 0; +} + +static int +transform_chain_param(struct 
rte_crypto_sym_xform *xforms, + VhostUserCryptoSessionParam *param) +{ + struct rte_crypto_sym_xform *xform_cipher, *xform_auth; + int ret; + + switch (param->chaining_dir) { + case VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER: + xform_auth = xforms; + xform_cipher = xforms->next; + xform_cipher->cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + xform_auth->auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; + break; + case VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH: + xform_cipher = xforms; + xform_auth = xforms->next; + xform_cipher->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + xform_auth->auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; + break; + default: + return -VIRTIO_CRYPTO_BADMSG; + } + + /* cipher */ + ret = cipher_algo_transform(param->cipher_algo); + if (unlikely(ret < 0)) + return ret; + xform_cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + xform_cipher->cipher.algo = (uint32_t)ret; + xform_cipher->cipher.key.length = param->cipher_key_len; + xform_cipher->cipher.key.data = param->cipher_key_buf; + ret = get_iv_len(xform_cipher->cipher.algo); + if (unlikely(ret < 0)) + return ret; + xform_cipher->cipher.iv.length = (uint16_t)ret; + xform_cipher->cipher.iv.offset = IV_OFFSET; + + /* auth */ + xform_auth->type = RTE_CRYPTO_SYM_XFORM_AUTH; + ret = auth_algo_transform(param->hash_algo); + if (unlikely(ret < 0)) + return ret; + xform_auth->auth.algo = (uint32_t)ret; + xform_auth->auth.digest_length = param->digest_len; + xform_auth->auth.key.length = param->auth_key_len; + xform_auth->auth.key.data = param->auth_key_buf; + + return 0; +} + +static void +vhost_crypto_create_sess(struct vhost_crypto *vcrypto, + VhostUserCryptoSessionParam *sess_param) +{ + struct rte_crypto_sym_xform xform1 = {0}, xform2 = {0}; + struct rte_cryptodev_sym_session *session; + int ret; + + switch (sess_param->op_type) { + case VIRTIO_CRYPTO_SYM_OP_NONE: + case VIRTIO_CRYPTO_SYM_OP_CIPHER: + ret = transform_cipher_param(&xform1, sess_param); + if (unlikely(ret)) { + VC_LOG_ERR("Error transform 
session msg (%i)", ret); + sess_param->session_id = ret; + return; + } + break; + case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING: + if (unlikely(sess_param->hash_mode != + VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH)) { + sess_param->session_id = -VIRTIO_CRYPTO_NOTSUPP; + VC_LOG_ERR("Error transform session message (%i)", + -VIRTIO_CRYPTO_NOTSUPP); + return; + } + + xform1.next = &xform2; + + ret = transform_chain_param(&xform1, sess_param); + if (unlikely(ret)) { + VC_LOG_ERR("Error transform session message (%i)", ret); + sess_param->session_id = ret; + return; + } + + break; + default: + VC_LOG_ERR("Algorithm not yet supported"); + sess_param->session_id = -VIRTIO_CRYPTO_NOTSUPP; + return; + } + + session = rte_cryptodev_sym_session_create(vcrypto->sess_pool); + if (!session) { + VC_LOG_ERR("Failed to create session"); + sess_param->session_id = -VIRTIO_CRYPTO_ERR; + return; + } + + if (rte_cryptodev_sym_session_init(vcrypto->cid, session, &xform1, + vcrypto->sess_pool) < 0) { + VC_LOG_ERR("Failed to initialize session"); + sess_param->session_id = -VIRTIO_CRYPTO_ERR; + return; + } + + /* insert hash to map */ + if (rte_hash_add_key_data(vcrypto->session_map, + &vcrypto->last_session_id, session) < 0) { + VC_LOG_ERR("Failed to insert session to hash table"); + + if (rte_cryptodev_sym_session_clear(vcrypto->cid, session) < 0) + VC_LOG_ERR("Failed to clear session"); + else { + if (rte_cryptodev_sym_session_free(session) < 0) + VC_LOG_ERR("Failed to free session"); + } + sess_param->session_id = -VIRTIO_CRYPTO_ERR; + return; + } + + VC_LOG_INFO("Session %"PRIu64" created for vdev %i.", + vcrypto->last_session_id, vcrypto->dev->vid); + + sess_param->session_id = vcrypto->last_session_id; + vcrypto->last_session_id++; +} + +static int +vhost_crypto_close_sess(struct vhost_crypto *vcrypto, uint64_t session_id) +{ + struct rte_cryptodev_sym_session *session; + uint64_t sess_id = session_id; + int ret; + + ret = rte_hash_lookup_data(vcrypto->session_map, &sess_id, + (void 
**)&session); + + if (unlikely(ret < 0)) { + VC_LOG_ERR("Failed to delete session %"PRIu64".", session_id); + return -VIRTIO_CRYPTO_INVSESS; + } + + if (rte_cryptodev_sym_session_clear(vcrypto->cid, session) < 0) { + VC_LOG_DBG("Failed to clear session"); + return -VIRTIO_CRYPTO_ERR; + } + + if (rte_cryptodev_sym_session_free(session) < 0) { + VC_LOG_DBG("Failed to free session"); + return -VIRTIO_CRYPTO_ERR; + } + + if (rte_hash_del_key(vcrypto->session_map, &sess_id) < 0) { + VC_LOG_DBG("Failed to delete session from hash table."); + return -VIRTIO_CRYPTO_ERR; + } + + VC_LOG_INFO("Session %"PRIu64" deleted for vdev %i.", sess_id, + vcrypto->dev->vid); + + return 0; +} + +static int +vhost_crypto_msg_post_handler(int vid, void *msg, uint32_t *require_reply) +{ + struct virtio_net *dev = get_device(vid); + struct vhost_crypto *vcrypto; + VhostUserMsg *vmsg = msg; + int ret = 0; + + if (dev == NULL || require_reply == NULL) { + VC_LOG_ERR("Invalid vid %i", vid); + return -EINVAL; + } + + vcrypto = dev->extern_data; + if (vcrypto == NULL) { + VC_LOG_ERR("Cannot find required data, is it initialized?"); + return -ENOENT; + } + + *require_reply = 0; + + if (vmsg->request.master == VHOST_USER_CRYPTO_CREATE_SESS) { + vhost_crypto_create_sess(vcrypto, + &vmsg->payload.crypto_session); + *require_reply = 1; + } else if (vmsg->request.master == VHOST_USER_CRYPTO_CLOSE_SESS) + ret = vhost_crypto_close_sess(vcrypto, vmsg->payload.u64); + else + ret = -EINVAL; + + return ret; +} + +static __rte_always_inline struct vring_desc * +find_write_desc(struct vring_desc *head, struct vring_desc *desc) +{ + if (desc->flags & VRING_DESC_F_WRITE) + return desc; + + while (desc->flags & VRING_DESC_F_NEXT) { + desc = &head[desc->next]; + if (desc->flags & VRING_DESC_F_WRITE) + return desc; + } + + return NULL; +} + +static struct virtio_crypto_inhdr * +reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc) +{ + uint64_t dlen; + struct virtio_crypto_inhdr *inhdr; + + 
while (desc->flags & VRING_DESC_F_NEXT) + desc = &vc_req->head[desc->next]; + + dlen = desc->len; + inhdr = IOVA_TO_VVA(struct virtio_crypto_inhdr *, vc_req, desc->addr, + &dlen, VHOST_ACCESS_WO); + if (unlikely(!inhdr || dlen != desc->len)) + return NULL; + + return inhdr; +} + +static __rte_always_inline int +move_desc(struct vring_desc *head, struct vring_desc **cur_desc, + uint32_t size) +{ + struct vring_desc *desc = *cur_desc; + int left = size; + + rte_prefetch0(&head[desc->next]); + left -= desc->len; + + while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { + desc = &head[desc->next]; + rte_prefetch0(&head[desc->next]); + left -= desc->len; + } + + if (unlikely(left > 0)) { + VC_LOG_ERR("Incorrect virtio descriptor"); + return -1; + } + + *cur_desc = &head[desc->next]; + return 0; +} + +static int +copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, + struct vring_desc **cur_desc, uint32_t size) +{ + struct vring_desc *desc = *cur_desc; + uint64_t remain, addr, dlen, len; + uint32_t to_copy; + uint8_t *data = dst_data; + uint8_t *src; + int left = size; + + rte_prefetch0(&vc_req->head[desc->next]); + to_copy = RTE_MIN(desc->len, (uint32_t)left); + dlen = to_copy; + src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RO); + if (unlikely(!src || !dlen)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy((uint8_t *)data, src, dlen); + data += dlen; + + if (unlikely(dlen < to_copy)) { + remain = to_copy - dlen; + addr = desc->addr + dlen; + + while (remain) { + len = remain; + src = IOVA_TO_VVA(uint8_t *, vc_req, addr, &len, + VHOST_ACCESS_RO); + if (unlikely(!src || !len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(data, src, len); + addr += len; + remain -= len; + data += len; + } + } + + left -= to_copy; + + while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { + desc = &vc_req->head[desc->next]; + rte_prefetch0(&vc_req->head[desc->next]); + to_copy = 
RTE_MIN(desc->len, (uint32_t)left); + dlen = desc->len; + src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RO); + if (unlikely(!src || !dlen)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(data, src, dlen); + data += dlen; + + if (unlikely(dlen < to_copy)) { + remain = to_copy - dlen; + addr = desc->addr + dlen; + + while (remain) { + len = remain; + src = IOVA_TO_VVA(uint8_t *, vc_req, addr, &len, + VHOST_ACCESS_RO); + if (unlikely(!src || !len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(data, src, len); + addr += len; + remain -= len; + data += len; + } + } + + left -= to_copy; + } + + if (unlikely(left > 0)) { + VC_LOG_ERR("Incorrect virtio descriptor"); + return -1; + } + + *cur_desc = &vc_req->head[desc->next]; + + return 0; +} + +static __rte_always_inline void * +get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc **cur_desc, + uint32_t size, uint8_t perm) +{ + void *data; + uint64_t dlen = (*cur_desc)->len; + + data = IOVA_TO_VVA(void *, vc_req, (*cur_desc)->addr, &dlen, perm); + if (unlikely(!data || dlen != (*cur_desc)->len)) { + VC_LOG_ERR("Failed to map object"); + return NULL; + } + + if (unlikely(move_desc(vc_req->head, cur_desc, size) < 0)) + return NULL; + + return data; +} + +static int +write_back_data(struct rte_crypto_op *op, struct vhost_crypto_data_req *vc_req) +{ + struct rte_mbuf *mbuf = op->sym->m_dst; + struct vring_desc *head = vc_req->head; + struct vring_desc *desc = vc_req->wb_desc; + int left = vc_req->wb_len; + uint32_t to_write; + uint8_t *src_data = mbuf->buf_addr, *dst; + uint64_t dlen; + + rte_prefetch0(&head[desc->next]); + to_write = RTE_MIN(desc->len, (uint32_t)left); + dlen = desc->len; + dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RW); + if (unlikely(!dst || dlen != desc->len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(dst, src_data, to_write); + left -= to_write; 
+ src_data += to_write; + + while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { + desc = &head[desc->next]; + rte_prefetch0(&head[desc->next]); + to_write = RTE_MIN(desc->len, (uint32_t)left); + dlen = desc->len; + dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RW); + if (unlikely(!dst || dlen != desc->len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(dst, src_data, to_write); + left -= to_write; + src_data += to_write; + } + + if (unlikely(left < 0)) { + VC_LOG_ERR("Incorrect virtio descriptor"); + return -1; + } + + return 0; +} + +static uint8_t +prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + struct vhost_crypto_data_req *vc_req, + struct virtio_crypto_cipher_data_req *cipher, + struct vring_desc *cur_desc) +{ + struct vring_desc *desc = cur_desc; + struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst; + uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET); + uint8_t ret = 0; + + /* prepare */ + /* iv */ + if (unlikely(copy_data(iv_data, vc_req, &desc, + cipher->para.iv_len) < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + m_src->data_len = cipher->para.src_data_len; + + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr, + cipher->para.src_data_len); + m_src->buf_addr = get_data_ptr(vc_req, &desc, + cipher->para.src_data_len, VHOST_ACCESS_RO); + if (unlikely(m_src->buf_iova == 0 || + m_src->buf_addr == NULL)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + if (unlikely(cipher->para.src_data_len > + RTE_MBUF_DEFAULT_BUF_SIZE)) { + VC_LOG_ERR("Not enough space to do data copy"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), + vc_req, &desc, cipher->para.src_data_len) + < 0)) 
{ + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + break; + default: + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + /* dst */ + desc = find_write_desc(vc_req->head, desc); + if (unlikely(!desc)) { + VC_LOG_ERR("Cannot find write location"); + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + m_dst->buf_iova = gpa_to_hpa(vcrypto->dev, + desc->addr, cipher->para.dst_data_len); + m_dst->buf_addr = get_data_ptr(vc_req, &desc, + cipher->para.dst_data_len, VHOST_ACCESS_RW); + if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + + m_dst->data_len = cipher->para.dst_data_len; + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + vc_req->wb_desc = desc; + vc_req->wb_len = cipher->para.dst_data_len; + if (unlikely(move_desc(vc_req->head, &desc, + vc_req->wb_len) < 0)) { + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; + default: + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + /* src data */ + op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + op->sess_type = RTE_CRYPTO_OP_WITH_SESSION; + + op->sym->cipher.data.offset = 0; + op->sym->cipher.data.length = cipher->para.src_data_len; + + vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO); + if (unlikely(vc_req->inhdr == NULL)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + vc_req->inhdr->status = VIRTIO_CRYPTO_OK; + vc_req->len = cipher->para.dst_data_len + INHDR_LEN; + + return 0; + +error_exit: + vc_req->len = INHDR_LEN; + return ret; +} + +static uint8_t +prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + struct vhost_crypto_data_req *vc_req, + struct virtio_crypto_alg_chain_data_req *chain, + struct vring_desc *cur_desc) +{ + struct vring_desc *desc = cur_desc; + struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst; + uint8_t 
*iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET);
+	uint32_t digest_offset;
+	void *digest_addr;
+	uint8_t ret = 0;
+
+	/*
+	 * Build a cipher + hash chaining op. The descriptor chain is consumed
+	 * in the order: IV, source data, destination, digest, inhdr.
+	 * Returns 0 on success, otherwise a VIRTIO_CRYPTO_* status with
+	 * vc_req->len reset to INHDR_LEN.
+	 */
+	/* iv */
+	if (unlikely(copy_data(iv_data, vc_req, &desc,
+			chain->para.iv_len) < 0)) {
+		ret = VIRTIO_CRYPTO_BADMSG;
+		goto error_exit;
+	}
+
+	m_src->data_len = chain->para.src_data_len;
+	m_dst->data_len = chain->para.dst_data_len;
+
+	switch (vcrypto->option) {
+	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
+		m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr,
+				chain->para.src_data_len);
+		m_src->buf_addr = get_data_ptr(vc_req, &desc,
+				chain->para.src_data_len, VHOST_ACCESS_RO);
+		if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) {
+			VC_LOG_ERR("zero_copy may fail due to cross page data");
+			ret = VIRTIO_CRYPTO_ERR;
+			goto error_exit;
+		}
+		break;
+	case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
+		/* the source is copied into m_src, so it has to fit */
+		if (unlikely(chain->para.src_data_len >
+				RTE_MBUF_DEFAULT_BUF_SIZE)) {
+			VC_LOG_ERR("Not enough space to do data copy");
+			ret = VIRTIO_CRYPTO_ERR;
+			goto error_exit;
+		}
+		/*
+		 * The "< 0" comparison must be inside unlikely(): unlikely()
+		 * only yields 0 or 1, so testing its result against 0 would
+		 * never detect a failed copy.
+		 */
+		if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *),
+				vc_req, &desc, chain->para.src_data_len) < 0)) {
+			ret = VIRTIO_CRYPTO_BADMSG;
+			goto error_exit;
+		}
+		break;
+	default:
+		ret = VIRTIO_CRYPTO_BADMSG;
+		goto error_exit;
+	}
+
+	/* dst: continue from the descriptor returned by find_write_desc() */
+	desc = find_write_desc(vc_req->head, desc);
+	if (unlikely(!desc)) {
+		VC_LOG_ERR("Cannot find write location");
+		ret = VIRTIO_CRYPTO_BADMSG;
+		goto error_exit;
+	}
+
+	switch (vcrypto->option) {
+	case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
+		m_dst->buf_iova = gpa_to_hpa(vcrypto->dev,
+				desc->addr, chain->para.dst_data_len);
+		m_dst->buf_addr = get_data_ptr(vc_req, &desc,
+				chain->para.dst_data_len, VHOST_ACCESS_RW);
+		if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) {
+			VC_LOG_ERR("zero_copy may fail due to cross page data");
+			ret = VIRTIO_CRYPTO_ERR;
+			goto error_exit;
+		}
+
+		/* the digest is taken from the descriptor after the dst data */
+		op->sym->auth.digest.phys_addr = gpa_to_hpa(vcrypto->dev,
+				desc->addr, chain->para.hash_result_len);
+		op->sym->auth.digest.data = get_data_ptr(vc_req, &desc,
+				chain->para.hash_result_len, VHOST_ACCESS_RW);
+		if (unlikely(op->sym->auth.digest.phys_addr == 0)) {
+			VC_LOG_ERR("zero_copy may fail due to cross page data");
+			ret = VIRTIO_CRYPTO_ERR;
+			goto error_exit;
+		}
+		break;
+	case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
+		/* the digest is placed in m_dst right after the dst data */
+		digest_offset = m_dst->data_len;
+		digest_addr = rte_pktmbuf_mtod_offset(m_dst, void *,
+				digest_offset);
+
+		/* write-back covers the dst data plus the digest */
+		vc_req->wb_desc = desc;
+		vc_req->wb_len = m_dst->data_len + chain->para.hash_result_len;
+
+		if (unlikely(move_desc(vc_req->head, &desc,
+				chain->para.dst_data_len) < 0)) {
+			ret = VIRTIO_CRYPTO_BADMSG;
+			goto error_exit;
+		}
+
+		/* "< 0" belongs inside unlikely(), see the source copy above */
+		if (unlikely(copy_data(digest_addr, vc_req, &desc,
+				chain->para.hash_result_len) < 0)) {
+			ret = VIRTIO_CRYPTO_BADMSG;
+			goto error_exit;
+		}
+
+		op->sym->auth.digest.data = digest_addr;
+		op->sym->auth.digest.phys_addr = rte_pktmbuf_iova_offset(m_dst,
+				digest_offset);
+		break;
+	default:
+		ret = VIRTIO_CRYPTO_BADMSG;
+		goto error_exit;
+	}
+
+	/* record inhdr */
+	vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO);
+	if (unlikely(vc_req->inhdr == NULL)) {
+		ret = VIRTIO_CRYPTO_BADMSG;
+		goto error_exit;
+	}
+
+	vc_req->inhdr->status = VIRTIO_CRYPTO_OK;
+
+	op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+	op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
+
+	/* cipher runs from cipher_start_src_offset to the end of the source */
+	op->sym->cipher.data.offset = chain->para.cipher_start_src_offset;
+	op->sym->cipher.data.length = chain->para.src_data_len -
+			chain->para.cipher_start_src_offset;
+
+	op->sym->auth.data.offset = chain->para.hash_start_src_offset;
+	op->sym->auth.data.length = chain->para.len_to_hash;
+
+	vc_req->len = chain->para.dst_data_len + chain->para.hash_result_len +
+			INHDR_LEN;
+	return 0;
+
+error_exit:
+	vc_req->len = INHDR_LEN;
+	return ret;
+}
+
+/**
+ * Process one descriptor
+ */
+static __rte_always_inline int
+vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
+		struct vhost_virtqueue *vq, struct rte_crypto_op *op,
+		struct vring_desc
*head, uint16_t desc_idx)
+{
+	/* the per-request state sits right behind the source mbuf header */
+	struct vhost_crypto_data_req *vc_req = RTE_PTR_ADD(op->sym->m_src,
+			sizeof(struct rte_mbuf));
+	struct rte_cryptodev_sym_session *session;
+	struct virtio_crypto_op_data_req *req, tmp_req;
+	struct virtio_crypto_inhdr *inhdr;
+	struct vring_desc *desc = NULL;
+	uint64_t session_id;
+	uint64_t dlen;
+	int err = 0;
+
+	vc_req->desc_idx = desc_idx;
+	vc_req->dev = vcrypto->dev;
+	vc_req->vq = vq;
+
+	if (likely(head->flags & VRING_DESC_F_INDIRECT)) {
+		/* indirect: the real chain is the table head->addr points to */
+		dlen = head->len;
+		desc = IOVA_TO_VVA(struct vring_desc *, vc_req, head->addr,
+				&dlen, VHOST_ACCESS_RO);
+		if (unlikely(!desc || dlen != head->len))
+			return -1;
+		desc_idx = 0;
+		head = desc;
+	} else {
+		desc = head;
+	}
+
+	vc_req->head = head;
+	vc_req->zero_copy = vcrypto->option;
+
+	req = get_data_ptr(vc_req, &desc, sizeof(*req), VHOST_ACCESS_RO);
+	if (unlikely(req == NULL)) {
+		switch (vcrypto->option) {
+		case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE:
+			err = VIRTIO_CRYPTO_BADMSG;
+			VC_LOG_ERR("Invalid descriptor");
+			goto error_exit;
+		case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
+			/* fall back to copying the request header */
+			req = &tmp_req;
+			if (unlikely(copy_data(req, vc_req, &desc, sizeof(*req))
+					< 0)) {
+				err = VIRTIO_CRYPTO_BADMSG;
+				VC_LOG_ERR("Invalid descriptor");
+				goto error_exit;
+			}
+			break;
+		default:
+			err = VIRTIO_CRYPTO_ERR;
+			VC_LOG_ERR("Invalid option");
+			goto error_exit;
+		}
+	}
+
+	switch (req->header.opcode) {
+	case VIRTIO_CRYPTO_CIPHER_ENCRYPT:
+	case VIRTIO_CRYPTO_CIPHER_DECRYPT:
+		session_id = req->header.session_id;
+
+		/* one branch to avoid unnecessary table lookup */
+		if (vcrypto->cache_session_id != session_id) {
+			err = rte_hash_lookup_data(vcrypto->session_map,
+					&session_id, (void **)&session);
+			if (unlikely(err < 0)) {
+				err = VIRTIO_CRYPTO_ERR;
+				VC_LOG_ERR("Failed to find session %"PRIu64,
+						session_id);
+				goto error_exit;
+			}
+
+			vcrypto->cache_session = session;
+			vcrypto->cache_session_id = session_id;
+		}
+
+		session = vcrypto->cache_session;
+
+		err = rte_crypto_op_attach_sym_session(op, session);
+		if (unlikely(err < 0)) {
+			err = VIRTIO_CRYPTO_ERR;
+			VC_LOG_ERR("Failed to attach session to op");
+			goto error_exit;
+		}
+
+		switch (req->u.sym_req.op_type) {
+		case VIRTIO_CRYPTO_SYM_OP_NONE:
+			err = VIRTIO_CRYPTO_NOTSUPP;
+			break;
+		case VIRTIO_CRYPTO_SYM_OP_CIPHER:
+			err = prepare_sym_cipher_op(vcrypto, op, vc_req,
+					&req->u.sym_req.u.cipher, desc);
+			break;
+		case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING:
+			err = prepare_sym_chain_op(vcrypto, op, vc_req,
+					&req->u.sym_req.u.chain, desc);
+			break;
+		default:
+			/*
+			 * Without this, an unknown op_type keeps err == 0 and
+			 * the unprepared op is reported as a success.
+			 */
+			err = VIRTIO_CRYPTO_NOTSUPP;
+			break;
+		}
+		if (unlikely(err != 0)) {
+			VC_LOG_ERR("Failed to process sym request");
+			goto error_exit;
+		}
+		break;
+	default:
+		/* err must be non-zero, it becomes the inhdr status below */
+		err = VIRTIO_CRYPTO_NOTSUPP;
+		VC_LOG_ERR("Unsupported symmetric crypto request type %u",
+				req->header.opcode);
+		goto error_exit;
+	}
+
+	return 0;
+
+error_exit:
+
+	/* report the failure to the guest through the request's inhdr */
+	inhdr = reach_inhdr(vc_req, desc);
+	if (likely(inhdr != NULL))
+		inhdr->status = (uint8_t)err;
+
+	return -1;
+}
+
+/**
+ * Complete one crypto op: set the inhdr status, write the result back for
+ * the copy mode, fill the used ring entry and return both mbufs to their
+ * pools.
+ *
+ * If @old_vq is not NULL and the op belongs to another virtqueue, nothing is
+ * done and that other virtqueue is returned. Otherwise the op's virtqueue is
+ * returned.
+ */
+static __rte_always_inline struct vhost_virtqueue *
+vhost_crypto_finalize_one_request(struct rte_crypto_op *op,
+		struct vhost_virtqueue *old_vq)
+{
+	struct rte_mbuf *m_src = op->sym->m_src;
+	struct rte_mbuf *m_dst = op->sym->m_dst;
+	struct vhost_crypto_data_req *vc_req = RTE_PTR_ADD(m_src,
+			sizeof(struct rte_mbuf));
+	uint16_t desc_idx;
+	int ret = 0;
+
+	if (unlikely(!vc_req)) {
+		VC_LOG_ERR("Failed to retrieve vc_req");
+		return NULL;
+	}
+
+	if (old_vq && (vc_req->vq != old_vq))
+		return vc_req->vq;
+
+	desc_idx = vc_req->desc_idx;
+
+	if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS))
+		vc_req->inhdr->status = VIRTIO_CRYPTO_ERR;
+	else {
+		if (vc_req->zero_copy == 0) {
+			ret = write_back_data(op, vc_req);
+			if (unlikely(ret != 0))
+				vc_req->inhdr->status = VIRTIO_CRYPTO_ERR;
+		}
+	}
+
+	vc_req->vq->used->ring[desc_idx].id = desc_idx;
+	vc_req->vq->used->ring[desc_idx].len = vc_req->len;
+
+	rte_mempool_put(m_dst->pool, (void *)m_dst);
+	rte_mempool_put(m_src->pool, (void *)m_src);
+
+	return vc_req->vq;
+}
+
+static
__rte_always_inline uint16_t
+vhost_crypto_complete_one_vm_requests(struct rte_crypto_op **ops,
+		uint16_t nb_ops, int *callfd)
+{
+	uint16_t processed = 1;
+	struct vhost_virtqueue *vq, *tmp_vq;
+
+	/*
+	 * Finalize the leading run of ops that share the virtqueue of ops[0],
+	 * stopping at the first op that belongs to another virtqueue. The
+	 * number of ops consumed is returned and added to the used ring
+	 * index; the virtqueue's callfd is stored in *callfd.
+	 */
+	if (unlikely(nb_ops == 0))
+		return 0;
+
+	vq = vhost_crypto_finalize_one_request(ops[0], NULL);
+	if (unlikely(vq == NULL))
+		return 0;
+	tmp_vq = vq;
+
+	while ((processed < nb_ops)) {
+		tmp_vq = vhost_crypto_finalize_one_request(ops[processed],
+				tmp_vq);
+
+		if (unlikely(vq != tmp_vq))
+			break;
+
+		processed++;
+	}
+
+	*callfd = vq->callfd;
+
+	*(volatile uint16_t *)&vq->used->idx += processed;
+
+	return processed;
+}
+
+/**
+ * Attach vhost crypto state to the vhost device @vid: set the crypto
+ * features on the socket, allocate the per-device context, its session map
+ * and its mbuf pool, and install the post-message handler.
+ *
+ * Returns 0 on success, -EINVAL for an unknown @vid, -1 when the features
+ * cannot be set, or -ENOMEM when an allocation fails.
+ */
+int __rte_experimental
+rte_vhost_crypto_create(int vid, uint8_t cryptodev_id,
+		struct rte_mempool *sess_pool, int socket_id)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct rte_hash_parameters params = {0};
+	struct vhost_crypto *vcrypto;
+	char name[128];
+	int ret;
+
+	if (!dev) {
+		VC_LOG_ERR("Invalid vid %i", vid);
+		return -EINVAL;
+	}
+
+	ret = rte_vhost_driver_set_features(dev->ifname,
+			VIRTIO_CRYPTO_FEATURES);
+	if (ret < 0) {
+		VC_LOG_ERR("Error setting features");
+		return -1;
+	}
+
+	/* zeroed, so error_exit can test session_map and mbuf_pool */
+	vcrypto = rte_zmalloc_socket(NULL, sizeof(*vcrypto),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (!vcrypto) {
+		VC_LOG_ERR("Insufficient memory");
+		return -ENOMEM;
+	}
+
+	vcrypto->sess_pool = sess_pool;
+	vcrypto->cid = cryptodev_id;
+	vcrypto->cache_session_id = UINT64_MAX;
+	vcrypto->last_session_id = 1;
+	vcrypto->dev = dev;
+	vcrypto->option = RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE;
+
+	snprintf(name, sizeof(name), "HASH_VHOST_CRYPT_%u", (uint32_t)vid);
+	params.name = name;
+	params.entries = VHOST_CRYPTO_SESSION_MAP_ENTRIES;
+	params.hash_func = rte_jhash;
+	params.key_len = sizeof(uint64_t);
+	params.socket_id = socket_id;
+	vcrypto->session_map = rte_hash_create(&params);
+	if (!vcrypto->session_map) {
+		VC_LOG_ERR("Failed to create session map");
+		ret = -ENOMEM;
+		goto error_exit;
+	}
+
+	/* NOTE(review): the pool is created on rte_socket_id(), not on the
+	 * socket_id argument used for the other allocations - confirm intent.
+	 */
+	snprintf(name, sizeof(name), "MBUF_POOL_VM_%u", (uint32_t)vid);
+	vcrypto->mbuf_pool = rte_pktmbuf_pool_create(name,
+			VHOST_CRYPTO_MBUF_POOL_SIZE, 512,
+			sizeof(struct vhost_crypto_data_req),
+			RTE_MBUF_DEFAULT_DATAROOM * 2 + RTE_PKTMBUF_HEADROOM,
+			rte_socket_id());
+	if (!vcrypto->mbuf_pool) {
+		VC_LOG_ERR("Failed to create mbuf pool");
+		ret = -ENOMEM;
+		goto error_exit;
+	}
+
+	dev->extern_data = vcrypto;
+	dev->extern_ops.pre_msg_handle = NULL;
+	dev->extern_ops.post_msg_handle = vhost_crypto_msg_post_handler;
+
+	return 0;
+
+error_exit:
+	if (vcrypto->session_map)
+		rte_hash_free(vcrypto->session_map);
+	if (vcrypto->mbuf_pool)
+		rte_mempool_free(vcrypto->mbuf_pool);
+
+	rte_free(vcrypto);
+
+	return ret;
+}
+
+/**
+ * Release the vhost crypto state of device @vid and remove the message
+ * handlers.
+ *
+ * Returns 0 on success, -EINVAL for an unknown @vid, or -ENOENT when the
+ * device has no crypto state attached.
+ */
+int __rte_experimental
+rte_vhost_crypto_free(int vid)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct vhost_crypto *vcrypto;
+
+	if (unlikely(dev == NULL)) {
+		VC_LOG_ERR("Invalid vid %i", vid);
+		return -EINVAL;
+	}
+
+	vcrypto = dev->extern_data;
+	if (unlikely(vcrypto == NULL)) {
+		VC_LOG_ERR("Cannot find required data, is it initialized?");
+		return -ENOENT;
+	}
+
+	rte_hash_free(vcrypto->session_map);
+	rte_mempool_free(vcrypto->mbuf_pool);
+	rte_free(vcrypto);
+
+	dev->extern_data = NULL;
+	dev->extern_ops.pre_msg_handle = NULL;
+	dev->extern_ops.post_msg_handle = NULL;
+
+	return 0;
+}
+
+/**
+ * Select the zero copy mode of device @vid. The mode can only be changed
+ * while the mbuf pool is full.
+ *
+ * Returns 0 on success or when the mode is already set, -EINVAL for a bad
+ * @vid or @option or when the pool is not full, or -ENOENT when the device
+ * has no crypto state attached.
+ */
+int __rte_experimental
+rte_vhost_crypto_set_zero_copy(int vid, enum rte_vhost_crypto_zero_copy option)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct vhost_crypto *vcrypto;
+
+	if (unlikely(dev == NULL)) {
+		VC_LOG_ERR("Invalid vid %i", vid);
+		return -EINVAL;
+	}
+
+	if (unlikely((uint32_t)option >=
+			RTE_VHOST_CRYPTO_MAX_ZERO_COPY_OPTIONS)) {
+		VC_LOG_ERR("Invalid option %i", option);
+		return -EINVAL;
+	}
+
+	vcrypto = (struct vhost_crypto *)dev->extern_data;
+	if (unlikely(vcrypto == NULL)) {
+		VC_LOG_ERR("Cannot find required data, is it initialized?");
+		return -ENOENT;
+	}
+
+	if (vcrypto->option == (uint8_t)option)
+		return 0;
+
+	if (!(rte_mempool_full(vcrypto->mbuf_pool))) {
+		VC_LOG_ERR("Cannot
update zero copy as mempool is not full");
+		return -EINVAL;
+	}
+
+	vcrypto->option = (uint8_t)option;
+
+	return 0;
+}
+
+/**
+ * Fetch up to @nb_ops requests from virtqueue @qid of device @vid and fill
+ * the caller supplied @ops with them. Two mbufs (source and destination)
+ * are taken from the device's mbuf pool for every request.
+ *
+ * Returns the number of ops that were filled in. The return type is
+ * unsigned, so every failure (bad @vid or @qid, missing crypto state, no
+ * mbufs available) is reported as 0 fetched ops.
+ */
+uint16_t __rte_experimental
+rte_vhost_crypto_fetch_requests(int vid, uint32_t qid,
+		struct rte_crypto_op **ops, uint16_t nb_ops)
+{
+	struct rte_mbuf *mbufs[VHOST_CRYPTO_MAX_BURST_SIZE * 2];
+	struct virtio_net *dev = get_device(vid);
+	struct vhost_crypto *vcrypto;
+	struct vhost_virtqueue *vq;
+	uint16_t avail_idx;
+	uint16_t start_idx;
+	uint16_t required;
+	uint16_t count;
+	uint16_t i;
+
+	/*
+	 * A negative errno must not be returned from here: converted to
+	 * uint16_t it would look like tens of thousands of fetched ops.
+	 */
+	if (unlikely(dev == NULL)) {
+		VC_LOG_ERR("Invalid vid %i", vid);
+		return 0;
+	}
+
+	if (unlikely(qid >= VHOST_MAX_QUEUE_PAIRS)) {
+		VC_LOG_ERR("Invalid qid %u", qid);
+		return 0;
+	}
+
+	vcrypto = (struct vhost_crypto *)dev->extern_data;
+	if (unlikely(vcrypto == NULL)) {
+		VC_LOG_ERR("Cannot find required data, is it initialized?");
+		return 0;
+	}
+
+	vq = dev->virtqueue[qid];
+
+	/* last_used_idx serves as the cursor into the avail ring here */
+	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+	start_idx = vq->last_used_idx;
+	count = avail_idx - start_idx;
+	count = RTE_MIN(count, VHOST_CRYPTO_MAX_BURST_SIZE);
+	count = RTE_MIN(count, nb_ops);
+
+	if (unlikely(count == 0))
+		return 0;
+
+	/* for zero copy, we need 2 empty mbufs for src and dst, otherwise
+	 * we need only 1 mbuf as src and dst; two are always taken here
+	 */
+	required = count * 2;
+	if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool, (void **)mbufs,
+			required) < 0)) {
+		VC_LOG_ERR("Insufficient memory");
+		return 0;
+	}
+
+	for (i = 0; i < count; i++) {
+		uint16_t used_idx = (start_idx + i) & (vq->size - 1);
+		uint16_t desc_idx = vq->avail->ring[used_idx];
+		struct vring_desc *head = &vq->desc[desc_idx];
+		struct rte_crypto_op *op = ops[i];
+
+		op->sym->m_src = mbufs[i * 2];
+		op->sym->m_dst = mbufs[i * 2 + 1];
+		op->sym->m_src->data_off = 0;
+		op->sym->m_dst->data_off = 0;
+
+		/*
+		 * The "< 0" comparison must be inside unlikely(): unlikely()
+		 * only yields 0 or 1, so testing its result against 0 would
+		 * count a failed request as fetched.
+		 */
+		if (unlikely(vhost_crypto_process_one_req(vcrypto, vq, op, head,
+				desc_idx) < 0))
+			break;
+	}
+
+	/* give back the mbufs of the failed request and of all later ones */
+	if (unlikely(i < count))
+		rte_mempool_put_bulk(vcrypto->mbuf_pool,
+				(void **)&mbufs[i * 2], (count - i) * 2);
+
+	vq->last_used_idx += i;
+
+	return i;
+}
+
+/**
+ * Finalize @nb_ops processed crypto ops, one virtqueue run at a time. The
+ * callfd of each run is stored in @callfds and the number of stored
+ * entries in @nb_callfds. At most VIRTIO_CRYPTO_MAX_NUM_BURST_VQS entries
+ * are written.
+ *
+ * Returns the number of ops that were finalized.
+ */
+uint16_t __rte_experimental
+rte_vhost_crypto_finalize_requests(struct rte_crypto_op **ops,
+		uint16_t nb_ops, int *callfds, uint16_t *nb_callfds)
+{
+	struct rte_crypto_op **tmp_ops = ops;
+	uint16_t count = 0, left = nb_ops;
+	int callfd;
+	uint16_t idx = 0;
+
+	while (left) {
+		count = vhost_crypto_complete_one_vm_requests(tmp_ops, left,
+				&callfd);
+		if (unlikely(count == 0))
+			break;
+
+		tmp_ops = &tmp_ops[count];
+		left -= count;
+
+		callfds[idx++] = callfd;
+
+		if (unlikely(idx >= VIRTIO_CRYPTO_MAX_NUM_BURST_VQS)) {
+			VC_LOG_ERR("Too many vqs");
+			break;
+		}
+	}
+
+	*nb_callfds = idx;
+
+	return nb_ops - left;
+}
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 2d008fb0..947290fc 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1,5 +1,22 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2016 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+/* Security model
+ * --------------
+ * The vhost-user protocol connection is an external interface, so it must be
+ * robust against invalid inputs.
+ *
+ * This is important because the vhost-user master is only one step removed
+ * from the guest. Malicious guests that have escaped will then launch further
+ * attacks from the vhost-user master.
+ *
+ * Even in deployments where guests are trusted, a bug in the vhost-user master
+ * can still cause invalid messages to be sent. Such messages must not
+ * compromise the stability of the DPDK application by causing crashes, memory
+ * corruption, or other problematic behavior.
+ *
+ * Do not assume received VhostUserMsg fields contain sensible values!
*/ #include <stdint.h> @@ -50,6 +67,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU", [VHOST_USER_SET_SLAVE_REQ_FD] = "VHOST_USER_SET_SLAVE_REQ_FD", [VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG", + [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS", + [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS", }; static uint64_t @@ -116,7 +135,14 @@ vhost_user_set_owner(void) static int vhost_user_reset_owner(struct virtio_net *dev) { + struct rte_vdpa_device *vdpa_dev; + int did = -1; + if (dev->flags & VIRTIO_DEV_RUNNING) { + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->dev_close) + vdpa_dev->ops->dev_close(dev->vid); dev->flags &= ~VIRTIO_DEV_RUNNING; dev->notify_ops->destroy_device(dev->vid); } @@ -139,12 +165,26 @@ vhost_user_get_features(struct virtio_net *dev) } /* + * The queue number that we support are requested. + */ +static uint32_t +vhost_user_get_queue_num(struct virtio_net *dev) +{ + uint32_t queue_num = 0; + + rte_vhost_driver_get_queue_num(dev->ifname, &queue_num); + return queue_num; +} + +/* * We receive the negotiated features supported by us and the virtio device. */ static int vhost_user_set_features(struct virtio_net *dev, uint64_t features) { uint64_t vhost_features = 0; + struct rte_vdpa_device *vdpa_dev; + int did = -1; rte_vhost_driver_get_features(dev->ifname, &vhost_features); if (features & ~vhost_features) { @@ -181,7 +221,7 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) } else { dev->vhost_hlen = sizeof(struct virtio_net_hdr); } - LOG_DEBUG(VHOST_CONFIG, + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mergeable RX buffers %s, virtio 1 %s\n", dev->vid, (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? 
"on" : "off", @@ -207,6 +247,11 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) } } + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->set_features) + vdpa_dev->ops->set_features(dev->vid); + return 0; } @@ -221,6 +266,17 @@ vhost_user_set_vring_num(struct virtio_net *dev, vq->size = msg->payload.state.num; + /* VIRTIO 1.0, 2.4 Virtqueues says: + * + * Queue Size value is always a power of 2. The maximum Queue Size + * value is 32768. + */ + if ((vq->size & (vq->size - 1)) || vq->size > 32768) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid virtqueue size %u\n", vq->size); + return -1; + } + if (dev->dequeue_zero_copy) { vq->nr_zmbuf = 0; vq->last_zmbuf_idx = 0; @@ -474,13 +530,13 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) vq->log_guest_addr = addr->log_guest_addr; - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", dev->vid, vq->desc); - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", dev->vid, vq->avail); - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", dev->vid, vq->used); - LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", dev->vid, vq->log_guest_addr); return dev; @@ -513,7 +569,7 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg) if (vq->enabled && (dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) { - dev = translate_ring_addresses(dev, msg->payload.state.index); + dev = translate_ring_addresses(dev, msg->payload.addr.index); if (!dev) return -1; @@ -538,7 +594,7 @@ vhost_user_set_vring_base(struct virtio_net *dev, return 0; } -static void +static int add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, uint64_t 
host_phys_addr, uint64_t size) { @@ -548,6 +604,10 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, dev->max_guest_pages *= 2; dev->guest_pages = realloc(dev->guest_pages, dev->max_guest_pages * sizeof(*page)); + if (!dev->guest_pages) { + RTE_LOG(ERR, VHOST_CONFIG, "cannot realloc guest_pages\n"); + return -1; + } } if (dev->nr_guest_pages > 0) { @@ -556,7 +616,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, if (host_phys_addr == last_page->host_phys_addr + last_page->size) { last_page->size += size; - return; + return 0; } } @@ -564,9 +624,11 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, page->guest_phys_addr = guest_phys_addr; page->host_phys_addr = host_phys_addr; page->size = size; + + return 0; } -static void +static int add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, uint64_t page_size) { @@ -580,7 +642,9 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, size = page_size - (guest_phys_addr & (page_size - 1)); size = RTE_MIN(size, reg_size); - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0) + return -1; + host_user_addr += size; guest_phys_addr += size; reg_size -= size; @@ -589,12 +653,16 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, size = RTE_MIN(reg_size, page_size); host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t) host_user_addr); - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, + size) < 0) + return -1; host_user_addr += size; guest_phys_addr += size; reg_size -= size; } + + return 0; } #ifdef RTE_LIBRTE_VHOST_DEBUG @@ -648,8 +716,9 @@ vhost_memory_changed(struct VhostUserMemory *new, } static int -vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) +vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *pmsg) { + struct virtio_net *dev = *pdev; struct VhostUserMemory memory = pmsg->payload.memory; struct rte_vhost_mem_region *reg; void *mmap_addr; @@ -657,8 +726,15 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) uint64_t mmap_offset; uint64_t alignment; uint32_t i; + int populate; int fd; + if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) { + RTE_LOG(ERR, VHOST_CONFIG, + "too many memory regions (%u)\n", memory.nregions); + return -1; + } + if (dev->mem && !vhost_memory_changed(&memory, dev->mem)) { RTE_LOG(INFO, VHOST_CONFIG, "(%d) memory regions not changed\n", dev->vid); @@ -709,7 +785,17 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) reg->fd = fd; mmap_offset = memory.regions[i].mmap_offset; - mmap_size = reg->size + mmap_offset; + + /* Check for memory_size + mmap_offset overflow */ + if (mmap_offset >= -reg->size) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap_offset (%#"PRIx64") and memory_size " + "(%#"PRIx64") overflow\n", + mmap_offset, reg->size); + goto err_mmap; + } + + mmap_size = reg->size + mmap_offset; /* mmap() without flag of MAP_ANONYMOUS, should be called * with length argument aligned with hugepagesz at older @@ -727,8 +813,9 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) } mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment); + populate = (dev->dequeue_zero_copy) ? 
MAP_POPULATE : 0; mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, 0); + MAP_SHARED | populate, fd, 0); if (mmap_addr == MAP_FAILED) { RTE_LOG(ERR, VHOST_CONFIG, @@ -742,7 +829,12 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) mmap_offset; if (dev->dequeue_zero_copy) - add_guest_pages(dev, reg, alignment); + if (add_guest_pages(dev, reg, alignment) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "adding guest pages to region %u failed.\n", + i); + goto err_mmap; + } RTE_LOG(INFO, VHOST_CONFIG, "guest memory region %u, size: 0x%" PRIx64 "\n" @@ -763,6 +855,25 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) mmap_offset); } + for (i = 0; i < dev->nr_vring; i++) { + struct vhost_virtqueue *vq = dev->virtqueue[i]; + + if (vq->desc || vq->avail || vq->used) { + /* + * If the memory table got updated, the ring addresses + * need to be translated again as virtual addresses have + * changed. + */ + vring_invalidate(dev, vq); + + dev = translate_ring_addresses(dev, i); + if (!dev) + return -1; + + *pdev = dev; + } + } + dump_guest_pages(dev); return 0; @@ -885,17 +996,24 @@ vhost_user_get_vring_base(struct virtio_net *dev, VhostUserMsg *msg) { struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; + struct rte_vdpa_device *vdpa_dev; + int did = -1; /* We have to stop the queue (virtio) if it is running. 
*/ if (dev->flags & VIRTIO_DEV_RUNNING) { + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->dev_close) + vdpa_dev->ops->dev_close(dev->vid); dev->flags &= ~VIRTIO_DEV_RUNNING; dev->notify_ops->destroy_device(dev->vid); } dev->flags &= ~VIRTIO_DEV_READY; + dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED; - /* Here we are safe to get the last used index */ - msg->payload.state.num = vq->last_used_idx; + /* Here we are safe to get the last avail index */ + msg->payload.state.num = vq->last_avail_idx; RTE_LOG(INFO, VHOST_CONFIG, "vring base idx:%d file:%d\n", msg->payload.state.index, @@ -910,6 +1028,11 @@ vhost_user_get_vring_base(struct virtio_net *dev, vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + if (vq->callfd >= 0) + close(vq->callfd); + + vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + if (dev->dequeue_zero_copy) free_zmbufs(vq); rte_free(vq->shadow_used_ring); @@ -930,16 +1053,24 @@ vhost_user_set_vring_enable(struct virtio_net *dev, VhostUserMsg *msg) { int enable = (int)msg->payload.state.num; + int index = (int)msg->payload.state.index; + struct rte_vdpa_device *vdpa_dev; + int did = -1; RTE_LOG(INFO, VHOST_CONFIG, "set queue enable: %d to qp idx: %d\n", - enable, msg->payload.state.index); + enable, index); + + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->set_vring_state) + vdpa_dev->ops->set_vring_state(dev->vid, index, enable); if (dev->notify_ops->vring_state_changed) dev->notify_ops->vring_state_changed(dev->vid, - msg->payload.state.index, enable); + index, enable); - dev->virtqueue[msg->payload.state.index]->enabled = enable; + dev->virtqueue[index]->enabled = enable; return 0; } @@ -948,9 +1079,10 @@ static void vhost_user_get_protocol_features(struct virtio_net *dev, struct VhostUserMsg *msg) { - uint64_t features, protocol_features = VHOST_USER_PROTOCOL_FEATURES; + uint64_t features, protocol_features; rte_vhost_driver_get_features(dev->ifname, &features); + 
rte_vhost_driver_get_protocol_features(dev->ifname, &protocol_features); /* * REPLY_ACK protocol feature is only mandatory for now @@ -996,6 +1128,15 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) size = msg->payload.log.mmap_size; off = msg->payload.log.mmap_offset; + + /* Don't allow mmap_offset to point outside the mmap region */ + if (off > size) { + RTE_LOG(ERR, VHOST_CONFIG, + "log offset %#"PRIx64" exceeds log size %#"PRIx64"\n", + off, size); + return -1; + } + RTE_LOG(INFO, VHOST_CONFIG, "log mmap size: %"PRId64", offset: %"PRId64"\n", size, off); @@ -1004,7 +1145,7 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) * mmap from 0 to workaround a hugepage mmap bug: mmap will * fail when offset is not page size aligned. */ - addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); close(fd); if (addr == MAP_FAILED) { RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); @@ -1037,6 +1178,8 @@ static int vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) { uint8_t *mac = (uint8_t *)&msg->payload.u64; + struct rte_vdpa_device *vdpa_dev; + int did = -1; RTE_LOG(DEBUG, VHOST_CONFIG, ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", @@ -1052,6 +1195,10 @@ vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) */ rte_smp_wmb(); rte_atomic16_set(&dev->broadcast_rarp, 1); + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->migration_done) + vdpa_dev->ops->migration_done(dev->vid); return 0; } @@ -1214,13 +1361,13 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg) } static int -send_vhost_message(int sockfd, struct VhostUserMsg *msg) +send_vhost_message(int sockfd, struct VhostUserMsg *msg, int *fds, int fd_num) { if (!msg) return 0; return send_fd_message(sockfd, (char *)msg, - VHOST_USER_HDR_SIZE + msg->size, NULL, 0); + VHOST_USER_HDR_SIZE + 
msg->size, fds, fd_num); } static int @@ -1234,7 +1381,7 @@ send_vhost_reply(int sockfd, struct VhostUserMsg *msg) msg->flags |= VHOST_USER_VERSION; msg->flags |= VHOST_USER_REPLY_MASK; - return send_vhost_message(sockfd, msg); + return send_vhost_message(sockfd, msg, NULL, 0); } /* @@ -1314,8 +1461,11 @@ vhost_user_msg_handler(int vid, int fd) { struct virtio_net *dev; struct VhostUserMsg msg; + struct rte_vdpa_device *vdpa_dev; + int did = -1; int ret; int unlock_required = 0; + uint32_t skip_master = 0; dev = get_device(vid); if (dev == NULL) @@ -1393,6 +1543,21 @@ vhost_user_msg_handler(int vid, int fd) } + if (dev->extern_ops.pre_msg_handle) { + uint32_t need_reply; + + ret = (*dev->extern_ops.pre_msg_handle)(dev->vid, + (void *)&msg, &need_reply, &skip_master); + if (ret < 0) + goto skip_to_reply; + + if (need_reply) + send_vhost_reply(fd, &msg); + + if (skip_master) + goto skip_to_post_handle; + } + switch (msg.request.master) { case VHOST_USER_GET_FEATURES: msg.payload.u64 = vhost_user_get_features(dev); @@ -1421,7 +1586,7 @@ vhost_user_msg_handler(int vid, int fd) break; case VHOST_USER_SET_MEM_TABLE: - ret = vhost_user_set_mem_table(dev, &msg); + ret = vhost_user_set_mem_table(&dev, &msg); break; case VHOST_USER_SET_LOG_BASE: @@ -1466,7 +1631,7 @@ vhost_user_msg_handler(int vid, int fd) break; case VHOST_USER_GET_QUEUE_NUM: - msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS; + msg.payload.u64 = (uint64_t)vhost_user_get_queue_num(dev); msg.size = sizeof(msg.payload.u64); send_vhost_reply(fd, &msg); break; @@ -1493,9 +1658,22 @@ vhost_user_msg_handler(int vid, int fd) default: ret = -1; break; + } + +skip_to_post_handle: + if (dev->extern_ops.post_msg_handle) { + uint32_t need_reply; + ret = (*dev->extern_ops.post_msg_handle)( + dev->vid, (void *)&msg, &need_reply); + if (ret < 0) + goto skip_to_reply; + + if (need_reply) + send_vhost_reply(fd, &msg); } +skip_to_reply: if (unlock_required) vhost_user_unlock_all_queue_pairs(dev); @@ -1519,6 +1697,16 @@ 
vhost_user_msg_handler(int vid, int fd) } } + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && virtio_is_ready(dev) && + !(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED) && + msg.request.master == VHOST_USER_SET_VRING_ENABLE) { + if (vdpa_dev->ops->dev_conf) + vdpa_dev->ops->dev_conf(dev->vid); + dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED; + } + return 0; } @@ -1537,7 +1725,7 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) }, }; - ret = send_vhost_message(dev->slave_req_fd, &msg); + ret = send_vhost_message(dev->slave_req_fd, &msg, NULL, 0); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, "Failed to send IOTLB miss message (%d)\n", diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index d4bd604b..1ad5cf46 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #ifndef _VHOST_NET_USER_H @@ -14,19 +14,13 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 -#define VHOST_USER_PROTOCOL_F_MQ 0 -#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 -#define VHOST_USER_PROTOCOL_F_RARP 2 -#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 -#define VHOST_USER_PROTOCOL_F_NET_MTU 4 -#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 - #define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ (1ULL << VHOST_USER_PROTOCOL_F_RARP) | \ (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \ - (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ)) + (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION)) typedef enum VhostUserRequest { VHOST_USER_NONE = 0, @@ -52,7 +46,9 @@ typedef enum VhostUserRequest { VHOST_USER_NET_SET_MTU = 20, VHOST_USER_SET_SLAVE_REQ_FD = 21, VHOST_USER_IOTLB_MSG = 22, - VHOST_USER_MAX + VHOST_USER_CRYPTO_CREATE_SESS = 26, + 
VHOST_USER_CRYPTO_CLOSE_SESS = 27, + VHOST_USER_MAX = 28 } VhostUserRequest; typedef enum VhostUserSlaveRequest { @@ -79,10 +75,34 @@ typedef struct VhostUserLog { uint64_t mmap_offset; } VhostUserLog; +/* Comply with Cryptodev-Linux */ +#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH 512 +#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH 64 + +/* Same structure as vhost-user backend session info */ +typedef struct VhostUserCryptoSessionParam { + int64_t session_id; + uint32_t op_code; + uint32_t cipher_algo; + uint32_t cipher_key_len; + uint32_t hash_algo; + uint32_t digest_len; + uint32_t auth_key_len; + uint32_t aad_len; + uint8_t op_type; + uint8_t dir; + uint8_t hash_mode; + uint8_t chaining_dir; + uint8_t *ciphe_key; + uint8_t *auth_key; + uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH]; + uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH]; +} VhostUserCryptoSessionParam; + typedef struct VhostUserMsg { union { - VhostUserRequest master; - VhostUserSlaveRequest slave; + uint32_t master; /* a VhostUserRequest value */ + uint32_t slave; /* a VhostUserSlaveRequest value*/ } request; #define VHOST_USER_VERSION_MASK 0x3 @@ -99,6 +119,7 @@ typedef struct VhostUserMsg { VhostUserMemory memory; VhostUserLog log; struct vhost_iotlb_msg iotlb; + VhostUserCryptoSessionParam crypto_session; } payload; int fds[VHOST_MEMORY_MAX_NREGIONS]; } __attribute((packed)) VhostUserMsg; diff --git a/lib/librte_vhost/virtio_crypto.h b/lib/librte_vhost/virtio_crypto.h new file mode 100644 index 00000000..e3b93573 --- /dev/null +++ b/lib/librte_vhost/virtio_crypto.h @@ -0,0 +1,422 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 HUAWEI TECHNOLOGIES CO., LTD. 
+ */ + +#ifndef _VIRTIO_CRYPTO_H +#define _VIRTIO_CRYPTO_H + +#define VIRTIO_CRYPTO_SERVICE_CIPHER 0 +#define VIRTIO_CRYPTO_SERVICE_HASH 1 +#define VIRTIO_CRYPTO_SERVICE_MAC 2 +#define VIRTIO_CRYPTO_SERVICE_AEAD 3 + +#define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) + +struct virtio_crypto_ctrl_header { +#define VIRTIO_CRYPTO_CIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x02) +#define VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x03) +#define VIRTIO_CRYPTO_HASH_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x02) +#define VIRTIO_CRYPTO_HASH_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x03) +#define VIRTIO_CRYPTO_MAC_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x02) +#define VIRTIO_CRYPTO_MAC_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x03) +#define VIRTIO_CRYPTO_AEAD_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) +#define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) + uint32_t opcode; + uint32_t algo; + uint32_t flag; + /* data virtqueue id */ + uint32_t queue_id; +}; + +struct virtio_crypto_cipher_session_para { +#define VIRTIO_CRYPTO_NO_CIPHER 0 +#define VIRTIO_CRYPTO_CIPHER_ARC4 1 +#define VIRTIO_CRYPTO_CIPHER_AES_ECB 2 +#define VIRTIO_CRYPTO_CIPHER_AES_CBC 3 +#define VIRTIO_CRYPTO_CIPHER_AES_CTR 4 +#define VIRTIO_CRYPTO_CIPHER_DES_ECB 5 +#define VIRTIO_CRYPTO_CIPHER_DES_CBC 6 +#define VIRTIO_CRYPTO_CIPHER_3DES_ECB 7 +#define VIRTIO_CRYPTO_CIPHER_3DES_CBC 8 +#define VIRTIO_CRYPTO_CIPHER_3DES_CTR 9 +#define VIRTIO_CRYPTO_CIPHER_KASUMI_F8 10 +#define VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2 11 +#define VIRTIO_CRYPTO_CIPHER_AES_F8 12 +#define VIRTIO_CRYPTO_CIPHER_AES_XTS 13 +#define VIRTIO_CRYPTO_CIPHER_ZUC_EEA3 14 + uint32_t algo; + /* length of key */ + uint32_t keylen; + +#define VIRTIO_CRYPTO_OP_ENCRYPT 1 
+#define VIRTIO_CRYPTO_OP_DECRYPT 2 + /* encrypt or decrypt */ + uint32_t op; + uint32_t padding; +}; + +struct virtio_crypto_session_input { + /* Device-writable part */ + uint64_t session_id; + uint32_t status; + uint32_t padding; +}; + +struct virtio_crypto_cipher_session_req { + struct virtio_crypto_cipher_session_para para; + uint8_t padding[32]; +}; + +struct virtio_crypto_hash_session_para { +#define VIRTIO_CRYPTO_NO_HASH 0 +#define VIRTIO_CRYPTO_HASH_MD5 1 +#define VIRTIO_CRYPTO_HASH_SHA1 2 +#define VIRTIO_CRYPTO_HASH_SHA_224 3 +#define VIRTIO_CRYPTO_HASH_SHA_256 4 +#define VIRTIO_CRYPTO_HASH_SHA_384 5 +#define VIRTIO_CRYPTO_HASH_SHA_512 6 +#define VIRTIO_CRYPTO_HASH_SHA3_224 7 +#define VIRTIO_CRYPTO_HASH_SHA3_256 8 +#define VIRTIO_CRYPTO_HASH_SHA3_384 9 +#define VIRTIO_CRYPTO_HASH_SHA3_512 10 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE128 11 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE256 12 + uint32_t algo; + /* hash result length */ + uint32_t hash_result_len; + uint8_t padding[8]; +}; + +struct virtio_crypto_hash_create_session_req { + struct virtio_crypto_hash_session_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_mac_session_para { +#define VIRTIO_CRYPTO_NO_MAC 0 +#define VIRTIO_CRYPTO_MAC_HMAC_MD5 1 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA1 2 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_224 3 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_256 4 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_384 5 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_512 6 +#define VIRTIO_CRYPTO_MAC_CMAC_3DES 25 +#define VIRTIO_CRYPTO_MAC_CMAC_AES 26 +#define VIRTIO_CRYPTO_MAC_KASUMI_F9 27 +#define VIRTIO_CRYPTO_MAC_SNOW3G_UIA2 28 +#define VIRTIO_CRYPTO_MAC_GMAC_AES 41 +#define VIRTIO_CRYPTO_MAC_GMAC_TWOFISH 42 +#define VIRTIO_CRYPTO_MAC_CBCMAC_AES 49 +#define VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9 50 +#define VIRTIO_CRYPTO_MAC_XCBC_AES 53 + uint32_t algo; + /* hash result length */ + uint32_t hash_result_len; + /* length of authenticated key */ + uint32_t auth_key_len; + uint32_t padding; +}; + +struct 
virtio_crypto_mac_create_session_req { + struct virtio_crypto_mac_session_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_aead_session_para { +#define VIRTIO_CRYPTO_NO_AEAD 0 +#define VIRTIO_CRYPTO_AEAD_GCM 1 +#define VIRTIO_CRYPTO_AEAD_CCM 2 +#define VIRTIO_CRYPTO_AEAD_CHACHA20_POLY1305 3 + uint32_t algo; + /* length of key */ + uint32_t key_len; + /* hash result length */ + uint32_t hash_result_len; + /* length of the additional authenticated data (AAD) in bytes */ + uint32_t aad_len; + /* encrypt or decrypt, See above VIRTIO_CRYPTO_OP_* */ + uint32_t op; + uint32_t padding; +}; + +struct virtio_crypto_aead_create_session_req { + struct virtio_crypto_aead_session_para para; + uint8_t padding[32]; +}; + +struct virtio_crypto_alg_chain_session_para { +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 + uint32_t alg_chain_order; +/* Plain hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN 1 +/* Authenticated hash (mac) */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH 2 +/* Nested hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED 3 + uint32_t hash_mode; + struct virtio_crypto_cipher_session_para cipher_param; + union { + struct virtio_crypto_hash_session_para hash_param; + struct virtio_crypto_mac_session_para mac_param; + uint8_t padding[16]; + } u; + /* length of the additional authenticated data (AAD) in bytes */ + uint32_t aad_len; + uint32_t padding; +}; + +struct virtio_crypto_alg_chain_session_req { + struct virtio_crypto_alg_chain_session_para para; +}; + +struct virtio_crypto_sym_create_session_req { + union { + struct virtio_crypto_cipher_session_req cipher; + struct virtio_crypto_alg_chain_session_req chain; + uint8_t padding[48]; + } u; + + /* Device-readable part */ + +/* No operation */ +#define VIRTIO_CRYPTO_SYM_OP_NONE 0 +/* Cipher only operation on the data */ +#define VIRTIO_CRYPTO_SYM_OP_CIPHER 1 +/* + * Chain any cipher with any hash or mac operation. 
The order + * depends on the value of alg_chain_order param + */ +#define VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING 2 + uint32_t op_type; + uint32_t padding; +}; + +struct virtio_crypto_destroy_session_req { + /* Device-readable part */ + uint64_t session_id; + uint8_t padding[48]; +}; + +/* The request of the control virtqueue's packet */ +struct virtio_crypto_op_ctrl_req { + struct virtio_crypto_ctrl_header header; + + union { + struct virtio_crypto_sym_create_session_req + sym_create_session; + struct virtio_crypto_hash_create_session_req + hash_create_session; + struct virtio_crypto_mac_create_session_req + mac_create_session; + struct virtio_crypto_aead_create_session_req + aead_create_session; + struct virtio_crypto_destroy_session_req + destroy_session; + uint8_t padding[56]; + } u; +}; + +struct virtio_crypto_op_header { +#define VIRTIO_CRYPTO_CIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x00) +#define VIRTIO_CRYPTO_CIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x01) +#define VIRTIO_CRYPTO_HASH \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x00) +#define VIRTIO_CRYPTO_MAC \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x00) +#define VIRTIO_CRYPTO_AEAD_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) +#define VIRTIO_CRYPTO_AEAD_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) + uint32_t opcode; + /* algo should be service-specific algorithms */ + uint32_t algo; + /* session_id should be service-specific algorithms */ + uint64_t session_id; + /* control flag to control the request */ + uint32_t flag; + uint32_t padding; +}; + +struct virtio_crypto_cipher_para { + /* + * Byte Length of valid IV/Counter + * + * For block ciphers in CBC or F8 mode, or for Kasumi in F8 mode, or for + * SNOW3G in UEA2 mode, this is the length of the IV (which + * must be the same as the block length of the cipher). 
+ * For block ciphers in CTR mode, this is the length of the counter + * (which must be the same as the block length of the cipher). + * For AES-XTS, this is the 128bit tweak, i, from IEEE Std 1619-2007. + * + * The IV/Counter will be updated after every partial cryptographic + * operation. + */ + uint32_t iv_len; + /* length of source data */ + uint32_t src_data_len; + /* length of dst data */ + uint32_t dst_data_len; + uint32_t padding; +}; + +struct virtio_crypto_hash_para { + /* length of source data */ + uint32_t src_data_len; + /* hash result length */ + uint32_t hash_result_len; +}; + +struct virtio_crypto_mac_para { + struct virtio_crypto_hash_para hash; +}; + +struct virtio_crypto_aead_para { + /* + * Byte Length of valid IV data pointed to by the below iv_addr + * parameter. + * + * For GCM mode, this is either 12 (for 96-bit IVs) or 16, in which + * case iv_addr points to J0. + * For CCM mode, this is the length of the nonce, which can be in the + * range 7 to 13 inclusive. + */ + uint32_t iv_len; + /* length of additional auth data */ + uint32_t aad_len; + /* length of source data */ + uint32_t src_data_len; + /* length of dst data */ + uint32_t dst_data_len; +}; + +struct virtio_crypto_cipher_data_req { + /* Device-readable part */ + struct virtio_crypto_cipher_para para; + uint8_t padding[24]; +}; + +struct virtio_crypto_hash_data_req { + /* Device-readable part */ + struct virtio_crypto_hash_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_mac_data_req { + /* Device-readable part */ + struct virtio_crypto_mac_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_alg_chain_data_para { + uint32_t iv_len; + /* Length of source data */ + uint32_t src_data_len; + /* Length of destination data */ + uint32_t dst_data_len; + /* Starting point for cipher processing in source data */ + uint32_t cipher_start_src_offset; + /* Length of the source data that the cipher will be computed on */ + uint32_t len_to_cipher; + /* Starting point for 
hash processing in source data */ + uint32_t hash_start_src_offset; + /* Length of the source data that the hash will be computed on */ + uint32_t len_to_hash; + /* Length of the additional auth data */ + uint32_t aad_len; + /* Length of the hash result */ + uint32_t hash_result_len; + uint32_t reserved; +}; + +struct virtio_crypto_alg_chain_data_req { + /* Device-readable part */ + struct virtio_crypto_alg_chain_data_para para; +}; + +struct virtio_crypto_sym_data_req { + union { + struct virtio_crypto_cipher_data_req cipher; + struct virtio_crypto_alg_chain_data_req chain; + uint8_t padding[40]; + } u; + + /* See above VIRTIO_CRYPTO_SYM_OP_* */ + uint32_t op_type; + uint32_t padding; +}; + +struct virtio_crypto_aead_data_req { + /* Device-readable part */ + struct virtio_crypto_aead_para para; + uint8_t padding[32]; +}; + +/* The request of the data virtqueue's packet */ +struct virtio_crypto_op_data_req { + struct virtio_crypto_op_header header; + + union { + struct virtio_crypto_sym_data_req sym_req; + struct virtio_crypto_hash_data_req hash_req; + struct virtio_crypto_mac_data_req mac_req; + struct virtio_crypto_aead_data_req aead_req; + uint8_t padding[48]; + } u; +}; + +#define VIRTIO_CRYPTO_OK 0 +#define VIRTIO_CRYPTO_ERR 1 +#define VIRTIO_CRYPTO_BADMSG 2 +#define VIRTIO_CRYPTO_NOTSUPP 3 +#define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ + +/* The accelerator hardware is ready */ +#define VIRTIO_CRYPTO_S_HW_READY (1 << 0) + +struct virtio_crypto_config { + /* See VIRTIO_CRYPTO_OP_* above */ + uint32_t status; + + /* + * Maximum number of data queue + */ + uint32_t max_dataqueues; + + /* + * Specifies the services mask which the device support, + * see VIRTIO_CRYPTO_SERVICE_* above + */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + /* Maximum length of cipher key */ + uint32_t 
max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + uint32_t reserve; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +}; + +struct virtio_crypto_inhdr { + /* See VIRTIO_CRYPTO_* above */ + uint8_t status; +}; +#endif /* _VIRTIO_CRYPTO_H */ diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 3d08ddf0..76ec5f08 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -78,7 +78,7 @@ do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_memcpy(&vq->used->ring[to], &vq->shadow_used_ring[from], size * sizeof(struct vring_used_elem)); - vhost_log_used_vring(dev, vq, + vhost_log_cache_used_vring(dev, vq, offsetof(struct vring_used, ring[to]), size * sizeof(struct vring_used_elem)); } @@ -106,6 +106,8 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq) rte_smp_wmb(); + vhost_log_cache_sync(dev, vq); + *(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx; vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), sizeof(vq->used->idx)); @@ -130,7 +132,7 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) for (i = 0; i < count; i++) { rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); - vhost_log_write(dev, elem[i].log_addr, elem[i].len); + vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len); PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); } } @@ -251,7 +253,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, virtio_enqueue_offload(m, (struct virtio_net_hdr *)(uintptr_t)desc_addr); PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); - vhost_log_write(dev, desc_gaddr, dev->vhost_hlen); + vhost_log_cache_write(dev, vq, desc_gaddr, dev->vhost_hlen); } else { struct virtio_net_hdr vnet_hdr; uint64_t remain = dev->vhost_hlen; @@ -273,11 +275,11 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, 
rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len); - PRINT_PACKET(dev, (uintptr_t)dst, len, 0); - vhost_log_write(dev, guest_addr, len); + PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0); + vhost_log_cache_write(dev, vq, guest_addr, len); remain -= len; guest_addr += len; - dst += len; + src += len; } } @@ -355,7 +357,8 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, desc_offset)), rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), cpy_len); - vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len); + vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset, + cpy_len); PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), cpy_len, 0); } else { @@ -399,7 +402,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, uint16_t used_idx; uint32_t i, sz; - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", dev->vid, __func__, queue_id); @@ -431,7 +434,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, if (count == 0) goto out; - LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n", + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n", dev->vid, start_idx, start_idx + count); vq->batch_copy_nb_elems = 0; @@ -444,7 +447,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, vq->used->ring[used_idx].id = desc_indexes[i]; vq->used->ring[used_idx].len = pkts[i]->pkt_len + dev->vhost_hlen; - vhost_log_used_vring(dev, vq, + vhost_log_cache_used_vring(dev, vq, offsetof(struct vring_used, ring[used_idx]), sizeof(vq->used->ring[used_idx])); } @@ -504,6 +507,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, rte_smp_wmb(); + vhost_log_cache_sync(dev, vq); + *(volatile uint16_t *)&vq->used->idx += count; vq->last_used_idx += count; vhost_log_used_vring(dev, vq, @@ -675,7 +680,7 @@ 
copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, hdr_phys_addr = desc_gaddr; rte_prefetch0((void *)(uintptr_t)hdr_addr); - LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", dev->vid, num_buffers); desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; @@ -766,17 +771,18 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, len); PRINT_PACKET(dev, (uintptr_t)dst, - len, 0); - vhost_log_write(dev, guest_addr, len); + (uint32_t)len, 0); + vhost_log_cache_write(dev, vq, + guest_addr, len); remain -= len; guest_addr += len; - dst += len; + src += len; } } else { PRINT_PACKET(dev, (uintptr_t)hdr_addr, dev->vhost_hlen, 0); - vhost_log_write(dev, hdr_phys_addr, + vhost_log_cache_write(dev, vq, hdr_phys_addr, dev->vhost_hlen); } @@ -790,7 +796,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, desc_offset)), rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), cpy_len); - vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len); + vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset, + cpy_len); PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), cpy_len, 0); } else { @@ -826,7 +833,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, struct buf_vector buf_vec[BUF_VECTOR_MAX]; uint16_t avail_head; - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", dev->vid, __func__, queue_id); @@ -863,14 +870,14 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, if (unlikely(reserve_avail_buf_mergeable(dev, vq, pkt_len, buf_vec, &num_buffers, avail_head) < 0)) { - LOG_DEBUG(VHOST_DATA, + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) failed to get enough desc from vring\n", dev->vid); vq->shadow_used_idx 
-= num_buffers; break; } - LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", dev->vid, vq->last_avail_idx, vq->last_avail_idx + num_buffers); @@ -1170,7 +1177,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset)); PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), - desc_chunck_len, 0); + (uint32_t)desc_chunck_len, 0); mbuf_offset = 0; mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; @@ -1258,7 +1265,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, desc_avail = desc->len; PRINT_PACKET(dev, (uintptr_t)desc_addr, - desc_chunck_len, 0); + (uint32_t)desc_chunck_len, 0); } else if (unlikely(desc_chunck_len == 0)) { desc_chunck_len = desc_avail; desc_gaddr += desc_offset; @@ -1273,7 +1280,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, desc_offset = 0; PRINT_PACKET(dev, (uintptr_t)desc_addr, - desc_chunck_len, 0); + (uint32_t)desc_chunck_len, 0); } /* @@ -1320,7 +1327,7 @@ update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, { vq->used->ring[used_idx].id = desc_idx; vq->used->ring[used_idx].len = 0; - vhost_log_used_vring(dev, vq, + vhost_log_cache_used_vring(dev, vq, offsetof(struct vring_used, ring[used_idx]), sizeof(vq->used->ring[used_idx])); } @@ -1335,6 +1342,8 @@ update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_smp_wmb(); rte_smp_rmb(); + vhost_log_cache_sync(dev, vq); + vq->used->idx += count; vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), sizeof(vq->used->idx)); @@ -1506,7 +1515,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, if (free_entries == 0) goto out; - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); /* Prefetch available and used ring */ avail_idx = vq->last_avail_idx & (vq->size - 1); @@ -1516,7 
+1525,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, count = RTE_MIN(count, MAX_PKT_BURST); count = RTE_MIN(count, free_entries); - LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", dev->vid, count); /* Retrieve all of the head indexes first to avoid caching issues. */ |