diff options
Diffstat (limited to 'lib/librte_vhost')
-rw-r--r-- | lib/librte_vhost/Makefile | 3 | ||||
-rw-r--r-- | lib/librte_vhost/rte_vhost_version.map | 10 | ||||
-rw-r--r-- | lib/librte_vhost/rte_virtio_net.h | 233 | ||||
-rw-r--r-- | lib/librte_vhost/vhost-net.h | 200 | ||||
-rw-r--r-- | lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 91 | ||||
-rw-r--r-- | lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 30 | ||||
-rw-r--r-- | lib/librte_vhost/vhost_cuse/virtio-net-cdev.h | 12 | ||||
-rw-r--r-- | lib/librte_vhost/vhost_rxtx.c | 350 | ||||
-rw-r--r-- | lib/librte_vhost/vhost_user/vhost-net-user.c | 450 | ||||
-rw-r--r-- | lib/librte_vhost/vhost_user/vhost-net-user.h | 8 | ||||
-rw-r--r-- | lib/librte_vhost/vhost_user/virtio-net-user.c | 102 | ||||
-rw-r--r-- | lib/librte_vhost/vhost_user/virtio-net-user.h | 18 | ||||
-rw-r--r-- | lib/librte_vhost/virtio-net.c | 239 | ||||
-rw-r--r-- | lib/librte_vhost/virtio-net.h | 43 |
14 files changed, 1009 insertions, 780 deletions
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile index e33ff53e..538adb0b 100644 --- a/lib/librte_vhost/Makefile +++ b/lib/librte_vhost/Makefile @@ -36,7 +36,7 @@ LIB = librte_vhost.a EXPORT_MAP := rte_vhost_version.map -LIBABIVER := 2 +LIBABIVER := 3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y) @@ -66,6 +66,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mempool DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_net include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index 3d8709e5..5ceaa8a5 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -20,3 +20,13 @@ DPDK_2.1 { rte_vhost_driver_unregister; } DPDK_2.0; + +DPDK_16.07 { + global: + + rte_vhost_avail_entries; + rte_vhost_get_ifname; + rte_vhost_get_numa_node; + rte_vhost_get_queue_num; + +} DPDK_2.1; diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h index 600b20b4..9caa6221 100644 --- a/lib/librte_vhost/rte_virtio_net.h +++ b/lib/librte_vhost/rte_virtio_net.h @@ -51,125 +51,12 @@ #include <rte_mempool.h> #include <rte_ether.h> -struct rte_mbuf; - -#define VHOST_MEMORY_MAX_NREGIONS 8 - -/* Used to indicate that the device is running on a data core */ -#define VIRTIO_DEV_RUNNING 1 - -/* Backend value set by guest. */ -#define VIRTIO_DEV_STOPPED -1 - +#define RTE_VHOST_USER_CLIENT (1ULL << 0) +#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) /* Enum for virtqueue management. */ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; -#define BUF_VECTOR_MAX 256 - -/** - * Structure contains buffer address, length and descriptor index - * from vring to do scatter RX. - */ -struct buf_vector { - uint64_t buf_addr; - uint32_t buf_len; - uint32_t desc_idx; -}; - -/** - * Structure contains variables relevant to RX/TX virtqueues. - */ -struct vhost_virtqueue { - struct vring_desc *desc; /**< Virtqueue descriptor ring. */ - struct vring_avail *avail; /**< Virtqueue available ring. */ - struct vring_used *used; /**< Virtqueue used ring. */ - uint32_t size; /**< Size of descriptor ring. */ - uint32_t backend; /**< Backend value to determine if device should started/stopped. */ - uint16_t vhost_hlen; /**< Vhost header length (varies depending on RX merge buffers. */ - volatile uint16_t last_used_idx; /**< Last index used on the available ring */ - volatile uint16_t last_used_idx_res; /**< Used for multiple devices reserving buffers. */ -#define VIRTIO_INVALID_EVENTFD (-1) -#define VIRTIO_UNINITIALIZED_EVENTFD (-2) - int callfd; /**< Used to notify the guest (trigger interrupt). */ - int kickfd; /**< Currently unused as polling mode is enabled. */ - int enabled; - uint64_t log_guest_addr; /**< Physical address of used ring, for logging */ - uint64_t reserved[15]; /**< Reserve some spaces for future extension. */ - struct buf_vector buf_vec[BUF_VECTOR_MAX]; /**< for scatter RX. */ -} __rte_cache_aligned; - -/* Old kernels have no such macro defined */ -#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE - #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 -#endif - - -/* - * Make an extra wrapper for VIRTIO_NET_F_MQ and - * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are - * introduced since kernel v3.8. This makes our - * code buildable for older kernel. - */ -#ifdef VIRTIO_NET_F_MQ - #define VHOST_MAX_QUEUE_PAIRS VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX - #define VHOST_SUPPORTS_MQ (1ULL << VIRTIO_NET_F_MQ) -#else - #define VHOST_MAX_QUEUE_PAIRS 1 - #define VHOST_SUPPORTS_MQ 0 -#endif - -/* - * Define virtio 1.0 for older kernels - */ -#ifndef VIRTIO_F_VERSION_1 - #define VIRTIO_F_VERSION_1 32 -#endif - -/** - * Device structure contains all configuration information relating to the device. - */ -struct virtio_net { - struct virtio_memory *mem; /**< QEMU memory and memory region information. */ - uint64_t features; /**< Negotiated feature set. */ - uint64_t protocol_features; /**< Negotiated protocol feature set. */ - uint64_t device_fh; /**< device identifier. */ - uint32_t flags; /**< Device flags. Only used to check if device is running on data core. */ -#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) - char ifname[IF_NAME_SZ]; /**< Name of the tap device or socket path. */ - uint32_t virt_qp_nb; /**< number of queue pair we have allocated */ - void *priv; /**< private context */ - uint64_t log_size; /**< Size of log area */ - uint64_t log_base; /**< Where dirty pages are logged */ - struct ether_addr mac; /**< MAC address */ - rte_atomic16_t broadcast_rarp; /**< A flag to tell if we need broadcast rarp packet */ - uint64_t reserved[61]; /**< Reserve some spaces for future extension. */ - struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; /**< Contains all virtqueue information. */ -} __rte_cache_aligned; - -/** - * Information relating to memory regions including offsets to addresses in QEMUs memory file. - */ -struct virtio_memory_regions { - uint64_t guest_phys_address; /**< Base guest physical address of region. */ - uint64_t guest_phys_address_end; /**< End guest physical address of region. */ - uint64_t memory_size; /**< Size of region. */ - uint64_t userspace_address; /**< Base userspace address of region. */ - uint64_t address_offset; /**< Offset of region for address translation. */ -}; - - -/** - * Memory structure includes region and mapping information. - */ -struct virtio_memory { - uint64_t base_address; /**< Base QEMU userspace address of the memory file. */ - uint64_t mapped_address; /**< Mapped address of memory file base in our applications memory space. */ - uint64_t mapped_size; /**< Total size of memory file. */ - uint32_t nregions; /**< Number of memory regions. */ - struct virtio_memory_regions regions[0]; /**< Memory region information. */ -}; - /** * Device and vring operations. * @@ -178,45 +65,13 @@ struct virtio_memory { * */ struct virtio_net_device_ops { - int (*new_device)(struct virtio_net *); /**< Add device. */ - void (*destroy_device)(volatile struct virtio_net *); /**< Remove device. */ - - int (*vring_state_changed)(struct virtio_net *dev, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */ -}; - -static inline uint16_t __attribute__((always_inline)) -rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id) -{ - struct vhost_virtqueue *vq = dev->virtqueue[queue_id]; - - if (!vq->enabled) - return 0; + int (*new_device)(int vid); /**< Add device. */ + void (*destroy_device)(int vid); /**< Remove device. */ - return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res; -} - -/** - * Function to convert guest physical addresses to vhost virtual addresses. - * This is used to convert guest virtio buffer addresses. - */ -static inline uint64_t __attribute__((always_inline)) -gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa) -{ - struct virtio_memory_regions *region; - uint32_t regionidx; - uint64_t vhost_va = 0; - - for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) { - region = &dev->mem->regions[regionidx]; - if ((guest_pa >= region->guest_phys_address) && - (guest_pa <= region->guest_phys_address_end)) { - vhost_va = region->address_offset + guest_pa; - break; - } - } - return vhost_va; -} + int (*vring_state_changed)(int vid, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */ + void *reserved[5]; /**< Reserved for future extension */ +}; /** * Disable features in feature_mask. Returns 0 on success. @@ -231,13 +86,16 @@ int rte_vhost_feature_enable(uint64_t feature_mask); /* Returns currently supported vhost features */ uint64_t rte_vhost_feature_get(void); -int rte_vhost_enable_guest_notification(struct virtio_net *dev, uint16_t queue_id, int enable); +int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable); -/* Register vhost driver. dev_name could be different for multiple instance support. */ -int rte_vhost_driver_register(const char *dev_name); +/** + * Register vhost driver. path could be different for multiple + * instance support. + */ +int rte_vhost_driver_register(const char *path, uint64_t flags); /* Unregister vhost driver. This is only meaningful to vhost user. */ -int rte_vhost_driver_unregister(const char *dev_name); +int rte_vhost_driver_unregister(const char *path); /* Register callbacks. */ int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const); @@ -245,12 +103,65 @@ int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * cons int rte_vhost_driver_session_start(void); /** + * Get the numa node from which the virtio net device's memory + * is allocated. + * + * @param vid + * virtio-net device ID + * + * @return + * The numa node, -1 on failure + */ +int rte_vhost_get_numa_node(int vid); + +/** + * Get the number of queues the device supports. + * + * @param vid + * virtio-net device ID + * + * @return + * The number of queues, 0 on failure + */ +uint32_t rte_vhost_get_queue_num(int vid); + +/** + * Get the virtio net device's ifname. For vhost-cuse, ifname is the + * path of the char device. For vhost-user, ifname is the vhost-user + * socket file path. + * + * @param vid + * virtio-net device ID + * @param buf + * The buffer to stored the queried ifname + * @param len + * The length of buf + * + * @return + * 0 on success, -1 on failure + */ +int rte_vhost_get_ifname(int vid, char *buf, size_t len); + +/** + * Get how many avail entries are left in the queue + * + * @param vid + * virtio-net device ID + * @param queue_id + * virtio queue index + * + * @return + * num of avail entires left + */ +uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id); + +/** * This function adds buffers to the virtio devices RX virtqueue. Buffers can * be received from the physical port or from another virtual device. A packet * count is returned to indicate the number of packets that were succesfully * added to the RX queue. - * @param dev - * virtio-net device + * @param vid + * virtio-net device ID * @param queue_id * virtio queue index in mq case * @param pkts @@ -260,14 +171,14 @@ int rte_vhost_driver_session_start(void); * @return * num of packets enqueued */ -uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, +uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id, struct rte_mbuf **pkts, uint16_t count); /** * This function gets guest buffers from the virtio device TX virtqueue, * construct host mbufs, copies guest buffer content to host mbufs and * store them in pkts to be processed. - * @param dev + * @param vid * virtio-net device * @param queue_id * virtio queue index in mq case @@ -280,7 +191,7 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, * @return * num of packets dequeued */ -uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, +uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count); #endif /* _VIRTIO_NET_H_ */ diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h index f193a1f6..38593a29 100644 --- a/lib/librte_vhost/vhost-net.h +++ b/lib/librte_vhost/vhost-net.h @@ -43,6 +43,128 @@ #include "rte_virtio_net.h" +/* Used to indicate that the device is running on a data core */ +#define VIRTIO_DEV_RUNNING 1 + +/* Backend value set by guest. */ +#define VIRTIO_DEV_STOPPED -1 + +#define BUF_VECTOR_MAX 256 + +/** + * Structure contains buffer address, length and descriptor index + * from vring to do scatter RX. + */ +struct buf_vector { + uint64_t buf_addr; + uint32_t buf_len; + uint32_t desc_idx; +}; + +/** + * Structure contains variables relevant to RX/TX virtqueues. + */ +struct vhost_virtqueue { + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + uint32_t size; + + /* Last index used on the available ring */ + volatile uint16_t last_used_idx; +#define VIRTIO_INVALID_EVENTFD (-1) +#define VIRTIO_UNINITIALIZED_EVENTFD (-2) + + /* Backend value to determine if device should started/stopped */ + int backend; + /* Used to notify the guest (trigger interrupt) */ + int callfd; + /* Currently unused as polling mode is enabled */ + int kickfd; + int enabled; + + /* Physical address of used ring, for logging */ + uint64_t log_guest_addr; +} __rte_cache_aligned; + +/* Old kernels have no such macro defined */ +#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE + #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 +#endif + + +/* + * Make an extra wrapper for VIRTIO_NET_F_MQ and + * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are + * introduced since kernel v3.8. This makes our + * code buildable for older kernel. + */ +#ifdef VIRTIO_NET_F_MQ + #define VHOST_MAX_QUEUE_PAIRS VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX + #define VHOST_SUPPORTS_MQ (1ULL << VIRTIO_NET_F_MQ) +#else + #define VHOST_MAX_QUEUE_PAIRS 1 + #define VHOST_SUPPORTS_MQ 0 +#endif + +/* + * Define virtio 1.0 for older kernels + */ +#ifndef VIRTIO_F_VERSION_1 + #define VIRTIO_F_VERSION_1 32 +#endif + +/** + * Device structure contains all configuration information relating + * to the device. + */ +struct virtio_net { + /* Frontend (QEMU) memory and memory region information */ + struct virtio_memory *mem; + uint64_t features; + uint64_t protocol_features; + int vid; + uint32_t flags; + uint16_t vhost_hlen; + /* to tell if we need broadcast rarp packet */ + rte_atomic16_t broadcast_rarp; + uint32_t virt_qp_nb; + struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; +#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) + char ifname[IF_NAME_SZ]; + uint64_t log_size; + uint64_t log_base; + uint64_t log_addr; + struct ether_addr mac; + +} __rte_cache_aligned; + +/** + * Information relating to memory regions including offsets to + * addresses in QEMUs memory file. + */ +struct virtio_memory_regions { + uint64_t guest_phys_address; + uint64_t guest_phys_address_end; + uint64_t memory_size; + uint64_t userspace_address; + uint64_t address_offset; +}; + + +/** + * Memory structure includes region and mapping information. + */ +struct virtio_memory { + /* Base QEMU userspace address of the memory file. */ + uint64_t base_address; + uint64_t mapped_address; + uint64_t mapped_size; + uint32_t nregions; + struct virtio_memory_regions regions[0]; +}; + + /* Macros for printing using RTE_LOG */ #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 #define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1 @@ -57,9 +179,9 @@ char packet[VHOST_MAX_PRINT_BUFF]; \ \ if ((header)) \ - snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%" PRIu64 ") Header size %d: ", (device->device_fh), (size)); \ + snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Header size %d: ", (device->vid), (size)); \ else \ - snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%" PRIu64 ") Packet size %d: ", (device->device_fh), (size)); \ + snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Packet size %d: ", (device->vid), (size)); \ for (index = 0; index < (size); index++) { \ snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \ "%02hhx ", pkt_addr[index]); \ @@ -74,37 +196,51 @@ #define PRINT_PACKET(device, addr, size, header) do {} while (0) #endif - -/* - * Structure used to identify device context. +/** + * Function to convert guest physical addresses to vhost virtual addresses. + * This is used to convert guest virtio buffer addresses. */ -struct vhost_device_ctx { - pid_t pid; /* PID of process calling the IOCTL. */ - uint64_t fh; /* Populated with fi->fh to track the device index. */ -}; - -int vhost_new_device(struct vhost_device_ctx); -void vhost_destroy_device(struct vhost_device_ctx); - -void vhost_set_ifname(struct vhost_device_ctx, - const char *if_name, unsigned int if_len); - -int vhost_get_features(struct vhost_device_ctx, uint64_t *); -int vhost_set_features(struct vhost_device_ctx, uint64_t *); - -int vhost_set_vring_num(struct vhost_device_ctx, struct vhost_vring_state *); -int vhost_set_vring_addr(struct vhost_device_ctx, struct vhost_vring_addr *); -int vhost_set_vring_base(struct vhost_device_ctx, struct vhost_vring_state *); -int vhost_get_vring_base(struct vhost_device_ctx, - uint32_t, struct vhost_vring_state *); - -int vhost_set_vring_kick(struct vhost_device_ctx, struct vhost_vring_file *); -int vhost_set_vring_call(struct vhost_device_ctx, struct vhost_vring_file *); - -int vhost_set_backend(struct vhost_device_ctx, struct vhost_vring_file *); - -int vhost_set_owner(struct vhost_device_ctx); -int vhost_reset_owner(struct vhost_device_ctx); +static inline uint64_t __attribute__((always_inline)) +gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa) +{ + struct virtio_memory_regions *region; + uint32_t regionidx; + uint64_t vhost_va = 0; + + for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) { + region = &dev->mem->regions[regionidx]; + if ((guest_pa >= region->guest_phys_address) && + (guest_pa <= region->guest_phys_address_end)) { + vhost_va = region->address_offset + guest_pa; + break; + } + } + return vhost_va; +} + +struct virtio_net_device_ops const *notify_ops; +struct virtio_net *get_device(int vid); + +int vhost_new_device(void); +void vhost_destroy_device(int); + +void vhost_set_ifname(int, const char *if_name, unsigned int if_len); + +int vhost_get_features(int, uint64_t *); +int vhost_set_features(int, uint64_t *); + +int vhost_set_vring_num(int, struct vhost_vring_state *); +int vhost_set_vring_addr(int, struct vhost_vring_addr *); +int vhost_set_vring_base(int, struct vhost_vring_state *); +int vhost_get_vring_base(int, uint32_t, struct vhost_vring_state *); + +int vhost_set_vring_kick(int, struct vhost_vring_file *); +int vhost_set_vring_call(int, struct vhost_vring_file *); + +int vhost_set_backend(int, struct vhost_vring_file *); + +int vhost_set_owner(int); +int vhost_reset_owner(int); /* * Backend-specific cleanup. Defined by vhost-cuse and vhost-user. diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c index c613e68e..5d150116 100644 --- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c +++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c @@ -60,17 +60,18 @@ static const char default_cdev[] = "vhost-net"; static struct fuse_session *session; /* - * Returns vhost_device_ctx from given fuse_req_t. The index is populated later - * when the device is added to the device linked list. + * Returns vhost_cuse_device_ctx from given fuse_req_t. The + * index is populated later when the device is added to the + * device linked list. */ -static struct vhost_device_ctx +static struct vhost_cuse_device_ctx fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi) { - struct vhost_device_ctx ctx; + struct vhost_cuse_device_ctx ctx; struct fuse_ctx const *const req_ctx = fuse_req_ctx(req); ctx.pid = req_ctx->pid; - ctx.fh = fi->fh; + ctx.vid = (int)fi->fh; return ctx; } @@ -82,19 +83,18 @@ fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi) static void vhost_net_open(fuse_req_t req, struct fuse_file_info *fi) { - struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); - int err = 0; + int vid = 0; - err = vhost_new_device(ctx); - if (err == -1) { + vid = vhost_new_device(); + if (vid == -1) { fuse_reply_err(req, EPERM); return; } - fi->fh = err; + fi->fh = vid; RTE_LOG(INFO, VHOST_CONFIG, - "(%"PRIu64") Device configuration started\n", fi->fh); + "(%d) device configuration started\n", vid); fuse_reply_open(req, fi); } @@ -105,19 +105,19 @@ static void vhost_net_release(fuse_req_t req, struct fuse_file_info *fi) { int err = 0; - struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); + struct vhost_cuse_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); - vhost_destroy_device(ctx); - RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh); + vhost_destroy_device(ctx.vid); + RTE_LOG(INFO, VHOST_CONFIG, "(%d) device released\n", ctx.vid); fuse_reply_err(req, err); } /* * Boilerplate code for CUSE IOCTL - * Implicit arguments: ctx, req, result. + * Implicit arguments: vid, req, result. */ #define VHOST_IOCTL(func) do { \ - result = (func)(ctx); \ + result = (func)(vid); \ fuse_reply_ioctl(req, result, NULL, 0); \ } while (0) @@ -134,41 +134,41 @@ vhost_net_release(fuse_req_t req, struct fuse_file_info *fi) /* * Boilerplate code for CUSE Read IOCTL - * Implicit arguments: ctx, req, result, in_bufsz, in_buf. + * Implicit arguments: vid, req, result, in_bufsz, in_buf. */ #define VHOST_IOCTL_R(type, var, func) do { \ if (!in_bufsz) { \ VHOST_IOCTL_RETRY(sizeof(type), 0);\ } else { \ (var) = *(const type*)in_buf; \ - result = func(ctx, &(var)); \ + result = func(vid, &(var)); \ fuse_reply_ioctl(req, result, NULL, 0);\ } \ } while (0) /* * Boilerplate code for CUSE Write IOCTL - * Implicit arguments: ctx, req, result, out_bufsz. + * Implicit arguments: vid, req, result, out_bufsz. */ #define VHOST_IOCTL_W(type, var, func) do { \ if (!out_bufsz) { \ VHOST_IOCTL_RETRY(0, sizeof(type));\ } else { \ - result = (func)(ctx, &(var));\ + result = (func)(vid, &(var));\ fuse_reply_ioctl(req, result, &(var), sizeof(type));\ } \ } while (0) /* * Boilerplate code for CUSE Read/Write IOCTL - * Implicit arguments: ctx, req, result, in_bufsz, in_buf. + * Implicit arguments: vid, req, result, in_bufsz, in_buf. */ #define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \ if (!in_bufsz) { \ VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\ } else { \ (var1) = *(const type1*) (in_buf); \ - result = (func)(ctx, (var1), &(var2)); \ + result = (func)(vid, (var1), &(var2)); \ fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\ } \ } while (0) @@ -183,18 +183,19 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg, struct fuse_file_info *fi, __rte_unused unsigned flags, const void *in_buf, size_t in_bufsz, size_t out_bufsz) { - struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); + struct vhost_cuse_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); struct vhost_vring_file file; struct vhost_vring_state state; struct vhost_vring_addr addr; uint64_t features; uint32_t index; int result = 0; + int vid = ctx.vid; switch (cmd) { case VHOST_NET_SET_BACKEND: LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh); + "(%d) IOCTL: VHOST_NET_SET_BACKEND\n", ctx.vid); if (!in_buf) { VHOST_IOCTL_RETRY(sizeof(file), 0); break; @@ -206,32 +207,32 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg, case VHOST_GET_FEATURES: LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh); + "(%d) IOCTL: VHOST_GET_FEATURES\n", vid); VHOST_IOCTL_W(uint64_t, features, vhost_get_features); break; case VHOST_SET_FEATURES: LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh); + "(%d) IOCTL: VHOST_SET_FEATURES\n", vid); VHOST_IOCTL_R(uint64_t, features, vhost_set_features); break; case VHOST_RESET_OWNER: LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh); + "(%d) IOCTL: VHOST_RESET_OWNER\n", vid); VHOST_IOCTL(vhost_reset_owner); break; case VHOST_SET_OWNER: LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh); + "(%d) IOCTL: VHOST_SET_OWNER\n", vid); VHOST_IOCTL(vhost_set_owner); break; case VHOST_SET_MEM_TABLE: /*TODO fix race condition.*/ LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh); + "(%d) IOCTL: VHOST_SET_MEM_TABLE\n", vid); static struct vhost_memory mem_temp; switch (in_bufsz) { @@ -264,28 +265,28 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg, case VHOST_SET_VRING_NUM: LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh); + "(%d) IOCTL: VHOST_SET_VRING_NUM\n", vid); VHOST_IOCTL_R(struct vhost_vring_state, state, vhost_set_vring_num); break; case VHOST_SET_VRING_BASE: LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh); + "(%d) IOCTL: VHOST_SET_VRING_BASE\n", vid); VHOST_IOCTL_R(struct vhost_vring_state, state, vhost_set_vring_base); break; case VHOST_GET_VRING_BASE: LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh); + "(%d) IOCTL: VHOST_GET_VRING_BASE\n", vid); VHOST_IOCTL_RW(uint32_t, index, struct vhost_vring_state, state, vhost_get_vring_base); break; case VHOST_SET_VRING_ADDR: LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh); + "(%d) IOCTL: VHOST_SET_VRING_ADDR\n", vid); VHOST_IOCTL_R(struct vhost_vring_addr, addr, vhost_set_vring_addr); break; @@ -294,12 +295,10 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg, case VHOST_SET_VRING_CALL: if (cmd == VHOST_SET_VRING_KICK) LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", - ctx.fh); + "(%d) IOCTL: VHOST_SET_VRING_KICK\n", vid); else LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", - ctx.fh); + "(%d) IOCTL: VHOST_SET_VRING_CALL\n", vid); if (!in_buf) VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0); else { @@ -315,10 +314,10 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg, } file.fd = fd; if (cmd == VHOST_SET_VRING_KICK) { - result = vhost_set_vring_kick(ctx, &file); + result = vhost_set_vring_kick(vid, &file); fuse_reply_ioctl(req, result, NULL, 0); } else { - result = vhost_set_vring_call(ctx, &file); + result = vhost_set_vring_call(vid, &file); fuse_reply_ioctl(req, result, NULL, 0); } } @@ -326,17 +325,17 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg, default: RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh); + "(%d) IOCTL: DOESN NOT EXIST\n", vid); result = -1; fuse_reply_ioctl(req, result, NULL, 0); } if (result < 0) LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: FAIL\n", ctx.fh); + "(%d) IOCTL: FAIL\n", vid); else LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh); + "(%d) IOCTL: SUCCESS\n", vid); } /* @@ -353,7 +352,7 @@ static const struct cuse_lowlevel_ops vhost_net_ops = { * vhost_net_device_ops are also passed when the device is registered in app. */ int -rte_vhost_driver_register(const char *dev_name) +rte_vhost_driver_register(const char *dev_name, uint64_t flags) { struct cuse_info cuse_info; char device_name[PATH_MAX] = ""; @@ -365,6 +364,12 @@ rte_vhost_driver_register(const char *dev_name) char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI; char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti}; + if (flags) { + RTE_LOG(ERR, VHOST_CONFIG, + "vhost-cuse does not support any flags so far\n"); + return -1; + } + if (access(cuse_device_name, R_OK | W_OK) < 0) { RTE_LOG(ERR, VHOST_CONFIG, "char device %s can't be accessed, maybe not exist\n", diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c index a68a8bd4..552be7d4 100644 --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c @@ -54,7 +54,6 @@ #include "rte_virtio_net.h" #include "vhost-net.h" #include "virtio-net-cdev.h" -#include "virtio-net.h" #include "eventfd_copy.h" /* Line size for reading maps file. */ @@ -263,7 +262,7 @@ host_memory_map(pid_t pid, uint64_t addr, } int -cuse_set_mem_table(struct vhost_device_ctx ctx, +cuse_set_mem_table(struct vhost_cuse_device_ctx ctx, const struct vhost_memory *mem_regions_addr, uint32_t nregions) { uint64_t size = offsetof(struct vhost_memory, regions); @@ -274,7 +273,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx, uint64_t base_address = 0, mapped_address, mapped_size; struct virtio_net *dev; - dev = get_device(ctx); + dev = get_device(ctx.vid); if (dev == NULL) return -1; @@ -289,8 +288,8 @@ cuse_set_mem_table(struct vhost_device_ctx ctx, sizeof(struct virtio_memory_regions) * nregions); if (dev->mem == NULL) { RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Failed to allocate memory for dev->mem\n", - dev->device_fh); + "(%d) failed to allocate memory for dev->mem\n", + dev->vid); return -1; } @@ -379,7 +378,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx, * save it in the device structure. */ static int -get_ifname(struct vhost_device_ctx ctx, struct virtio_net *dev, int tap_fd, int pid) +get_ifname(int vid, int tap_fd, int pid) { int fd_tap; struct ifreq ifr; @@ -393,33 +392,32 @@ get_ifname(struct vhost_device_ctx ctx, struct virtio_net *dev, int tap_fd, int ret = ioctl(fd_tap, TUNGETIFF, &ifr); if (close(fd_tap) < 0) - RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") fd close failed\n", - dev->device_fh); + RTE_LOG(ERR, VHOST_CONFIG, "(%d) fd close failed\n", vid); if (ret >= 0) { ifr_size = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name)); - vhost_set_ifname(ctx, ifr.ifr_name, ifr_size); + vhost_set_ifname(vid, ifr.ifr_name, ifr_size); } else RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") TUNGETIFF ioctl failed\n", - dev->device_fh); + "(%d) TUNGETIFF ioctl failed\n", vid); return 0; } -int cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file) +int +cuse_set_backend(struct vhost_cuse_device_ctx ctx, + struct vhost_vring_file *file) { struct virtio_net *dev; - dev = get_device(ctx); + dev = get_device(ctx.vid); if (dev == NULL) return -1; if (!(dev->flags & VIRTIO_DEV_RUNNING) && file->fd != VIRTIO_DEV_STOPPED) - get_ifname(ctx, dev, file->fd, ctx.pid); + get_ifname(ctx.vid, file->fd, ctx.pid); - return vhost_set_backend(ctx, file); + return vhost_set_backend(ctx.vid, file); } void diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h index eb6b0bab..3f67154b 100644 --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h @@ -38,11 +38,19 @@ #include "vhost-net.h" +/* + * Structure used to identify device context. + */ +struct vhost_cuse_device_ctx { + pid_t pid; /* PID of process calling the IOCTL. */ + int vid; /* Virtio-net device ID */ +}; + int -cuse_set_mem_table(struct vhost_device_ctx ctx, +cuse_set_mem_table(struct vhost_cuse_device_ctx ctx, const struct vhost_memory *mem_regions_addr, uint32_t nregions); int -cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *); +cuse_set_backend(struct vhost_cuse_device_ctx ctx, struct vhost_vring_file *); #endif diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c index 750821a4..15ca9562 100644 --- a/lib/librte_vhost/vhost_rxtx.c +++ b/lib/librte_vhost/vhost_rxtx.c @@ -126,10 +126,10 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) } static inline void -copy_virtio_net_hdr(struct vhost_virtqueue *vq, uint64_t desc_addr, +copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr, struct virtio_net_hdr_mrg_rxbuf hdr) { - if (vq->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf)) + if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf)) *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr; else *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr; @@ -137,7 +137,7 @@ copy_virtio_net_hdr(struct vhost_virtqueue *vq, uint64_t desc_addr, static inline int __attribute__((always_inline)) copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct rte_mbuf *m, uint16_t desc_idx, uint32_t *copied) + struct rte_mbuf *m, uint16_t desc_idx) { uint32_t desc_avail, desc_offset; uint32_t mbuf_avail, mbuf_offset; @@ -147,21 +147,20 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; desc = &vq->desc[desc_idx]; - if (unlikely(desc->len < vq->vhost_hlen)) + if (unlikely(desc->len < dev->vhost_hlen)) return -1; desc_addr = gpa_to_vva(dev, desc->addr); rte_prefetch0((void *)(uintptr_t)desc_addr); virtio_enqueue_offload(m, &virtio_hdr.hdr); - copy_virtio_net_hdr(vq, desc_addr, virtio_hdr); - vhost_log_write(dev, desc->addr, vq->vhost_hlen); - PRINT_PACKET(dev, (uintptr_t)desc_addr, vq->vhost_hlen, 0); + copy_virtio_net_hdr(dev, desc_addr, virtio_hdr); + vhost_log_write(dev, desc->addr, dev->vhost_hlen); + PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); - desc_offset = vq->vhost_hlen; - desc_avail = desc->len - vq->vhost_hlen; + desc_offset = dev->vhost_hlen; + desc_avail = desc->len - dev->vhost_hlen; - *copied = rte_pktmbuf_pkt_len(m); mbuf_avail = rte_pktmbuf_data_len(m); mbuf_offset = 0; while (mbuf_avail != 0 || m->next != NULL) { @@ -205,49 +204,6 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, return 0; } -/* - * As many data cores may want to access available buffers - * they need to be reserved. - */ -static inline uint32_t -reserve_avail_buf(struct vhost_virtqueue *vq, uint32_t count, - uint16_t *start, uint16_t *end) -{ - uint16_t res_start_idx; - uint16_t res_end_idx; - uint16_t avail_idx; - uint16_t free_entries; - int success; - - count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST); - -again: - res_start_idx = vq->last_used_idx_res; - avail_idx = *((volatile uint16_t *)&vq->avail->idx); - - free_entries = avail_idx - res_start_idx; - count = RTE_MIN(count, free_entries); - if (count == 0) - return 0; - - res_end_idx = res_start_idx + count; - - /* - * update vq->last_used_idx_res atomically; try again if failed. - * - * TODO: Allow to disable cmpset if no concurrency in application. - */ - success = rte_atomic16_cmpset(&vq->last_used_idx_res, - res_start_idx, res_end_idx); - if (unlikely(!success)) - goto again; - - *start = res_start_idx; - *end = res_end_idx; - - return count; -} - /** * This function adds buffers to the virtio devices RX virtqueue. Buffers can * be received from the physical port or from another virtio device. A packet @@ -260,15 +216,15 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count) { struct vhost_virtqueue *vq; - uint16_t res_start_idx, res_end_idx; + uint16_t avail_idx, free_entries, start_idx; uint16_t desc_indexes[MAX_PKT_BURST]; + uint16_t used_idx; uint32_t i; - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh); + LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { - RTE_LOG(ERR, VHOST_DATA, - "%s (%"PRIu64"): virtqueue idx:%d invalid.\n", - __func__, dev->device_fh, queue_id); + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); return 0; } @@ -276,38 +232,43 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, if (unlikely(vq->enabled == 0)) return 0; - count = reserve_avail_buf(vq, count, &res_start_idx, &res_end_idx); + avail_idx = *((volatile uint16_t *)&vq->avail->idx); + start_idx = vq->last_used_idx; + free_entries = avail_idx - start_idx; + count = RTE_MIN(count, free_entries); + count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST); if (count == 0) return 0; - LOG_DEBUG(VHOST_DATA, - "(%"PRIu64") res_start_idx %d| res_end_idx Index %d\n", - dev->device_fh, res_start_idx, res_end_idx); + LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n", + dev->vid, start_idx, start_idx + count); /* Retrieve all of the desc indexes first to avoid caching issues. */ - rte_prefetch0(&vq->avail->ring[res_start_idx & (vq->size - 1)]); + rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]); for (i = 0; i < count; i++) { - desc_indexes[i] = vq->avail->ring[(res_start_idx + i) & - (vq->size - 1)]; + used_idx = (start_idx + i) & (vq->size - 1); + desc_indexes[i] = vq->avail->ring[used_idx]; + vq->used->ring[used_idx].id = desc_indexes[i]; + vq->used->ring[used_idx].len = pkts[i]->pkt_len + + dev->vhost_hlen; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); } rte_prefetch0(&vq->desc[desc_indexes[0]]); for (i = 0; i < count; i++) { uint16_t desc_idx = desc_indexes[i]; - uint16_t used_idx = (res_start_idx + i) & (vq->size - 1); - uint32_t copied; int err; - err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx, &copied); - - vq->used->ring[used_idx].id = desc_idx; - if (unlikely(err)) - vq->used->ring[used_idx].len = vq->vhost_hlen; - else - vq->used->ring[used_idx].len = copied + vq->vhost_hlen; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, ring[used_idx]), - sizeof(vq->used->ring[used_idx])); + err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx); + if (unlikely(err)) { + used_idx = (start_idx + i) & (vq->size - 1); + vq->used->ring[used_idx].len = dev->vhost_hlen; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); + } if (i + 1 < count) rte_prefetch0(&vq->desc[desc_indexes[i+1]]); @@ -315,12 +276,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, rte_smp_wmb(); - /* Wait until it's our turn to add our buffer to the used ring. */ - while (unlikely(vq->last_used_idx != res_start_idx)) - rte_pause(); - *(volatile uint16_t *)&vq->used->idx += count; - vq->last_used_idx = res_end_idx; + vq->last_used_idx += count; vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), sizeof(vq->used->idx)); @@ -337,7 +294,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, static inline int fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx, - uint32_t *allocated, uint32_t *vec_idx) + uint32_t *allocated, uint32_t *vec_idx, + struct buf_vector *buf_vec) { uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; uint32_t vec_id = *vec_idx; @@ -348,9 +306,9 @@ fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx, return -1; len += vq->desc[idx].len; - vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr; - vq->buf_vec[vec_id].buf_len = vq->desc[idx].len; - vq->buf_vec[vec_id].desc_idx = idx; + buf_vec[vec_id].buf_addr = vq->desc[idx].addr; + buf_vec[vec_id].buf_len = vq->desc[idx].len; + buf_vec[vec_id].desc_idx = idx; vec_id++; if ((vq->desc[idx].flags & VRING_DESC_F_NEXT) == 0) @@ -366,39 +324,30 @@ fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx, } /* - * As many data cores may want to access available buffers concurrently, - * they need to be reserved. - * * Returns -1 on fail, 0 on success */ static inline int reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size, - uint16_t *start, uint16_t *end) + uint16_t *end, struct buf_vector *buf_vec) { - uint16_t res_start_idx; - uint16_t res_cur_idx; + uint16_t cur_idx; uint16_t avail_idx; - uint32_t allocated; - uint32_t vec_idx; - uint16_t tries; + uint32_t allocated = 0; + uint32_t vec_idx = 0; + uint16_t tries = 0; -again: - res_start_idx = vq->last_used_idx_res; - res_cur_idx = res_start_idx; + cur_idx = vq->last_used_idx; - allocated = 0; - vec_idx = 0; - tries = 0; while (1) { avail_idx = *((volatile uint16_t *)&vq->avail->idx); - if (unlikely(res_cur_idx == avail_idx)) + if (unlikely(cur_idx == avail_idx)) return -1; - if (unlikely(fill_vec_buf(vq, res_cur_idx, &allocated, - &vec_idx) < 0)) + if (unlikely(fill_vec_buf(vq, cur_idx, &allocated, + &vec_idx, buf_vec) < 0)) return -1; - res_cur_idx++; + cur_idx++; tries++; if (allocated >= size) @@ -413,27 +362,19 @@ again: return -1; } - /* - * update vq->last_used_idx_res atomically. - * retry again if failed. - */ - if (rte_atomic16_cmpset(&vq->last_used_idx_res, - res_start_idx, res_cur_idx) == 0) - goto again; - - *start = res_start_idx; - *end = res_cur_idx; + *end = cur_idx; return 0; } static inline uint32_t __attribute__((always_inline)) copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint16_t res_start_idx, uint16_t res_end_idx, - struct rte_mbuf *m) + uint16_t end_idx, struct rte_mbuf *m, + struct buf_vector *buf_vec) { struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; uint32_t vec_idx = 0; - uint16_t cur_idx = res_start_idx; + uint16_t start_idx = vq->last_used_idx; + uint16_t cur_idx = start_idx; uint64_t desc_addr; uint32_t mbuf_offset, mbuf_avail; uint32_t desc_offset, desc_avail; @@ -443,34 +384,33 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, if (unlikely(m == NULL)) return 0; - LOG_DEBUG(VHOST_DATA, - "(%"PRIu64") Current Index %d| End Index %d\n", - dev->device_fh, cur_idx, res_end_idx); + LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + dev->vid, cur_idx, end_idx); - if (vq->buf_vec[vec_idx].buf_len < vq->vhost_hlen) + if (buf_vec[vec_idx].buf_len < dev->vhost_hlen) return -1; - desc_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr); + desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr); rte_prefetch0((void *)(uintptr_t)desc_addr); - virtio_hdr.num_buffers = res_end_idx - res_start_idx; - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", - dev->device_fh, virtio_hdr.num_buffers); + virtio_hdr.num_buffers = end_idx - start_idx; + LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", + dev->vid, virtio_hdr.num_buffers); virtio_enqueue_offload(m, &virtio_hdr.hdr); - copy_virtio_net_hdr(vq, desc_addr, virtio_hdr); - vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr, vq->vhost_hlen); - PRINT_PACKET(dev, (uintptr_t)desc_addr, vq->vhost_hlen, 0); + copy_virtio_net_hdr(dev, desc_addr, virtio_hdr); + vhost_log_write(dev, buf_vec[vec_idx].buf_addr, dev->vhost_hlen); + PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); - desc_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen; - desc_offset = vq->vhost_hlen; + desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; + desc_offset = dev->vhost_hlen; mbuf_avail = rte_pktmbuf_data_len(m); mbuf_offset = 0; while (mbuf_avail != 0 || m->next != NULL) { /* done with current desc buf, get the next one */ if (desc_avail == 0) { - desc_idx = vq->buf_vec[vec_idx].desc_idx; + desc_idx = buf_vec[vec_idx].desc_idx; if (!(vq->desc[desc_idx].flags & VRING_DESC_F_NEXT)) { /* Update used ring with desc information */ @@ -484,12 +424,12 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, } vec_idx++; - desc_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr); + desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr); /* Prefetch buffer address. */ rte_prefetch0((void *)(uintptr_t)desc_addr); desc_offset = 0; - desc_avail = vq->buf_vec[vec_idx].buf_len; + desc_avail = buf_vec[vec_idx].buf_len; } /* done with current mbuf, get the next one */ @@ -504,7 +444,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), cpy_len); - vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr + desc_offset, + vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset, cpy_len); PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), cpy_len, 0); @@ -516,13 +456,13 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, } used_idx = cur_idx & (vq->size - 1); - vq->used->ring[used_idx].id = vq->buf_vec[vec_idx].desc_idx; + vq->used->ring[used_idx].id = buf_vec[vec_idx].desc_idx; vq->used->ring[used_idx].len = desc_offset; vhost_log_used_vring(dev, vq, offsetof(struct vring_used, ring[used_idx]), sizeof(vq->used->ring[used_idx])); - return res_end_idx - res_start_idx; + return end_idx - start_idx; } static inline uint32_t __attribute__((always_inline)) @@ -531,14 +471,13 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, { struct vhost_virtqueue *vq; uint32_t pkt_idx = 0, nr_used = 0; - uint16_t start, end; + uint16_t end; + struct buf_vector buf_vec[BUF_VECTOR_MAX]; - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n", - dev->device_fh); + LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { - RTE_LOG(ERR, VHOST_DATA, - "%s (%"PRIu64"): virtqueue idx:%d invalid.\n", - __func__, dev->device_fh, queue_id); + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); return 0; } @@ -551,27 +490,20 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, return 0; for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { - uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen; + uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len, - &start, &end) < 0)) { + &end, buf_vec) < 0)) { LOG_DEBUG(VHOST_DATA, - "(%" PRIu64 ") Failed to get enough desc from vring\n", - dev->device_fh); + "(%d) failed to get enough desc from vring\n", + dev->vid); break; } - nr_used = copy_mbuf_to_desc_mergeable(dev, vq, start, end, - pkts[pkt_idx]); + nr_used = copy_mbuf_to_desc_mergeable(dev, vq, end, + pkts[pkt_idx], buf_vec); rte_smp_wmb(); - /* - * Wait until it's our turn to add our buffer - * to the used ring. - */ - while (unlikely(vq->last_used_idx != start)) - rte_pause(); - *(volatile uint16_t *)&vq->used->idx += nr_used; vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), sizeof(vq->used->idx)); @@ -592,9 +524,14 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, } uint16_t -rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, +rte_vhost_enqueue_burst(int vid, uint16_t queue_id, struct rte_mbuf **pkts, uint16_t count) { + struct virtio_net *dev = get_device(vid); + + if (!dev) + return 0; + if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) return virtio_dev_merge_rx(dev, queue_id, pkts, count); else @@ -747,22 +684,53 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, uint32_t nr_desc = 1; desc = &vq->desc[desc_idx]; - if (unlikely(desc->len < vq->vhost_hlen)) + if (unlikely(desc->len < dev->vhost_hlen)) return -1; desc_addr = gpa_to_vva(dev, desc->addr); - rte_prefetch0((void *)(uintptr_t)desc_addr); - - /* Retrieve virtio net header */ hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); - desc_avail = desc->len - vq->vhost_hlen; - desc_offset = vq->vhost_hlen; + rte_prefetch0(hdr); + + /* + * A virtio driver normally uses at least 2 desc buffers + * for Tx: the first for storing the header, and others + * for storing the data. + */ + if (likely((desc->len == dev->vhost_hlen) && + (desc->flags & VRING_DESC_F_NEXT) != 0)) { + desc = &vq->desc[desc->next]; + + desc_addr = gpa_to_vva(dev, desc->addr); + rte_prefetch0((void *)(uintptr_t)desc_addr); + + desc_offset = 0; + desc_avail = desc->len; + nr_desc += 1; + + PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); + } else { + desc_avail = desc->len - dev->vhost_hlen; + desc_offset = dev->vhost_hlen; + } mbuf_offset = 0; mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; - while (desc_avail != 0 || (desc->flags & VRING_DESC_F_NEXT) != 0) { + while (1) { + cpy_len = RTE_MIN(desc_avail, mbuf_avail); + rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), + (void *)((uintptr_t)(desc_addr + desc_offset)), + cpy_len); + + mbuf_avail -= cpy_len; + mbuf_offset += cpy_len; + desc_avail -= cpy_len; + desc_offset += cpy_len; + /* This desc reaches to its end, get the next one */ if (desc_avail == 0) { + if ((desc->flags & VRING_DESC_F_NEXT) == 0) + break; + if (unlikely(desc->next >= vq->size || ++nr_desc >= vq->size)) return -1; @@ -798,16 +766,6 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, mbuf_offset = 0; mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; } - - cpy_len = RTE_MIN(desc_avail, mbuf_avail); - rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), - (void *)((uintptr_t)(desc_addr + desc_offset)), - cpy_len); - - mbuf_avail -= cpy_len; - mbuf_offset += cpy_len; - desc_avail -= cpy_len; - desc_offset += cpy_len; } prev->data_len = mbuf_offset; @@ -820,9 +778,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, } uint16_t -rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, +rte_vhost_dequeue_burst(int vid, uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) { + struct virtio_net *dev; struct rte_mbuf *rarp_mbuf = NULL; struct vhost_virtqueue *vq; uint32_t desc_indexes[MAX_PKT_BURST]; @@ -831,10 +790,13 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, uint16_t free_entries; uint16_t avail_idx; + dev = get_device(vid); + if (!dev) + return 0; + if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) { - RTE_LOG(ERR, VHOST_DATA, - "%s (%"PRIu64"): virtqueue idx:%d invalid.\n", - __func__, dev->device_fh, queue_id); + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); return 0; } @@ -870,35 +832,37 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, if (free_entries == 0) goto out; - LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__, dev->device_fh); + LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); /* Prefetch available ring to retrieve head indexes. */ used_idx = vq->last_used_idx & (vq->size - 1); rte_prefetch0(&vq->avail->ring[used_idx]); + rte_prefetch0(&vq->used->ring[used_idx]); count = RTE_MIN(count, MAX_PKT_BURST); count = RTE_MIN(count, free_entries); - LOG_DEBUG(VHOST_DATA, "(%"PRIu64") about to dequeue %u buffers\n", - dev->device_fh, count); + LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", + dev->vid, count); /* Retrieve all of the head indexes first to avoid caching issues. */ for (i = 0; i < count; i++) { - desc_indexes[i] = vq->avail->ring[(vq->last_used_idx + i) & - (vq->size - 1)]; + used_idx = (vq->last_used_idx + i) & (vq->size - 1); + desc_indexes[i] = vq->avail->ring[used_idx]; + + vq->used->ring[used_idx].id = desc_indexes[i]; + vq->used->ring[used_idx].len = 0; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); } /* Prefetch descriptor index. */ rte_prefetch0(&vq->desc[desc_indexes[0]]); - rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]); - for (i = 0; i < count; i++) { int err; - if (likely(i + 1 < count)) { + if (likely(i + 1 < count)) rte_prefetch0(&vq->desc[desc_indexes[i + 1]]); - rte_prefetch0(&vq->used->ring[(used_idx + 1) & - (vq->size - 1)]); - } pkts[i] = rte_pktmbuf_alloc(mbuf_pool); if (unlikely(pkts[i] == NULL)) { @@ -912,18 +876,12 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, rte_pktmbuf_free(pkts[i]); break; } - - used_idx = vq->last_used_idx++ & (vq->size - 1); - vq->used->ring[used_idx].id = desc_indexes[i]; - vq->used->ring[used_idx].len = 0; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, ring[used_idx]), - sizeof(vq->used->ring[used_idx])); } rte_smp_wmb(); rte_smp_rmb(); vq->used->idx += i; + vq->last_used_idx += i; vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), sizeof(vq->used->idx)); diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c index df2bd648..94f1b923 100644 --- a/lib/librte_vhost/vhost_user/vhost-net-user.c +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c @@ -33,6 +33,7 @@ #include <stdint.h> #include <stdio.h> +#include <stdbool.h> #include <limits.h> #include <stdlib.h> #include <unistd.h> @@ -40,6 +41,7 @@ #include <sys/types.h> #include <sys/socket.h> #include <sys/un.h> +#include <sys/queue.h> #include <errno.h> #include <pthread.h> @@ -51,32 +53,44 @@ #include "vhost-net.h" #include "virtio-net-user.h" -#define MAX_VIRTIO_BACKLOG 128 - -static void vserver_new_vq_conn(int fd, void *data, int *remove); -static void vserver_message_handler(int fd, void *dat, int *remove); +/* + * Every time rte_vhost_driver_register() is invoked, an associated + * vhost_user_socket struct will be created. + */ +struct vhost_user_socket { + char *path; + int listenfd; + bool is_server; + bool reconnect; +}; -struct connfd_ctx { - struct vhost_server *vserver; - uint32_t fh; +struct vhost_user_connection { + struct vhost_user_socket *vsocket; + int vid; }; -#define MAX_VHOST_SERVER 1024 -struct _vhost_server { - struct vhost_server *server[MAX_VHOST_SERVER]; +#define MAX_VHOST_SOCKET 1024 +struct vhost_user { + struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET]; struct fdset fdset; - int vserver_cnt; - pthread_mutex_t server_mutex; + int vsocket_cnt; + pthread_mutex_t mutex; }; -static struct _vhost_server g_vhost_server = { +#define MAX_VIRTIO_BACKLOG 128 + +static void vhost_user_server_new_connection(int fd, void *data, int *remove); +static void vhost_user_msg_handler(int fd, void *dat, int *remove); +static int vhost_user_create_client(struct vhost_user_socket *vsocket); + +static struct vhost_user vhost_user = { .fdset = { .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} }, .fd_mutex = PTHREAD_MUTEX_INITIALIZER, .num = 0 }, - .vserver_cnt = 0, - .server_mutex = PTHREAD_MUTEX_INITIALIZER, + .vsocket_cnt = 0, + .mutex = PTHREAD_MUTEX_INITIALIZER, }; static const char *vhost_message_str[VHOST_USER_MAX] = { @@ -102,48 +116,6 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", }; -/** - * Create a unix domain socket, bind to path and listen for connection. - * @return - * socket fd or -1 on failure - */ -static int -uds_socket(const char *path) -{ - struct sockaddr_un un; - int sockfd; - int ret; - - if (path == NULL) - return -1; - - sockfd = socket(AF_UNIX, SOCK_STREAM, 0); - if (sockfd < 0) - return -1; - RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd); - - memset(&un, 0, sizeof(un)); - un.sun_family = AF_UNIX; - snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); - ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un)); - if (ret == -1) { - RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try again.\n", - sockfd, path); - goto err; - } - RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); - - ret = listen(sockfd, MAX_VIRTIO_BACKLOG); - if (ret == -1) - goto err; - - return sockfd; - -err: - close(sockfd); - return -1; -} - /* return bytes# of read on success or negative val on failure. */ static int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) @@ -278,62 +250,66 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg) return ret; } -/* call back when there is new virtio connection. */ + static void -vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove) +vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) { - struct vhost_server *vserver = (struct vhost_server *)dat; - int conn_fd; - struct connfd_ctx *ctx; - int fh; - struct vhost_device_ctx vdev_ctx = { (pid_t)0, 0 }; - unsigned int size; - - conn_fd = accept(fd, NULL, NULL); - RTE_LOG(INFO, VHOST_CONFIG, - "new virtio connection is %d\n", conn_fd); - if (conn_fd < 0) - return; + int vid; + size_t size; + struct vhost_user_connection *conn; - ctx = calloc(1, sizeof(*ctx)); - if (ctx == NULL) { - close(conn_fd); + conn = malloc(sizeof(*conn)); + if (conn == NULL) { + close(fd); return; } - fh = vhost_new_device(vdev_ctx); - if (fh == -1) { - free(ctx); - close(conn_fd); + vid = vhost_new_device(); + if (vid == -1) { + close(fd); + free(conn); return; } - vdev_ctx.fh = fh; - size = strnlen(vserver->path, PATH_MAX); - vhost_set_ifname(vdev_ctx, vserver->path, - size); + size = strnlen(vsocket->path, PATH_MAX); + vhost_set_ifname(vid, vsocket->path, size); + + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid); - RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh); + conn->vsocket = vsocket; + conn->vid = vid; + fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler, NULL, conn); +} + +/* call back when there is new vhost-user connection from client */ +static void +vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused) +{ + struct vhost_user_socket *vsocket = dat; - ctx->vserver = vserver; - ctx->fh = fh; - fdset_add(&g_vhost_server.fdset, - conn_fd, vserver_message_handler, NULL, ctx); + fd = accept(fd, NULL, NULL); + if (fd < 0) + return; + + RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd); + vhost_user_add_connection(fd, vsocket); } /* callback when there is message on the connfd */ static void -vserver_message_handler(int connfd, void *dat, int *remove) +vhost_user_msg_handler(int connfd, void *dat, int *remove) { - struct vhost_device_ctx ctx; - struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat; + int vid; + struct vhost_user_connection *conn = dat; struct VhostUserMsg msg; uint64_t features; int ret; - ctx.fh = cfd_ctx->fh; + vid = conn->vid; ret = read_vhost_message(connfd, &msg); if (ret <= 0 || msg.request >= VHOST_USER_MAX) { + struct vhost_user_socket *vsocket = conn->vsocket; + if (ret < 0) RTE_LOG(ERR, VHOST_CONFIG, "vhost read message failed\n"); @@ -346,8 +322,11 @@ vserver_message_handler(int connfd, void *dat, int *remove) close(connfd); *remove = 1; - free(cfd_ctx); - vhost_destroy_device(ctx); + free(conn); + vhost_destroy_device(vid); + + if (vsocket->reconnect) + vhost_user_create_client(vsocket); return; } @@ -356,14 +335,14 @@ vserver_message_handler(int connfd, void *dat, int *remove) vhost_message_str[msg.request]); switch (msg.request) { case VHOST_USER_GET_FEATURES: - ret = vhost_get_features(ctx, &features); + ret = vhost_get_features(vid, &features); msg.payload.u64 = features; msg.size = sizeof(msg.payload.u64); send_vhost_message(connfd, &msg); break; case VHOST_USER_SET_FEATURES: features = msg.payload.u64; - vhost_set_features(ctx, &features); + vhost_set_features(vid, &features); break; case VHOST_USER_GET_PROTOCOL_FEATURES: @@ -372,22 +351,22 @@ vserver_message_handler(int connfd, void *dat, int *remove) send_vhost_message(connfd, &msg); break; case VHOST_USER_SET_PROTOCOL_FEATURES: - user_set_protocol_features(ctx, msg.payload.u64); + user_set_protocol_features(vid, msg.payload.u64); break; case VHOST_USER_SET_OWNER: - vhost_set_owner(ctx); + vhost_set_owner(vid); break; case VHOST_USER_RESET_OWNER: - vhost_reset_owner(ctx); + vhost_reset_owner(vid); break; case VHOST_USER_SET_MEM_TABLE: - user_set_mem_table(ctx, &msg); + user_set_mem_table(vid, &msg); break; case VHOST_USER_SET_LOG_BASE: - user_set_log_base(ctx, &msg); + user_set_log_base(vid, &msg); /* it needs a reply */ msg.size = sizeof(msg.payload.u64); @@ -399,26 +378,26 @@ vserver_message_handler(int connfd, void *dat, int *remove) break; case VHOST_USER_SET_VRING_NUM: - vhost_set_vring_num(ctx, &msg.payload.state); + vhost_set_vring_num(vid, &msg.payload.state); break; case VHOST_USER_SET_VRING_ADDR: - vhost_set_vring_addr(ctx, &msg.payload.addr); + vhost_set_vring_addr(vid, &msg.payload.addr); break; case VHOST_USER_SET_VRING_BASE: - vhost_set_vring_base(ctx, &msg.payload.state); + vhost_set_vring_base(vid, &msg.payload.state); break; case VHOST_USER_GET_VRING_BASE: - ret = user_get_vring_base(ctx, &msg.payload.state); + ret = user_get_vring_base(vid, &msg.payload.state); msg.size = sizeof(msg.payload.state); send_vhost_message(connfd, &msg); break; case VHOST_USER_SET_VRING_KICK: - user_set_vring_kick(ctx, &msg); + user_set_vring_kick(vid, &msg); break; case VHOST_USER_SET_VRING_CALL: - user_set_vring_call(ctx, &msg); + user_set_vring_call(vid, &msg); break; case VHOST_USER_SET_VRING_ERR: @@ -434,10 +413,10 @@ vserver_message_handler(int connfd, void *dat, int *remove) break; case VHOST_USER_SET_VRING_ENABLE: - user_set_vring_enable(ctx, &msg.payload.state); + user_set_vring_enable(vid, &msg.payload.state); break; case VHOST_USER_SEND_RARP: - user_send_rarp(ctx, &msg); + user_send_rarp(vid, &msg); break; default: @@ -446,50 +425,222 @@ vserver_message_handler(int connfd, void *dat, int *remove) } } -/** - * Creates and initialise the vhost server. - */ -int -rte_vhost_driver_register(const char *path) +static int +create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server) { - struct vhost_server *vserver; + int fd; - pthread_mutex_lock(&g_vhost_server.server_mutex); + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) + return -1; + RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n", + is_server ? "server" : "client", fd); - if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) { - RTE_LOG(ERR, VHOST_CONFIG, - "error: the number of servers reaches maximum\n"); - pthread_mutex_unlock(&g_vhost_server.server_mutex); + memset(un, 0, sizeof(*un)); + un->sun_family = AF_UNIX; + strncpy(un->sun_path, path, sizeof(un->sun_path)); + + return fd; +} + +static int +vhost_user_create_server(struct vhost_user_socket *vsocket) +{ + int fd; + int ret; + struct sockaddr_un un; + const char *path = vsocket->path; + + fd = create_unix_socket(path, &un, vsocket->is_server); + if (fd < 0) return -1; + + ret = bind(fd, (struct sockaddr *)&un, sizeof(un)); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to bind to %s: %s; remove it and try again\n", + path, strerror(errno)); + goto err; } + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); - vserver = calloc(sizeof(struct vhost_server), 1); - if (vserver == NULL) { - pthread_mutex_unlock(&g_vhost_server.server_mutex); - return -1; + ret = listen(fd, MAX_VIRTIO_BACKLOG); + if (ret < 0) + goto err; + + vsocket->listenfd = fd; + fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection, + NULL, vsocket); + + return 0; + +err: + close(fd); + return -1; +} + +struct vhost_user_reconnect { + struct sockaddr_un un; + int fd; + struct vhost_user_socket *vsocket; + + TAILQ_ENTRY(vhost_user_reconnect) next; +}; + +TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect); +struct vhost_user_reconnect_list { + struct vhost_user_reconnect_tailq_list head; + pthread_mutex_t mutex; +}; + +static struct vhost_user_reconnect_list reconn_list; +static pthread_t reconn_tid; + +static void * +vhost_user_client_reconnect(void *arg __rte_unused) +{ + struct vhost_user_reconnect *reconn, *next; + + while (1) { + pthread_mutex_lock(&reconn_list.mutex); + + /* + * An equal implementation of TAILQ_FOREACH_SAFE, + * which does not exist on all platforms. + */ + for (reconn = TAILQ_FIRST(&reconn_list.head); + reconn != NULL; reconn = next) { + next = TAILQ_NEXT(reconn, next); + + if (connect(reconn->fd, (struct sockaddr *)&reconn->un, + sizeof(reconn->un)) < 0) + continue; + + RTE_LOG(INFO, VHOST_CONFIG, + "%s: connected\n", reconn->vsocket->path); + vhost_user_add_connection(reconn->fd, reconn->vsocket); + TAILQ_REMOVE(&reconn_list.head, reconn, next); + free(reconn); + } + + pthread_mutex_unlock(&reconn_list.mutex); + sleep(1); } - vserver->listenfd = uds_socket(path); - if (vserver->listenfd < 0) { - free(vserver); - pthread_mutex_unlock(&g_vhost_server.server_mutex); + return NULL; +} + +static int +vhost_user_reconnect_init(void) +{ + int ret; + + pthread_mutex_init(&reconn_list.mutex, NULL); + TAILQ_INIT(&reconn_list.head); + + ret = pthread_create(&reconn_tid, NULL, + vhost_user_client_reconnect, NULL); + if (ret < 0) + RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); + + return ret; +} + +static int +vhost_user_create_client(struct vhost_user_socket *vsocket) +{ + int fd; + int ret; + struct sockaddr_un un; + const char *path = vsocket->path; + struct vhost_user_reconnect *reconn; + + fd = create_unix_socket(path, &un, vsocket->is_server); + if (fd < 0) return -1; + + ret = connect(fd, (struct sockaddr *)&un, sizeof(un)); + if (ret == 0) { + vhost_user_add_connection(fd, vsocket); + return 0; } - vserver->path = strdup(path); + RTE_LOG(ERR, VHOST_CONFIG, + "failed to connect to %s: %s\n", + path, strerror(errno)); - fdset_add(&g_vhost_server.fdset, vserver->listenfd, - vserver_new_vq_conn, NULL, vserver); + if (!vsocket->reconnect) { + close(fd); + return -1; + } - g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver; - pthread_mutex_unlock(&g_vhost_server.server_mutex); + RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path); + reconn = malloc(sizeof(*reconn)); + reconn->un = un; + reconn->fd = fd; + reconn->vsocket = vsocket; + pthread_mutex_lock(&reconn_list.mutex); + TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next); + pthread_mutex_unlock(&reconn_list.mutex); return 0; } +/* + * Register a new vhost-user socket; here we could act as server + * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag + * is set. + */ +int +rte_vhost_driver_register(const char *path, uint64_t flags) +{ + int ret = -1; + struct vhost_user_socket *vsocket; + + if (!path) + return -1; + + pthread_mutex_lock(&vhost_user.mutex); + + if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) { + RTE_LOG(ERR, VHOST_CONFIG, + "error: the number of vhost sockets reaches maximum\n"); + goto out; + } + + vsocket = malloc(sizeof(struct vhost_user_socket)); + if (!vsocket) + goto out; + memset(vsocket, 0, sizeof(struct vhost_user_socket)); + vsocket->path = strdup(path); + + if ((flags & RTE_VHOST_USER_CLIENT) != 0) { + vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); + if (vsocket->reconnect && reconn_tid == 0) { + if (vhost_user_reconnect_init() < 0) + goto out; + } + ret = vhost_user_create_client(vsocket); + } else { + vsocket->is_server = true; + ret = vhost_user_create_server(vsocket); + } + if (ret < 0) { + free(vsocket->path); + free(vsocket); + goto out; + } + + vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket; + +out: + pthread_mutex_unlock(&vhost_user.mutex); + + return ret; +} /** - * Unregister the specified vhost server + * Unregister the specified vhost socket */ int rte_vhost_driver_unregister(const char *path) @@ -497,28 +648,29 @@ rte_vhost_driver_unregister(const char *path) int i; int count; - pthread_mutex_lock(&g_vhost_server.server_mutex); - - for (i = 0; i < g_vhost_server.vserver_cnt; i++) { - if (!strcmp(g_vhost_server.server[i]->path, path)) { - fdset_del(&g_vhost_server.fdset, - g_vhost_server.server[i]->listenfd); + pthread_mutex_lock(&vhost_user.mutex); - close(g_vhost_server.server[i]->listenfd); - free(g_vhost_server.server[i]->path); - free(g_vhost_server.server[i]); + for (i = 0; i < vhost_user.vsocket_cnt; i++) { + if (!strcmp(vhost_user.vsockets[i]->path, path)) { + if (vhost_user.vsockets[i]->is_server) { + fdset_del(&vhost_user.fdset, + vhost_user.vsockets[i]->listenfd); + close(vhost_user.vsockets[i]->listenfd); + unlink(path); + } - unlink(path); + free(vhost_user.vsockets[i]->path); + free(vhost_user.vsockets[i]); - count = --g_vhost_server.vserver_cnt; - g_vhost_server.server[i] = g_vhost_server.server[count]; - g_vhost_server.server[count] = NULL; - pthread_mutex_unlock(&g_vhost_server.server_mutex); + count = --vhost_user.vsocket_cnt; + vhost_user.vsockets[i] = vhost_user.vsockets[count]; + vhost_user.vsockets[count] = NULL; + pthread_mutex_unlock(&vhost_user.mutex); return 0; } } - pthread_mutex_unlock(&g_vhost_server.server_mutex); + pthread_mutex_unlock(&vhost_user.mutex); return -1; } @@ -526,6 +678,6 @@ rte_vhost_driver_unregister(const char *path) int rte_vhost_driver_session_start(void) { - fdset_event_dispatch(&g_vhost_server.fdset); + fdset_event_dispatch(&vhost_user.fdset); return 0; } diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h index e3bb4138..f5332396 100644 --- a/lib/librte_vhost/vhost_user/vhost-net-user.h +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h @@ -38,15 +38,11 @@ #include <linux/vhost.h> #include "rte_virtio_net.h" -#include "fd_man.h" - -struct vhost_server { - char *path; /**< The path the uds is bind to. */ - int listenfd; /**< The listener sockfd. */ -}; /* refer to hw/virtio/vhost-user.c */ +#define VHOST_MEMORY_MAX_NREGIONS 8 + typedef enum VhostUserRequest { VHOST_USER_NONE = 0, VHOST_USER_GET_FEATURES = 1, diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c index f5248bc4..e7c43479 100644 --- a/lib/librte_vhost/vhost_user/virtio-net-user.c +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c @@ -43,7 +43,6 @@ #include <rte_common.h> #include <rte_log.h> -#include "virtio-net.h" #include "virtio-net-user.h" #include "vhost-net-user.h" #include "vhost-net.h" @@ -64,9 +63,10 @@ static uint64_t get_blk_size(int fd) { struct stat stat; + int ret; - fstat(fd, &stat); - return (uint64_t)stat.st_blksize; + ret = fstat(fd, &stat); + return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; } static void @@ -96,10 +96,14 @@ vhost_backend_cleanup(struct virtio_net *dev) free(dev->mem); dev->mem = NULL; } + if (dev->log_addr) { + munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); + dev->log_addr = 0; + } } int -user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) +user_set_mem_table(int vid, struct VhostUserMsg *pmsg) { struct VhostUserMemory memory = pmsg->payload.memory; struct virtio_memory_regions *pregion; @@ -110,13 +114,15 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) uint64_t alignment; /* unmap old memory regions one by one*/ - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return -1; /* Remove from the data plane. */ - if (dev->flags & VIRTIO_DEV_RUNNING) - notify_ops->destroy_device(dev); + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(vid); + } if (dev->mem) { free_mem_region(dev); @@ -130,8 +136,8 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) sizeof(struct orig_region_map) * memory.nregions); if (dev->mem == NULL) { RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Failed to allocate memory for dev->mem\n", - dev->device_fh); + "(%d) failed to allocate memory for dev->mem\n", + dev->vid); return -1; } dev->mem->nregions = memory.nregions; @@ -162,6 +168,11 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) * aligned. */ alignment = get_blk_size(pmsg->fds[idx]); + if (alignment == (uint64_t)-1) { + RTE_LOG(ERR, VHOST_CONFIG, + "couldn't get hugepage size through fstat\n"); + goto err_mmap; + } mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment); mapped_address = (uint64_t)(uintptr_t)mmap(NULL, @@ -252,7 +263,7 @@ virtio_is_ready(struct virtio_net *dev) } void -user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) +user_set_vring_call(int vid, struct VhostUserMsg *pmsg) { struct vhost_vring_file file; @@ -263,7 +274,7 @@ user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) file.fd = pmsg->fds[0]; RTE_LOG(INFO, VHOST_CONFIG, "vring call idx:%d file:%d\n", file.index, file.fd); - vhost_set_vring_call(ctx, &file); + vhost_set_vring_call(vid, &file); } @@ -272,10 +283,13 @@ user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) * device is ready for packet processing. */ void -user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) +user_set_vring_kick(int vid, struct VhostUserMsg *pmsg) { struct vhost_vring_file file; - struct virtio_net *dev = get_device(ctx); + struct virtio_net *dev = get_device(vid); + + if (!dev) + return; file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) @@ -284,30 +298,32 @@ user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) file.fd = pmsg->fds[0]; RTE_LOG(INFO, VHOST_CONFIG, "vring kick idx:%d file:%d\n", file.index, file.fd); - vhost_set_vring_kick(ctx, &file); + vhost_set_vring_kick(vid, &file); - if (virtio_is_ready(dev) && - !(dev->flags & VIRTIO_DEV_RUNNING)) - notify_ops->new_device(dev); + if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) { + if (notify_ops->new_device(vid) == 0) + dev->flags |= VIRTIO_DEV_RUNNING; + } } /* * when virtio is stopped, qemu will send us the GET_VRING_BASE message. */ int -user_get_vring_base(struct vhost_device_ctx ctx, - struct vhost_vring_state *state) +user_get_vring_base(int vid, struct vhost_vring_state *state) { - struct virtio_net *dev = get_device(ctx); + struct virtio_net *dev = get_device(vid); if (dev == NULL) return -1; /* We have to stop the queue (virtio) if it is running. */ - if (dev->flags & VIRTIO_DEV_RUNNING) - notify_ops->destroy_device(dev); + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(vid); + } /* Here we are safe to get the last used index */ - vhost_get_vring_base(ctx, state->index, state); + vhost_get_vring_base(vid, state->index, state); RTE_LOG(INFO, VHOST_CONFIG, "vring base idx:%d file:%d\n", state->index, state->num); @@ -329,19 +345,21 @@ user_get_vring_base(struct vhost_device_ctx ctx, * enable the virtio queue pair. */ int -user_set_vring_enable(struct vhost_device_ctx ctx, - struct vhost_vring_state *state) +user_set_vring_enable(int vid, struct vhost_vring_state *state) { - struct virtio_net *dev = get_device(ctx); + struct virtio_net *dev; int enable = (int)state->num; + dev = get_device(vid); + if (dev == NULL) + return -1; + RTE_LOG(INFO, VHOST_CONFIG, "set queue enable: %d to qp idx: %d\n", enable, state->index); - if (notify_ops->vring_state_changed) { - notify_ops->vring_state_changed(dev, state->index, enable); - } + if (notify_ops->vring_state_changed) + notify_ops->vring_state_changed(vid, state->index, enable); dev->virtqueue[state->index]->enabled = enable; @@ -349,12 +367,11 @@ user_set_vring_enable(struct vhost_device_ctx ctx, } void -user_set_protocol_features(struct vhost_device_ctx ctx, - uint64_t protocol_features) +user_set_protocol_features(int vid, uint64_t protocol_features) { struct virtio_net *dev; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES) return; @@ -362,15 +379,14 @@ user_set_protocol_features(struct vhost_device_ctx ctx, } int -user_set_log_base(struct vhost_device_ctx ctx, - struct VhostUserMsg *msg) +user_set_log_base(int vid, struct VhostUserMsg *msg) { struct virtio_net *dev; int fd = msg->fds[0]; uint64_t size, off; void *addr; - dev = get_device(ctx); + dev = get_device(vid); if (!dev) return -1; @@ -397,13 +413,21 @@ user_set_log_base(struct vhost_device_ctx ctx, * fail when offset is not page size aligned. */ addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); if (addr == MAP_FAILED) { RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); return -1; } - /* TODO: unmap on stop */ - dev->log_base = (uint64_t)(uintptr_t)addr + off; + /* + * Free previously mapped log memory on occasionally + * multiple VHOST_USER_SET_LOG_BASE. + */ + if (dev->log_addr) { + munmap((void *)(uintptr_t)dev->log_addr, dev->log_size); + } + dev->log_addr = (uint64_t)(uintptr_t)addr; + dev->log_base = dev->log_addr + off; dev->log_size = size; return 0; @@ -418,12 +442,12 @@ user_set_log_base(struct vhost_device_ctx ctx, * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it. */ int -user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *msg) +user_send_rarp(int vid, struct VhostUserMsg *msg) { struct virtio_net *dev; uint8_t *mac = (uint8_t *)&msg->payload.u64; - dev = get_device(ctx); + dev = get_device(vid); if (!dev) return -1; diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h index cefec162..e1b967b8 100644 --- a/lib/librte_vhost/vhost_user/virtio-net-user.h +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h @@ -45,20 +45,18 @@ (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ (1ULL << VHOST_USER_PROTOCOL_F_RARP)) -int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *); +int user_set_mem_table(int, struct VhostUserMsg *); -void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *); +void user_set_vring_call(int, struct VhostUserMsg *); -void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *); +void user_set_vring_kick(int, struct VhostUserMsg *); -void user_set_protocol_features(struct vhost_device_ctx ctx, - uint64_t protocol_features); -int user_set_log_base(struct vhost_device_ctx ctx, struct VhostUserMsg *); -int user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *); +void user_set_protocol_features(int vid, uint64_t protocol_features); +int user_set_log_base(int vid, struct VhostUserMsg *); +int user_send_rarp(int vid, struct VhostUserMsg *); -int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *); +int user_get_vring_base(int, struct vhost_vring_state *); -int user_set_vring_enable(struct vhost_device_ctx ctx, - struct vhost_vring_state *state); +int user_set_vring_enable(int vid, struct vhost_vring_state *state); #endif diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c index d870ad97..1785695b 100644 --- a/lib/librte_vhost/virtio-net.c +++ b/lib/librte_vhost/virtio-net.c @@ -53,7 +53,6 @@ #include <rte_virtio_net.h> #include "vhost-net.h" -#include "virtio-net.h" #define MAX_VHOST_DEVICE 1024 static struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; @@ -108,15 +107,14 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va) return vhost_va; } - struct virtio_net * -get_device(struct vhost_device_ctx ctx) +get_device(int vid) { - struct virtio_net *dev = vhost_devices[ctx.fh]; + struct virtio_net *dev = vhost_devices[vid]; if (unlikely(!dev)) { RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") device not found.\n", ctx.fh); + "(%d) device not found.\n", vid); } return dev; @@ -233,7 +231,7 @@ alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) /* * Reset some variables in device structure, while keeping few - * others untouched, such as device_fh, ifname, virt_qp_nb: they + * others untouched, such as vid, ifname, virt_qp_nb: they * should be same unless the device is removed. */ static void @@ -255,7 +253,7 @@ reset_device(struct virtio_net *dev) * list. */ int -vhost_new_device(struct vhost_device_ctx ctx) +vhost_new_device(void) { struct virtio_net *dev; int i; @@ -263,8 +261,7 @@ vhost_new_device(struct vhost_device_ctx ctx) dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0); if (dev == NULL) { RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Failed to allocate memory for dev.\n", - ctx.fh); + "Failed to allocate memory for new dev.\n"); return -1; } @@ -279,7 +276,7 @@ vhost_new_device(struct vhost_device_ctx ctx) } vhost_devices[i] = dev; - dev->device_fh = i; + dev->vid = i; return i; } @@ -289,30 +286,31 @@ vhost_new_device(struct vhost_device_ctx ctx) * cleanup the device and remove it from device configuration linked list. */ void -vhost_destroy_device(struct vhost_device_ctx ctx) +vhost_destroy_device(int vid) { - struct virtio_net *dev = get_device(ctx); + struct virtio_net *dev = get_device(vid); if (dev == NULL) return; - if (dev->flags & VIRTIO_DEV_RUNNING) - notify_ops->destroy_device(dev); + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(vid); + } cleanup_device(dev, 1); free_device(dev); - vhost_devices[ctx.fh] = NULL; + vhost_devices[vid] = NULL; } void -vhost_set_ifname(struct vhost_device_ctx ctx, - const char *if_name, unsigned int if_len) +vhost_set_ifname(int vid, const char *if_name, unsigned int if_len) { struct virtio_net *dev; unsigned int len; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return; @@ -320,6 +318,7 @@ vhost_set_ifname(struct vhost_device_ctx ctx, sizeof(dev->ifname) : if_len; strncpy(dev->ifname, if_name, len); + dev->ifname[sizeof(dev->ifname) - 1] = '\0'; } @@ -329,11 +328,11 @@ vhost_set_ifname(struct vhost_device_ctx ctx, * the device hasn't been initialised. */ int -vhost_set_owner(struct vhost_device_ctx ctx) +vhost_set_owner(int vid) { struct virtio_net *dev; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return -1; @@ -344,16 +343,18 @@ vhost_set_owner(struct vhost_device_ctx ctx) * Called from CUSE IOCTL: VHOST_RESET_OWNER */ int -vhost_reset_owner(struct vhost_device_ctx ctx) +vhost_reset_owner(int vid) { struct virtio_net *dev; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return -1; - if (dev->flags & VIRTIO_DEV_RUNNING) - notify_ops->destroy_device(dev); + if (dev->flags & VIRTIO_DEV_RUNNING) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(vid); + } cleanup_device(dev, 0); reset_device(dev); @@ -365,11 +366,11 @@ vhost_reset_owner(struct vhost_device_ctx ctx) * The features that we support are requested. */ int -vhost_get_features(struct vhost_device_ctx ctx, uint64_t *pu) +vhost_get_features(int vid, uint64_t *pu) { struct virtio_net *dev; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return -1; @@ -383,13 +384,11 @@ vhost_get_features(struct vhost_device_ctx ctx, uint64_t *pu) * We receive the negotiated features supported by us and the virtio device. */ int -vhost_set_features(struct vhost_device_ctx ctx, uint64_t *pu) +vhost_set_features(int vid, uint64_t *pu) { struct virtio_net *dev; - uint16_t vhost_hlen; - uint16_t i; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return -1; if (*pu & ~VHOST_FEATURES) @@ -398,23 +397,16 @@ vhost_set_features(struct vhost_device_ctx ctx, uint64_t *pu) dev->features = *pu; if (dev->features & ((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) { - vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf); + dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf); } else { - vhost_hlen = sizeof(struct virtio_net_hdr); + dev->vhost_hlen = sizeof(struct virtio_net_hdr); } LOG_DEBUG(VHOST_CONFIG, - "(%"PRIu64") Mergeable RX buffers %s, virtio 1 %s\n", - dev->device_fh, + "(%d) mergeable RX buffers %s, virtio 1 %s\n", + dev->vid, (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", (dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off"); - for (i = 0; i < dev->virt_qp_nb; i++) { - uint16_t base_idx = i * VIRTIO_QNUM; - - dev->virtqueue[base_idx + VIRTIO_RXQ]->vhost_hlen = vhost_hlen; - dev->virtqueue[base_idx + VIRTIO_TXQ]->vhost_hlen = vhost_hlen; - } - return 0; } @@ -423,12 +415,11 @@ vhost_set_features(struct vhost_device_ctx ctx, uint64_t *pu) * The virtio device sends us the size of the descriptor ring. */ int -vhost_set_vring_num(struct vhost_device_ctx ctx, - struct vhost_vring_state *state) +vhost_set_vring_num(int vid, struct vhost_vring_state *state) { struct virtio_net *dev; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return -1; @@ -509,7 +500,7 @@ numa_realloc(struct virtio_net *dev, int index) out: dev->virtqueue[index] = vq; dev->virtqueue[index + 1] = vq + 1; - vhost_devices[dev->device_fh] = dev; + vhost_devices[dev->vid] = dev; return dev; } @@ -527,12 +518,12 @@ numa_realloc(struct virtio_net *dev, int index __rte_unused) * This function then converts these to our address space. */ int -vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr) +vhost_set_vring_addr(int vid, struct vhost_vring_addr *addr) { struct virtio_net *dev; struct vhost_virtqueue *vq; - dev = get_device(ctx); + dev = get_device(vid); if ((dev == NULL) || (dev->mem == NULL)) return -1; @@ -544,8 +535,8 @@ vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr) addr->desc_user_addr); if (vq->desc == 0) { RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Failed to find desc ring address.\n", - dev->device_fh); + "(%d) failed to find desc ring address.\n", + dev->vid); return -1; } @@ -556,8 +547,8 @@ vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr) addr->avail_user_addr); if (vq->avail == 0) { RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Failed to find avail ring address.\n", - dev->device_fh); + "(%d) failed to find avail ring address.\n", + dev->vid); return -1; } @@ -565,21 +556,29 @@ vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr) addr->used_user_addr); if (vq->used == 0) { RTE_LOG(ERR, VHOST_CONFIG, - "(%"PRIu64") Failed to find used ring address.\n", - dev->device_fh); + "(%d) failed to find used ring address.\n", + dev->vid); return -1; } + if (vq->last_used_idx != vq->used->idx) { + RTE_LOG(WARNING, VHOST_CONFIG, + "last_used_idx (%u) and vq->used->idx (%u) mismatches; " + "some packets maybe resent for Tx and dropped for Rx\n", + vq->last_used_idx, vq->used->idx); + vq->last_used_idx = vq->used->idx; + } + vq->log_guest_addr = addr->log_guest_addr; - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address desc: %p\n", - dev->device_fh, vq->desc); - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address avail: %p\n", - dev->device_fh, vq->avail); - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address used: %p\n", - dev->device_fh, vq->used); - LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") log_guest_addr: %"PRIx64"\n", - dev->device_fh, vq->log_guest_addr); + LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", + dev->vid, vq->desc); + LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", + dev->vid, vq->avail); + LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", + dev->vid, vq->used); + LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", + dev->vid, vq->log_guest_addr); return 0; } @@ -589,18 +588,16 @@ vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr) * The virtio device sends us the available ring last used index. */ int -vhost_set_vring_base(struct vhost_device_ctx ctx, - struct vhost_vring_state *state) +vhost_set_vring_base(int vid, struct vhost_vring_state *state) { struct virtio_net *dev; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return -1; /* State->index refers to the queue index. The txq is 1, rxq is 0. */ dev->virtqueue[state->index]->last_used_idx = state->num; - dev->virtqueue[state->index]->last_used_idx_res = state->num; return 0; } @@ -610,12 +607,12 @@ vhost_set_vring_base(struct vhost_device_ctx ctx, * We send the virtio device our available ring last used index. */ int -vhost_get_vring_base(struct vhost_device_ctx ctx, uint32_t index, +vhost_get_vring_base(int vid, uint32_t index, struct vhost_vring_state *state) { struct virtio_net *dev; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return -1; @@ -633,13 +630,13 @@ vhost_get_vring_base(struct vhost_device_ctx ctx, uint32_t index, * copied into our process space. */ int -vhost_set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file) +vhost_set_vring_call(int vid, struct vhost_vring_file *file) { struct virtio_net *dev; struct vhost_virtqueue *vq; uint32_t cur_qp_idx = file->index / VIRTIO_QNUM; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return -1; @@ -670,12 +667,12 @@ vhost_set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file) * This fd gets copied into our process space. */ int -vhost_set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file) +vhost_set_vring_kick(int vid, struct vhost_vring_file *file) { struct virtio_net *dev; struct vhost_virtqueue *vq; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return -1; @@ -700,11 +697,11 @@ vhost_set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file) * The device will still exist in the device configuration linked list. */ int -vhost_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file) +vhost_set_backend(int vid, struct vhost_vring_file *file) { struct virtio_net *dev; - dev = get_device(ctx); + dev = get_device(vid); if (dev == NULL) return -1; @@ -716,20 +713,98 @@ vhost_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file) * we add the device. */ if (!(dev->flags & VIRTIO_DEV_RUNNING)) { - if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) && - ((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED)) { - return notify_ops->new_device(dev); + if (dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED && + dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED) { + if (notify_ops->new_device(vid) < 0) + return -1; + dev->flags |= VIRTIO_DEV_RUNNING; } - /* Otherwise we remove it. */ - } else - if (file->fd == VIRTIO_DEV_STOPPED) - notify_ops->destroy_device(dev); + } else if (file->fd == VIRTIO_DEV_STOPPED) { + dev->flags &= ~VIRTIO_DEV_RUNNING; + notify_ops->destroy_device(vid); + } + + return 0; +} + +int +rte_vhost_get_numa_node(int vid) +{ +#ifdef RTE_LIBRTE_VHOST_NUMA + struct virtio_net *dev = get_device(vid); + int numa_node; + int ret; + + if (dev == NULL) + return -1; + + ret = get_mempolicy(&numa_node, NULL, 0, dev, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) failed to query numa node: %d\n", vid, ret); + return -1; + } + + return numa_node; +#else + RTE_SET_USED(vid); + return -1; +#endif +} + +uint32_t +rte_vhost_get_queue_num(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return 0; + + return dev->virt_qp_nb; +} + +int +rte_vhost_get_ifname(int vid, char *buf, size_t len) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return -1; + + len = RTE_MIN(len, sizeof(dev->ifname)); + + strncpy(buf, dev->ifname, len); + buf[len - 1] = '\0'; + return 0; } -int rte_vhost_enable_guest_notification(struct virtio_net *dev, - uint16_t queue_id, int enable) +uint16_t +rte_vhost_avail_entries(int vid, uint16_t queue_id) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (!dev) + return 0; + + vq = dev->virtqueue[queue_id]; + if (!vq->enabled) + return 0; + + return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx; +} + +int +rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) { + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return -1; + if (enable) { RTE_LOG(ERR, VHOST_CONFIG, "guest notification isn't supported.\n"); diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h deleted file mode 100644 index 75fb57e5..00000000 --- a/lib/librte_vhost/virtio-net.h +++ /dev/null @@ -1,43 +0,0 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _VIRTIO_NET_H -#define _VIRTIO_NET_H - -#include "vhost-net.h" -#include "rte_virtio_net.h" - -struct virtio_net_device_ops const *notify_ops; -struct virtio_net *get_device(struct vhost_device_ctx ctx); - -#endif |