diff options
Diffstat (limited to 'examples/vhost_scsi/vhost_scsi.c')
-rw-r--r-- | examples/vhost_scsi/vhost_scsi.c | 474 |
1 files changed, 474 insertions, 0 deletions
diff --git a/examples/vhost_scsi/vhost_scsi.c b/examples/vhost_scsi/vhost_scsi.c new file mode 100644 index 00000000..b4f1f8d2 --- /dev/null +++ b/examples/vhost_scsi/vhost_scsi.c @@ -0,0 +1,474 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <unistd.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <semaphore.h> +#include <linux/virtio_scsi.h> +#include <linux/virtio_ring.h> + +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_vhost.h> + +#include "vhost_scsi.h" +#include "scsi_spec.h" + +#define VIRTIO_SCSI_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) |\ + (1 << VIRTIO_RING_F_EVENT_IDX) |\ + (1 << VIRTIO_SCSI_F_INOUT) |\ + (1 << VIRTIO_SCSI_F_CHANGE)) + +/* Path to folder where character device will be created. Can be set by user. */ +static char dev_pathname[PATH_MAX] = ""; + +static struct vhost_scsi_ctrlr *g_vhost_ctrlr; +static int g_should_stop; +static sem_t exit_sem; + +static struct vhost_scsi_ctrlr * +vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name) +{ + /* currently we only support 1 socket file fd */ + return g_vhost_ctrlr; +} + +static uint64_t gpa_to_vva(int vid, uint64_t gpa) +{ + char path[PATH_MAX]; + struct vhost_scsi_ctrlr *ctrlr; + int ret = 0; + + ret = rte_vhost_get_ifname(vid, path, PATH_MAX); + if (ret) { + fprintf(stderr, "Cannot get socket name\n"); + assert(ret != 0); + } + + ctrlr = vhost_scsi_ctrlr_find(path); + if (!ctrlr) { + fprintf(stderr, "Controller is not ready\n"); + assert(ctrlr != NULL); + } + + assert(ctrlr->mem != NULL); + + return rte_vhost_gpa_to_vva(ctrlr->mem, gpa); +} + +static struct vring_desc * +descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc) +{ + return &vq_desc[cur_desc->next]; +} + +static bool +descriptor_has_next(struct vring_desc *cur_desc) +{ + return !!(cur_desc->flags & VRING_DESC_F_NEXT); +} + +static bool +descriptor_is_wr(struct vring_desc *cur_desc) +{ + return !!(cur_desc->flags & VRING_DESC_F_WRITE); +} + +static void +submit_completion(struct vhost_scsi_task *task) +{ + struct rte_vhost_vring *vq; + struct vring_used *used; + + vq = task->vq; + used = vq->used; + /* Fill out the next entry in the "used" ring. id = the + * index of the descriptor that contained the SCSI request. + * len = the total amount of data transferred for the SCSI + * request. We must report the correct len, for variable + * length SCSI CDBs, where we may return less data than + * allocated by the guest VM. + */ + used->ring[used->idx & (vq->size - 1)].id = task->req_idx; + used->ring[used->idx & (vq->size - 1)].len = task->data_len; + used->idx++; + + /* Send an interrupt back to the guest VM so that it knows + * a completion is ready to be processed. + */ + eventfd_write(vq->callfd, (eventfd_t)1); +} + +static void +vhost_process_read_payload_chain(struct vhost_scsi_task *task) +{ + void *data; + + task->iovs_cnt = 0; + task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr); + + while (descriptor_has_next(task->desc)) { + task->desc = descriptor_get_next(task->vq->desc, task->desc); + data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr); + task->iovs[task->iovs_cnt].iov_base = data; + task->iovs[task->iovs_cnt].iov_len = task->desc->len; + task->data_len += task->desc->len; + task->iovs_cnt++; + } +} + +static void +vhost_process_write_payload_chain(struct vhost_scsi_task *task) +{ + void *data; + + task->iovs_cnt = 0; + + do { + data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr); + task->iovs[task->iovs_cnt].iov_base = data; + task->iovs[task->iovs_cnt].iov_len = task->desc->len; + task->data_len += task->desc->len; + task->iovs_cnt++; + task->desc = descriptor_get_next(task->vq->desc, task->desc); + } while (descriptor_has_next(task->desc)); + + task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr); +} + +static struct vhost_block_dev * +vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial, + uint32_t blk_size, uint64_t blk_cnt, + bool wce_enable) +{ + struct vhost_block_dev *bdev; + + bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE); + if (!bdev) + return NULL; + + strncpy(bdev->name, bdev_name, sizeof(bdev->name)); + strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name)); + bdev->blocklen = blk_size; + bdev->blockcnt = blk_cnt; + bdev->write_cache = wce_enable; + + /* use memory as disk storage space */ + bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0); + if (!bdev->data) { + fprintf(stderr, "no enough reseverd huge memory for disk\n"); + return NULL; + } + + return bdev; +} + +static void +process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx) +{ + int ret; + struct vhost_scsi_queue *scsi_vq; + struct rte_vhost_vring *vq; + + scsi_vq = &ctrlr->bdev->queues[q_idx]; + vq = &scsi_vq->vq; + ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, vq); + assert(ret == 0); + + while (vq->avail->idx != scsi_vq->last_used_idx) { + int req_idx; + uint16_t last_idx; + struct vhost_scsi_task *task; + + last_idx = scsi_vq->last_used_idx & (vq->size - 1); + req_idx = vq->avail->ring[last_idx]; + + task = rte_zmalloc(NULL, sizeof(*task), 0); + assert(task != NULL); + + task->ctrlr = ctrlr; + task->bdev = ctrlr->bdev; + task->vq = vq; + task->req_idx = req_idx; + task->desc = &task->vq->desc[task->req_idx]; + + /* does not support indirect descriptors */ + assert((task->desc->flags & VRING_DESC_F_INDIRECT) == 0); + scsi_vq->last_used_idx++; + + task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr); + + task->desc = descriptor_get_next(task->vq->desc, task->desc); + if (!descriptor_has_next(task->desc)) { + task->dxfer_dir = SCSI_DIR_NONE; + task->resp = (void *)(uintptr_t) + gpa_to_vva(task->bdev->vid, + task->desc->addr); + + } else if (!descriptor_is_wr(task->desc)) { + task->dxfer_dir = SCSI_DIR_TO_DEV; + vhost_process_write_payload_chain(task); + } else { + task->dxfer_dir = SCSI_DIR_FROM_DEV; + vhost_process_read_payload_chain(task); + } + + ret = vhost_bdev_process_scsi_commands(ctrlr->bdev, task); + if (ret) { + /* invalid response */ + task->resp->response = VIRTIO_SCSI_S_BAD_TARGET; + } else { + /* successfully */ + task->resp->response = VIRTIO_SCSI_S_OK; + task->resp->status = 0; + task->resp->resid = 0; + } + submit_completion(task); + rte_free(task); + } +} + +/* Main framework for processing IOs */ +static void * +ctrlr_worker(void *arg) +{ + uint32_t idx, num; + struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg; + cpu_set_t cpuset; + pthread_t thread; + + thread = pthread_self(); + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset); + + num = rte_vhost_get_vring_num(ctrlr->bdev->vid); + fprintf(stdout, "Ctrlr Worker Thread Started with %u Vring\n", num); + + if (num != NUM_OF_SCSI_QUEUES) { + fprintf(stderr, "Only 1 IO queue are supported\n"); + exit(0); + } + + while (!g_should_stop && ctrlr->bdev != NULL) { + /* At least 3 vrings, currently only can support 1 IO queue + * Queue 2 for IO queue, does not support TMF and hotplug + * for the example application now + */ + for (idx = 2; idx < num; idx++) + process_requestq(ctrlr, idx); + } + + fprintf(stdout, "Ctrlr Worker Thread Exiting\n"); + sem_post(&exit_sem); + return NULL; +} + +static int +new_device(int vid) +{ + char path[PATH_MAX]; + struct vhost_scsi_ctrlr *ctrlr; + struct vhost_scsi_queue *scsi_vq; + struct rte_vhost_vring *vq; + pthread_t tid; + int i, ret; + + ret = rte_vhost_get_ifname(vid, path, PATH_MAX); + if (ret) { + fprintf(stderr, "Cannot get socket name\n"); + return -1; + } + + ctrlr = vhost_scsi_ctrlr_find(path); + if (!ctrlr) { + fprintf(stderr, "Controller is not ready\n"); + return -1; + } + + ret = rte_vhost_get_mem_table(vid, &ctrlr->mem); + if (ret) { + fprintf(stderr, "Get Controller memory region failed\n"); + return -1; + } + assert(ctrlr->mem != NULL); + + /* hardcoded block device information with 128MiB */ + ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0", + 4096, 32768, 0); + if (!ctrlr->bdev) + return -1; + + ctrlr->bdev->vid = vid; + + /* Disable Notifications */ + for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) { + rte_vhost_enable_guest_notification(vid, i, 0); + /* restore used index */ + scsi_vq = &ctrlr->bdev->queues[i]; + vq = &scsi_vq->vq; + ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq); + assert(ret == 0); + scsi_vq->last_used_idx = vq->used->idx; + scsi_vq->last_avail_idx = vq->used->idx; + } + + g_should_stop = 0; + fprintf(stdout, "New Device %s, Device ID %d\n", path, vid); + if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) { + fprintf(stderr, "Worker Thread Started Failed\n"); + return -1; + } + pthread_detach(tid); + return 0; +} + +static void +destroy_device(int vid) +{ + char path[PATH_MAX]; + struct vhost_scsi_ctrlr *ctrlr; + + rte_vhost_get_ifname(vid, path, PATH_MAX); + fprintf(stdout, "Destroy %s Device ID %d\n", path, vid); + ctrlr = vhost_scsi_ctrlr_find(path); + if (!ctrlr) { + fprintf(stderr, "Destroy Ctrlr Failed\n"); + return; + } + ctrlr->bdev = NULL; + g_should_stop = 1; + + sem_wait(&exit_sem); +} + +static const struct vhost_device_ops vhost_scsi_device_ops = { + .new_device = new_device, + .destroy_device = destroy_device, +}; + +static struct vhost_scsi_ctrlr * +vhost_scsi_ctrlr_construct(const char *ctrlr_name) +{ + int ret; + struct vhost_scsi_ctrlr *ctrlr; + char *path; + char cwd[PATH_MAX]; + + /* always use current directory */ + path = getcwd(cwd, PATH_MAX); + if (!path) { + fprintf(stderr, "Cannot get current working directory\n"); + return NULL; + } + snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name); + + if (access(dev_pathname, F_OK) != -1) { + if (unlink(dev_pathname) != 0) + rte_exit(EXIT_FAILURE, "Cannot remove %s.\n", + dev_pathname); + } + + if (rte_vhost_driver_register(dev_pathname, 0) != 0) { + fprintf(stderr, "socket %s already exists\n", dev_pathname); + return NULL; + } + + fprintf(stdout, "socket file: %s created\n", dev_pathname); + + ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES); + if (ret != 0) { + fprintf(stderr, "Set vhost driver features failed\n"); + return NULL; + } + + ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE); + if (!ctrlr) + return NULL; + + rte_vhost_driver_callback_register(dev_pathname, + &vhost_scsi_device_ops); + + return ctrlr; +} + +static void +signal_handler(__rte_unused int signum) +{ + + if (access(dev_pathname, F_OK) == 0) + unlink(dev_pathname); + exit(0); +} + +int main(int argc, char *argv[]) +{ + int ret; + + signal(SIGINT, signal_handler); + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + + g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket"); + if (g_vhost_ctrlr == NULL) { + fprintf(stderr, "Construct vhost scsi controller failed\n"); + return 0; + } + + if (sem_init(&exit_sem, 0, 0) < 0) { + fprintf(stderr, "Error init exit_sem\n"); + return -1; + } + + rte_vhost_driver_start(dev_pathname); + + /* loop for exit the application */ + while (1) + sleep(1); + + return 0; +} + |