4 * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
#include <signal.h>
#include <unistd.h>
#include <pthread.h>
#include <semaphore.h>
#include <sys/eventfd.h>
#include <linux/virtio_scsi.h>
#include <linux/virtio_ring.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_malloc.h>
#include <rte_vhost.h>

#include "vhost_scsi.h"
#include "scsi_spec.h"
52 #define VIRTIO_SCSI_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) |\
53 (1 << VIRTIO_RING_F_EVENT_IDX) |\
54 (1 << VIRTIO_SCSI_F_INOUT) |\
55 (1 << VIRTIO_SCSI_F_CHANGE))
57 /* Path to folder where character device will be created. Can be set by user. */
58 static char dev_pathname[PATH_MAX] = "";
60 static struct vhost_scsi_ctrlr *g_vhost_ctrlr;
61 static int g_should_stop;
62 static sem_t exit_sem;
64 static struct vhost_scsi_ctrlr *
65 vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name)
67 /* currently we only support 1 socket file fd */
71 static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len)
74 struct vhost_scsi_ctrlr *ctrlr;
77 ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
79 fprintf(stderr, "Cannot get socket name\n");
83 ctrlr = vhost_scsi_ctrlr_find(path);
85 fprintf(stderr, "Controller is not ready\n");
86 assert(ctrlr != NULL);
89 assert(ctrlr->mem != NULL);
91 return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
/* Return the descriptor that @cur_desc chains to within @vq_desc. */
static struct vring_desc *
descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc)
{
	return &vq_desc[cur_desc->next];
}
/* True when @cur_desc is chained to a following descriptor. */
static bool
descriptor_has_next(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_NEXT);
}
/* True when the guest marked @cur_desc device-writable (WRITE flag). */
static bool
descriptor_is_wr(struct vring_desc *cur_desc)
{
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}
113 submit_completion(struct vhost_scsi_task *task)
115 struct rte_vhost_vring *vq;
116 struct vring_used *used;
120 /* Fill out the next entry in the "used" ring. id = the
121 * index of the descriptor that contained the SCSI request.
122 * len = the total amount of data transferred for the SCSI
123 * request. We must report the correct len, for variable
124 * length SCSI CDBs, where we may return less data than
125 * allocated by the guest VM.
127 used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
128 used->ring[used->idx & (vq->size - 1)].len = task->data_len;
131 /* Send an interrupt back to the guest VM so that it knows
132 * a completion is ready to be processed.
134 eventfd_write(vq->callfd, (eventfd_t)1);
138 vhost_process_read_payload_chain(struct vhost_scsi_task *task)
144 chunck_len = task->desc->len;
145 task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
148 if (!task->resp || chunck_len != task->desc->len) {
149 fprintf(stderr, "failed to translate desc address.\n");
153 while (descriptor_has_next(task->desc)) {
154 task->desc = descriptor_get_next(task->vq->desc, task->desc);
155 chunck_len = task->desc->len;
156 data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
159 if (!data || chunck_len != task->desc->len) {
160 fprintf(stderr, "failed to translate desc address.\n");
164 task->iovs[task->iovs_cnt].iov_base = data;
165 task->iovs[task->iovs_cnt].iov_len = task->desc->len;
166 task->data_len += task->desc->len;
172 vhost_process_write_payload_chain(struct vhost_scsi_task *task)
180 chunck_len = task->desc->len;
181 data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
184 if (!data || chunck_len != task->desc->len) {
185 fprintf(stderr, "failed to translate desc address.\n");
189 task->iovs[task->iovs_cnt].iov_base = data;
190 task->iovs[task->iovs_cnt].iov_len = task->desc->len;
191 task->data_len += task->desc->len;
193 task->desc = descriptor_get_next(task->vq->desc, task->desc);
194 } while (descriptor_has_next(task->desc));
196 chunck_len = task->desc->len;
197 task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
200 if (!task->resp || chunck_len != task->desc->len)
201 fprintf(stderr, "failed to translate desc address.\n");
204 static struct vhost_block_dev *
205 vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial,
206 uint32_t blk_size, uint64_t blk_cnt,
209 struct vhost_block_dev *bdev;
211 bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
215 strncpy(bdev->name, bdev_name, sizeof(bdev->name));
216 strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name));
217 bdev->blocklen = blk_size;
218 bdev->blockcnt = blk_cnt;
219 bdev->write_cache = wce_enable;
221 /* use memory as disk storage space */
222 bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
224 fprintf(stderr, "no enough reseverd huge memory for disk\n");
232 process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx)
235 struct vhost_scsi_queue *scsi_vq;
236 struct rte_vhost_vring *vq;
238 scsi_vq = &ctrlr->bdev->queues[q_idx];
240 ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, vq);
243 while (vq->avail->idx != scsi_vq->last_used_idx) {
246 struct vhost_scsi_task *task;
249 last_idx = scsi_vq->last_used_idx & (vq->size - 1);
250 req_idx = vq->avail->ring[last_idx];
252 task = rte_zmalloc(NULL, sizeof(*task), 0);
253 assert(task != NULL);
256 task->bdev = ctrlr->bdev;
258 task->req_idx = req_idx;
259 task->desc = &task->vq->desc[task->req_idx];
261 /* does not support indirect descriptors */
262 assert((task->desc->flags & VRING_DESC_F_INDIRECT) == 0);
263 scsi_vq->last_used_idx++;
265 chunck_len = task->desc->len;
266 task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
269 if (!task->req || chunck_len != task->desc->len) {
270 fprintf(stderr, "failed to translate desc address.\n");
274 task->desc = descriptor_get_next(task->vq->desc, task->desc);
275 if (!descriptor_has_next(task->desc)) {
276 task->dxfer_dir = SCSI_DIR_NONE;
277 chunck_len = task->desc->len;
278 task->resp = (void *)(uintptr_t)
279 gpa_to_vva(task->bdev->vid,
282 if (!task->resp || chunck_len != task->desc->len) {
283 fprintf(stderr, "failed to translate desc address.\n");
286 } else if (!descriptor_is_wr(task->desc)) {
287 task->dxfer_dir = SCSI_DIR_TO_DEV;
288 vhost_process_write_payload_chain(task);
290 task->dxfer_dir = SCSI_DIR_FROM_DEV;
291 vhost_process_read_payload_chain(task);
294 ret = vhost_bdev_process_scsi_commands(ctrlr->bdev, task);
296 /* invalid response */
297 task->resp->response = VIRTIO_SCSI_S_BAD_TARGET;
300 task->resp->response = VIRTIO_SCSI_S_OK;
301 task->resp->status = 0;
302 task->resp->resid = 0;
304 submit_completion(task);
309 /* Main framework for processing IOs */
311 ctrlr_worker(void *arg)
314 struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg;
318 thread = pthread_self();
321 pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
323 num = rte_vhost_get_vring_num(ctrlr->bdev->vid);
324 fprintf(stdout, "Ctrlr Worker Thread Started with %u Vring\n", num);
326 if (num != NUM_OF_SCSI_QUEUES) {
327 fprintf(stderr, "Only 1 IO queue are supported\n");
331 while (!g_should_stop && ctrlr->bdev != NULL) {
332 /* At least 3 vrings, currently only can support 1 IO queue
333 * Queue 2 for IO queue, does not support TMF and hotplug
334 * for the example application now
336 for (idx = 2; idx < num; idx++)
337 process_requestq(ctrlr, idx);
340 fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
349 struct vhost_scsi_ctrlr *ctrlr;
350 struct vhost_scsi_queue *scsi_vq;
351 struct rte_vhost_vring *vq;
355 ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
357 fprintf(stderr, "Cannot get socket name\n");
361 ctrlr = vhost_scsi_ctrlr_find(path);
363 fprintf(stderr, "Controller is not ready\n");
367 ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
369 fprintf(stderr, "Get Controller memory region failed\n");
372 assert(ctrlr->mem != NULL);
374 /* hardcoded block device information with 128MiB */
375 ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0",
380 ctrlr->bdev->vid = vid;
382 /* Disable Notifications */
383 for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) {
384 rte_vhost_enable_guest_notification(vid, i, 0);
385 /* restore used index */
386 scsi_vq = &ctrlr->bdev->queues[i];
388 ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
390 scsi_vq->last_used_idx = vq->used->idx;
391 scsi_vq->last_avail_idx = vq->used->idx;
395 fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
396 if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
397 fprintf(stderr, "Worker Thread Started Failed\n");
405 destroy_device(int vid)
408 struct vhost_scsi_ctrlr *ctrlr;
410 rte_vhost_get_ifname(vid, path, PATH_MAX);
411 fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
412 ctrlr = vhost_scsi_ctrlr_find(path);
414 fprintf(stderr, "Destroy Ctrlr Failed\n");
423 static const struct vhost_device_ops vhost_scsi_device_ops = {
424 .new_device = new_device,
425 .destroy_device = destroy_device,
428 static struct vhost_scsi_ctrlr *
429 vhost_scsi_ctrlr_construct(const char *ctrlr_name)
432 struct vhost_scsi_ctrlr *ctrlr;
436 /* always use current directory */
437 path = getcwd(cwd, PATH_MAX);
439 fprintf(stderr, "Cannot get current working directory\n");
442 snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);
444 if (access(dev_pathname, F_OK) != -1) {
445 if (unlink(dev_pathname) != 0)
446 rte_exit(EXIT_FAILURE, "Cannot remove %s.\n",
450 if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
451 fprintf(stderr, "socket %s already exists\n", dev_pathname);
455 fprintf(stdout, "socket file: %s created\n", dev_pathname);
457 ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES);
459 fprintf(stderr, "Set vhost driver features failed\n");
463 ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
467 rte_vhost_driver_callback_register(dev_pathname,
468 &vhost_scsi_device_ops);
474 signal_handler(__rte_unused int signum)
477 if (access(dev_pathname, F_OK) == 0)
478 unlink(dev_pathname);
482 int main(int argc, char *argv[])
486 signal(SIGINT, signal_handler);
489 ret = rte_eal_init(argc, argv);
491 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
493 g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket");
494 if (g_vhost_ctrlr == NULL) {
495 fprintf(stderr, "Construct vhost scsi controller failed\n");
499 if (sem_init(&exit_sem, 0, 0) < 0) {
500 fprintf(stderr, "Error init exit_sem\n");
504 rte_vhost_driver_start(dev_pathname);
506 /* loop for exit the application */