New upstream version 17.11.2
[deb_dpdk.git] / examples / vhost_scsi / vhost_scsi.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdint.h>
35 #include <unistd.h>
36 #include <stdbool.h>
37 #include <signal.h>
38 #include <assert.h>
39 #include <semaphore.h>
40 #include <linux/virtio_scsi.h>
41 #include <linux/virtio_ring.h>
42
43 #include <rte_atomic.h>
44 #include <rte_cycles.h>
45 #include <rte_log.h>
46 #include <rte_malloc.h>
47 #include <rte_vhost.h>
48
49 #include "vhost_scsi.h"
50 #include "scsi_spec.h"
51
/* Virtio feature bits offered to the guest for this vhost-user SCSI target.
 * The value is passed to rte_vhost_driver_set_features() as a feature mask,
 * so every bit is built from an unsigned 64-bit one: the shifts stay well
 * defined for any feature bit position, not only those below 31.
 */
#define VIRTIO_SCSI_FEATURES ((1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |\
                              (1ULL << VIRTIO_RING_F_EVENT_IDX) |\
                              (1ULL << VIRTIO_SCSI_F_INOUT) |\
                              (1ULL << VIRTIO_SCSI_F_CHANGE))
56
/* Full path of the socket file. It is built from the current working
 * directory and the controller name in vhost_scsi_ctrlr_construct(), and
 * removed again by the SIGINT handler.
 */
static char dev_pathname[PATH_MAX] = "";

/* The only controller instance; assigned in main(). */
static struct vhost_scsi_ctrlr *g_vhost_ctrlr;
/* Non-zero asks the worker thread to leave its polling loop. */
static int g_should_stop;
/* Posted by the worker thread when it exits; destroy_device() waits on it. */
static sem_t exit_sem;
63
64 static struct vhost_scsi_ctrlr *
65 vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name)
66 {
67         /* currently we only support 1 socket file fd */
68         return g_vhost_ctrlr;
69 }
70
71 static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len)
72 {
73         char path[PATH_MAX];
74         struct vhost_scsi_ctrlr *ctrlr;
75         int ret = 0;
76
77         ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
78         if (ret) {
79                 fprintf(stderr, "Cannot get socket name\n");
80                 assert(ret != 0);
81         }
82
83         ctrlr = vhost_scsi_ctrlr_find(path);
84         if (!ctrlr) {
85                 fprintf(stderr, "Controller is not ready\n");
86                 assert(ctrlr != NULL);
87         }
88
89         assert(ctrlr->mem != NULL);
90
91         return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
92 }
93
94 static struct vring_desc *
95 descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc)
96 {
97         return &vq_desc[cur_desc->next];
98 }
99
100 static bool
101 descriptor_has_next(struct vring_desc *cur_desc)
102 {
103         return !!(cur_desc->flags & VRING_DESC_F_NEXT);
104 }
105
106 static bool
107 descriptor_is_wr(struct vring_desc *cur_desc)
108 {
109         return !!(cur_desc->flags & VRING_DESC_F_WRITE);
110 }
111
112 static void
113 submit_completion(struct vhost_scsi_task *task)
114 {
115         struct rte_vhost_vring *vq;
116         struct vring_used *used;
117
118         vq = task->vq;
119         used = vq->used;
120         /* Fill out the next entry in the "used" ring.  id = the
121          * index of the descriptor that contained the SCSI request.
122          * len = the total amount of data transferred for the SCSI
123          * request. We must report the correct len, for variable
124          * length SCSI CDBs, where we may return less data than
125          * allocated by the guest VM.
126          */
127         used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
128         used->ring[used->idx & (vq->size - 1)].len = task->data_len;
129         used->idx++;
130
131         /* Send an interrupt back to the guest VM so that it knows
132          * a completion is ready to be processed.
133          */
134         eventfd_write(vq->callfd, (eventfd_t)1);
135 }
136
137 static void
138 vhost_process_read_payload_chain(struct vhost_scsi_task *task)
139 {
140         void *data;
141         uint64_t chunck_len;
142
143         task->iovs_cnt = 0;
144         chunck_len = task->desc->len;
145         task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
146                                                    task->desc->addr,
147                                                    &chunck_len);
148         if (!task->resp || chunck_len != task->desc->len) {
149                 fprintf(stderr, "failed to translate desc address.\n");
150                 return;
151         }
152
153         while (descriptor_has_next(task->desc)) {
154                 task->desc = descriptor_get_next(task->vq->desc, task->desc);
155                 chunck_len = task->desc->len;
156                 data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
157                                                      task->desc->addr,
158                                                          &chunck_len);
159                 if (!data || chunck_len != task->desc->len) {
160                         fprintf(stderr, "failed to translate desc address.\n");
161                         return;
162                 }
163
164                 task->iovs[task->iovs_cnt].iov_base = data;
165                 task->iovs[task->iovs_cnt].iov_len = task->desc->len;
166                 task->data_len += task->desc->len;
167                 task->iovs_cnt++;
168         }
169 }
170
171 static void
172 vhost_process_write_payload_chain(struct vhost_scsi_task *task)
173 {
174         void *data;
175         uint64_t chunck_len;
176
177         task->iovs_cnt = 0;
178
179         do {
180                 chunck_len = task->desc->len;
181                 data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
182                                                      task->desc->addr,
183                                                          &chunck_len);
184                 if (!data || chunck_len != task->desc->len) {
185                         fprintf(stderr, "failed to translate desc address.\n");
186                         return;
187                 }
188
189                 task->iovs[task->iovs_cnt].iov_base = data;
190                 task->iovs[task->iovs_cnt].iov_len = task->desc->len;
191                 task->data_len += task->desc->len;
192                 task->iovs_cnt++;
193                 task->desc = descriptor_get_next(task->vq->desc, task->desc);
194         } while (descriptor_has_next(task->desc));
195
196         chunck_len = task->desc->len;
197         task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
198                                                    task->desc->addr,
199                                                    &chunck_len);
200         if (!task->resp || chunck_len != task->desc->len)
201                 fprintf(stderr, "failed to translate desc address.\n");
202 }
203
204 static struct vhost_block_dev *
205 vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial,
206                           uint32_t blk_size, uint64_t blk_cnt,
207                           bool wce_enable)
208 {
209         struct vhost_block_dev *bdev;
210
211         bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
212         if (!bdev)
213                 return NULL;
214
215         strncpy(bdev->name, bdev_name, sizeof(bdev->name));
216         strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name));
217         bdev->blocklen = blk_size;
218         bdev->blockcnt = blk_cnt;
219         bdev->write_cache = wce_enable;
220
221         /* use memory as disk storage space */
222         bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
223         if (!bdev->data) {
224                 fprintf(stderr, "no enough reseverd huge memory for disk\n");
225                 return NULL;
226         }
227
228         return bdev;
229 }
230
231 static void
232 process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx)
233 {
234         int ret;
235         struct vhost_scsi_queue *scsi_vq;
236         struct rte_vhost_vring *vq;
237
238         scsi_vq = &ctrlr->bdev->queues[q_idx];
239         vq = &scsi_vq->vq;
240         ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, vq);
241         assert(ret == 0);
242
243         while (vq->avail->idx != scsi_vq->last_used_idx) {
244                 int req_idx;
245                 uint16_t last_idx;
246                 struct vhost_scsi_task *task;
247                 uint64_t chunck_len;
248
249                 last_idx = scsi_vq->last_used_idx & (vq->size - 1);
250                 req_idx = vq->avail->ring[last_idx];
251
252                 task = rte_zmalloc(NULL, sizeof(*task), 0);
253                 assert(task != NULL);
254
255                 task->ctrlr = ctrlr;
256                 task->bdev = ctrlr->bdev;
257                 task->vq = vq;
258                 task->req_idx = req_idx;
259                 task->desc = &task->vq->desc[task->req_idx];
260
261                 /* does not support indirect descriptors */
262                 assert((task->desc->flags & VRING_DESC_F_INDIRECT) == 0);
263                 scsi_vq->last_used_idx++;
264
265                 chunck_len = task->desc->len;
266                 task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
267                                                           task->desc->addr,
268                                                           &chunck_len);
269                 if (!task->req || chunck_len != task->desc->len) {
270                         fprintf(stderr, "failed to translate desc address.\n");
271                         return;
272                 }
273
274                 task->desc = descriptor_get_next(task->vq->desc, task->desc);
275                 if (!descriptor_has_next(task->desc)) {
276                         task->dxfer_dir = SCSI_DIR_NONE;
277                         chunck_len = task->desc->len;
278                         task->resp = (void *)(uintptr_t)
279                                               gpa_to_vva(task->bdev->vid,
280                                                          task->desc->addr,
281                                                          &chunck_len);
282                         if (!task->resp || chunck_len != task->desc->len) {
283                                 fprintf(stderr, "failed to translate desc address.\n");
284                                 return;
285                         }
286                 } else if (!descriptor_is_wr(task->desc)) {
287                         task->dxfer_dir = SCSI_DIR_TO_DEV;
288                         vhost_process_write_payload_chain(task);
289                 } else {
290                         task->dxfer_dir = SCSI_DIR_FROM_DEV;
291                         vhost_process_read_payload_chain(task);
292                 }
293
294                 ret = vhost_bdev_process_scsi_commands(ctrlr->bdev, task);
295                 if (ret) {
296                         /* invalid response */
297                         task->resp->response = VIRTIO_SCSI_S_BAD_TARGET;
298                 } else {
299                         /* successfully */
300                         task->resp->response = VIRTIO_SCSI_S_OK;
301                         task->resp->status = 0;
302                         task->resp->resid = 0;
303                 }
304                 submit_completion(task);
305                 rte_free(task);
306         }
307 }
308
309 /* Main framework for processing IOs */
310 static void *
311 ctrlr_worker(void *arg)
312 {
313         uint32_t idx, num;
314         struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg;
315         cpu_set_t cpuset;
316         pthread_t thread;
317
318         thread = pthread_self();
319         CPU_ZERO(&cpuset);
320         CPU_SET(0, &cpuset);
321         pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
322
323         num =  rte_vhost_get_vring_num(ctrlr->bdev->vid);
324         fprintf(stdout, "Ctrlr Worker Thread Started with %u Vring\n", num);
325
326         if (num != NUM_OF_SCSI_QUEUES) {
327                 fprintf(stderr, "Only 1 IO queue are supported\n");
328                 exit(0);
329         }
330
331         while (!g_should_stop && ctrlr->bdev != NULL) {
332                 /* At least 3 vrings, currently only can support 1 IO queue
333                  * Queue 2 for IO queue, does not support TMF and hotplug
334                  * for the example application now
335                  */
336                 for (idx = 2; idx < num; idx++)
337                         process_requestq(ctrlr, idx);
338         }
339
340         fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
341         sem_post(&exit_sem);
342         return NULL;
343 }
344
345 static int
346 new_device(int vid)
347 {
348         char path[PATH_MAX];
349         struct vhost_scsi_ctrlr *ctrlr;
350         struct vhost_scsi_queue *scsi_vq;
351         struct rte_vhost_vring *vq;
352         pthread_t tid;
353         int i, ret;
354
355         ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
356         if (ret) {
357                 fprintf(stderr, "Cannot get socket name\n");
358                 return -1;
359         }
360
361         ctrlr = vhost_scsi_ctrlr_find(path);
362         if (!ctrlr) {
363                 fprintf(stderr, "Controller is not ready\n");
364                 return -1;
365         }
366
367         ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
368         if (ret) {
369                 fprintf(stderr, "Get Controller memory region failed\n");
370                 return -1;
371         }
372         assert(ctrlr->mem != NULL);
373
374         /* hardcoded block device information with 128MiB */
375         ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0",
376                                                 4096, 32768, 0);
377         if (!ctrlr->bdev)
378                 return -1;
379
380         ctrlr->bdev->vid = vid;
381
382         /* Disable Notifications */
383         for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) {
384                 rte_vhost_enable_guest_notification(vid, i, 0);
385                 /* restore used index */
386                 scsi_vq = &ctrlr->bdev->queues[i];
387                 vq = &scsi_vq->vq;
388                 ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
389                 assert(ret == 0);
390                 scsi_vq->last_used_idx = vq->used->idx;
391                 scsi_vq->last_avail_idx = vq->used->idx;
392         }
393
394         g_should_stop = 0;
395         fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
396         if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
397                 fprintf(stderr, "Worker Thread Started Failed\n");
398                 return -1;
399         }
400         pthread_detach(tid);
401         return 0;
402 }
403
404 static void
405 destroy_device(int vid)
406 {
407         char path[PATH_MAX];
408         struct vhost_scsi_ctrlr *ctrlr;
409
410         rte_vhost_get_ifname(vid, path, PATH_MAX);
411         fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
412         ctrlr = vhost_scsi_ctrlr_find(path);
413         if (!ctrlr) {
414                 fprintf(stderr, "Destroy Ctrlr Failed\n");
415                 return;
416         }
417         ctrlr->bdev = NULL;
418         g_should_stop = 1;
419
420         sem_wait(&exit_sem);
421 }
422
423 static const struct vhost_device_ops vhost_scsi_device_ops = {
424         .new_device =  new_device,
425         .destroy_device = destroy_device,
426 };
427
428 static struct vhost_scsi_ctrlr *
429 vhost_scsi_ctrlr_construct(const char *ctrlr_name)
430 {
431         int ret;
432         struct vhost_scsi_ctrlr *ctrlr;
433         char *path;
434         char cwd[PATH_MAX];
435
436         /* always use current directory */
437         path = getcwd(cwd, PATH_MAX);
438         if (!path) {
439                 fprintf(stderr, "Cannot get current working directory\n");
440                 return NULL;
441         }
442         snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);
443
444         if (access(dev_pathname, F_OK) != -1) {
445                 if (unlink(dev_pathname) != 0)
446                         rte_exit(EXIT_FAILURE, "Cannot remove %s.\n",
447                                  dev_pathname);
448         }
449
450         if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
451                 fprintf(stderr, "socket %s already exists\n", dev_pathname);
452                 return NULL;
453         }
454
455         fprintf(stdout, "socket file: %s created\n", dev_pathname);
456
457         ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES);
458         if (ret != 0) {
459                 fprintf(stderr, "Set vhost driver features failed\n");
460                 return NULL;
461         }
462
463         ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
464         if (!ctrlr)
465                 return NULL;
466
467         rte_vhost_driver_callback_register(dev_pathname,
468                                            &vhost_scsi_device_ops);
469
470         return ctrlr;
471 }
472
473 static void
474 signal_handler(__rte_unused int signum)
475 {
476
477         if (access(dev_pathname, F_OK) == 0)
478                 unlink(dev_pathname);
479         exit(0);
480 }
481
482 int main(int argc, char *argv[])
483 {
484         int ret;
485
486         signal(SIGINT, signal_handler);
487
488         /* init EAL */
489         ret = rte_eal_init(argc, argv);
490         if (ret < 0)
491                 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
492
493         g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket");
494         if (g_vhost_ctrlr == NULL) {
495                 fprintf(stderr, "Construct vhost scsi controller failed\n");
496                 return 0;
497         }
498
499         if (sem_init(&exit_sem, 0, 0) < 0) {
500                 fprintf(stderr, "Error init exit_sem\n");
501                 return -1;
502         }
503
504         rte_vhost_driver_start(dev_pathname);
505
506         /* loop for exit the application */
507         while (1)
508                 sleep(1);
509
510         return 0;
511 }
512