New upstream version 17.08
[deb_dpdk.git] / examples / vhost_scsi / vhost_scsi.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
#include <signal.h>
#include <assert.h>
#include <semaphore.h>
#include <linux/virtio_scsi.h>
#include <linux/virtio_ring.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_vhost.h>

#include "vhost_scsi.h"
#include "scsi_spec.h"
51
/*
 * Virtio feature bits offered on the vhost-scsi socket. The mask is built
 * with 64-bit shifts so that it stays well defined for any feature bit
 * number, including bits at or above 31 that would overflow a plain int.
 */
#define VIRTIO_SCSI_FEATURES ((1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |\
                              (1ULL << VIRTIO_RING_F_EVENT_IDX) |\
                              (1ULL << VIRTIO_SCSI_F_INOUT) |\
                              (1ULL << VIRTIO_SCSI_F_CHANGE))

/*
 * Full path of the vhost-user socket file. It is filled in by
 * vhost_scsi_ctrlr_construct() as "<current directory>/<controller name>"
 * and removed again by the SIGINT handler.
 */
static char dev_pathname[PATH_MAX] = "";

/* The only controller instance; created in main(). */
static struct vhost_scsi_ctrlr *g_vhost_ctrlr;
/* Set to non-zero by destroy_device() to make the worker leave its loop. */
static int g_should_stop;
/* Posted by the worker thread when it exits; destroy_device() waits on it. */
static sem_t exit_sem;
63
64 static struct vhost_scsi_ctrlr *
65 vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name)
66 {
67         /* currently we only support 1 socket file fd */
68         return g_vhost_ctrlr;
69 }
70
71 static uint64_t gpa_to_vva(int vid, uint64_t gpa)
72 {
73         char path[PATH_MAX];
74         struct vhost_scsi_ctrlr *ctrlr;
75         int ret = 0;
76
77         ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
78         if (ret) {
79                 fprintf(stderr, "Cannot get socket name\n");
80                 assert(ret != 0);
81         }
82
83         ctrlr = vhost_scsi_ctrlr_find(path);
84         if (!ctrlr) {
85                 fprintf(stderr, "Controller is not ready\n");
86                 assert(ctrlr != NULL);
87         }
88
89         assert(ctrlr->mem != NULL);
90
91         return rte_vhost_gpa_to_vva(ctrlr->mem, gpa);
92 }
93
/*
 * Follow the "next" link of a descriptor.
 *
 * @param vq_desc   base of the descriptor table.
 * @param cur_desc  descriptor whose "next" field is used as table index.
 * @return          pointer to the table entry at index cur_desc->next.
 */
static struct vring_desc *
descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc)
{
        return vq_desc + cur_desc->next;
}
99
/* Tell whether the descriptor has VRING_DESC_F_NEXT set in its flags. */
static bool
descriptor_has_next(struct vring_desc *cur_desc)
{
        return (cur_desc->flags & VRING_DESC_F_NEXT) != 0;
}
105
/* Tell whether the descriptor has VRING_DESC_F_WRITE set in its flags. */
static bool
descriptor_is_wr(struct vring_desc *cur_desc)
{
        return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}
111
112 static void
113 submit_completion(struct vhost_scsi_task *task)
114 {
115         struct rte_vhost_vring *vq;
116         struct vring_used *used;
117
118         vq = task->vq;
119         used = vq->used;
120         /* Fill out the next entry in the "used" ring.  id = the
121          * index of the descriptor that contained the SCSI request.
122          * len = the total amount of data transferred for the SCSI
123          * request. We must report the correct len, for variable
124          * length SCSI CDBs, where we may return less data than
125          * allocated by the guest VM.
126          */
127         used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
128         used->ring[used->idx & (vq->size - 1)].len = task->data_len;
129         used->idx++;
130
131         /* Send an interrupt back to the guest VM so that it knows
132          * a completion is ready to be processed.
133          */
134         eventfd_write(vq->callfd, (eventfd_t)1);
135 }
136
137 static void
138 vhost_process_read_payload_chain(struct vhost_scsi_task *task)
139 {
140         void *data;
141
142         task->iovs_cnt = 0;
143         task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
144                                                    task->desc->addr);
145
146         while (descriptor_has_next(task->desc)) {
147                 task->desc = descriptor_get_next(task->vq->desc, task->desc);
148                 data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
149                                                      task->desc->addr);
150                 task->iovs[task->iovs_cnt].iov_base = data;
151                 task->iovs[task->iovs_cnt].iov_len = task->desc->len;
152                 task->data_len += task->desc->len;
153                 task->iovs_cnt++;
154         }
155 }
156
157 static void
158 vhost_process_write_payload_chain(struct vhost_scsi_task *task)
159 {
160         void *data;
161
162         task->iovs_cnt = 0;
163
164         do {
165                 data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
166                                                      task->desc->addr);
167                 task->iovs[task->iovs_cnt].iov_base = data;
168                 task->iovs[task->iovs_cnt].iov_len = task->desc->len;
169                 task->data_len += task->desc->len;
170                 task->iovs_cnt++;
171                 task->desc = descriptor_get_next(task->vq->desc, task->desc);
172         } while (descriptor_has_next(task->desc));
173
174         task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
175                                                    task->desc->addr);
176 }
177
178 static struct vhost_block_dev *
179 vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial,
180                           uint32_t blk_size, uint64_t blk_cnt,
181                           bool wce_enable)
182 {
183         struct vhost_block_dev *bdev;
184
185         bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
186         if (!bdev)
187                 return NULL;
188
189         strncpy(bdev->name, bdev_name, sizeof(bdev->name));
190         strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name));
191         bdev->blocklen = blk_size;
192         bdev->blockcnt = blk_cnt;
193         bdev->write_cache = wce_enable;
194
195         /* use memory as disk storage space */
196         bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
197         if (!bdev->data) {
198                 fprintf(stderr, "no enough reseverd huge memory for disk\n");
199                 return NULL;
200         }
201
202         return bdev;
203 }
204
205 static void
206 process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx)
207 {
208         int ret;
209         struct vhost_scsi_queue *scsi_vq;
210         struct rte_vhost_vring *vq;
211
212         scsi_vq = &ctrlr->bdev->queues[q_idx];
213         vq = &scsi_vq->vq;
214         ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, vq);
215         assert(ret == 0);
216
217         while (vq->avail->idx != scsi_vq->last_used_idx) {
218                 int req_idx;
219                 uint16_t last_idx;
220                 struct vhost_scsi_task *task;
221
222                 last_idx = scsi_vq->last_used_idx & (vq->size - 1);
223                 req_idx = vq->avail->ring[last_idx];
224
225                 task = rte_zmalloc(NULL, sizeof(*task), 0);
226                 assert(task != NULL);
227
228                 task->ctrlr = ctrlr;
229                 task->bdev = ctrlr->bdev;
230                 task->vq = vq;
231                 task->req_idx = req_idx;
232                 task->desc = &task->vq->desc[task->req_idx];
233
234                 /* does not support indirect descriptors */
235                 assert((task->desc->flags & VRING_DESC_F_INDIRECT) == 0);
236                 scsi_vq->last_used_idx++;
237
238                 task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
239                                                           task->desc->addr);
240
241                 task->desc = descriptor_get_next(task->vq->desc, task->desc);
242                 if (!descriptor_has_next(task->desc)) {
243                         task->dxfer_dir = SCSI_DIR_NONE;
244                         task->resp = (void *)(uintptr_t)
245                                               gpa_to_vva(task->bdev->vid,
246                                                          task->desc->addr);
247
248                 } else if (!descriptor_is_wr(task->desc)) {
249                         task->dxfer_dir = SCSI_DIR_TO_DEV;
250                         vhost_process_write_payload_chain(task);
251                 } else {
252                         task->dxfer_dir = SCSI_DIR_FROM_DEV;
253                         vhost_process_read_payload_chain(task);
254                 }
255
256                 ret = vhost_bdev_process_scsi_commands(ctrlr->bdev, task);
257                 if (ret) {
258                         /* invalid response */
259                         task->resp->response = VIRTIO_SCSI_S_BAD_TARGET;
260                 } else {
261                         /* successfully */
262                         task->resp->response = VIRTIO_SCSI_S_OK;
263                         task->resp->status = 0;
264                         task->resp->resid = 0;
265                 }
266                 submit_completion(task);
267                 rte_free(task);
268         }
269 }
270
271 /* Main framework for processing IOs */
272 static void *
273 ctrlr_worker(void *arg)
274 {
275         uint32_t idx, num;
276         struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg;
277         cpu_set_t cpuset;
278         pthread_t thread;
279
280         thread = pthread_self();
281         CPU_ZERO(&cpuset);
282         CPU_SET(0, &cpuset);
283         pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
284
285         num =  rte_vhost_get_vring_num(ctrlr->bdev->vid);
286         fprintf(stdout, "Ctrlr Worker Thread Started with %u Vring\n", num);
287
288         if (num != NUM_OF_SCSI_QUEUES) {
289                 fprintf(stderr, "Only 1 IO queue are supported\n");
290                 exit(0);
291         }
292
293         while (!g_should_stop && ctrlr->bdev != NULL) {
294                 /* At least 3 vrings, currently only can support 1 IO queue
295                  * Queue 2 for IO queue, does not support TMF and hotplug
296                  * for the example application now
297                  */
298                 for (idx = 2; idx < num; idx++)
299                         process_requestq(ctrlr, idx);
300         }
301
302         fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
303         sem_post(&exit_sem);
304         return NULL;
305 }
306
307 static int
308 new_device(int vid)
309 {
310         char path[PATH_MAX];
311         struct vhost_scsi_ctrlr *ctrlr;
312         struct vhost_scsi_queue *scsi_vq;
313         struct rte_vhost_vring *vq;
314         pthread_t tid;
315         int i, ret;
316
317         ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
318         if (ret) {
319                 fprintf(stderr, "Cannot get socket name\n");
320                 return -1;
321         }
322
323         ctrlr = vhost_scsi_ctrlr_find(path);
324         if (!ctrlr) {
325                 fprintf(stderr, "Controller is not ready\n");
326                 return -1;
327         }
328
329         ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
330         if (ret) {
331                 fprintf(stderr, "Get Controller memory region failed\n");
332                 return -1;
333         }
334         assert(ctrlr->mem != NULL);
335
336         /* hardcoded block device information with 128MiB */
337         ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0",
338                                                 4096, 32768, 0);
339         if (!ctrlr->bdev)
340                 return -1;
341
342         ctrlr->bdev->vid = vid;
343
344         /* Disable Notifications */
345         for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) {
346                 rte_vhost_enable_guest_notification(vid, i, 0);
347                 /* restore used index */
348                 scsi_vq = &ctrlr->bdev->queues[i];
349                 vq = &scsi_vq->vq;
350                 ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
351                 assert(ret == 0);
352                 scsi_vq->last_used_idx = vq->used->idx;
353                 scsi_vq->last_avail_idx = vq->used->idx;
354         }
355
356         g_should_stop = 0;
357         fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
358         if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
359                 fprintf(stderr, "Worker Thread Started Failed\n");
360                 return -1;
361         }
362         pthread_detach(tid);
363         return 0;
364 }
365
366 static void
367 destroy_device(int vid)
368 {
369         char path[PATH_MAX];
370         struct vhost_scsi_ctrlr *ctrlr;
371
372         rte_vhost_get_ifname(vid, path, PATH_MAX);
373         fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
374         ctrlr = vhost_scsi_ctrlr_find(path);
375         if (!ctrlr) {
376                 fprintf(stderr, "Destroy Ctrlr Failed\n");
377                 return;
378         }
379         ctrlr->bdev = NULL;
380         g_should_stop = 1;
381
382         sem_wait(&exit_sem);
383 }
384
385 static const struct vhost_device_ops vhost_scsi_device_ops = {
386         .new_device =  new_device,
387         .destroy_device = destroy_device,
388 };
389
390 static struct vhost_scsi_ctrlr *
391 vhost_scsi_ctrlr_construct(const char *ctrlr_name)
392 {
393         int ret;
394         struct vhost_scsi_ctrlr *ctrlr;
395         char *path;
396         char cwd[PATH_MAX];
397
398         /* always use current directory */
399         path = getcwd(cwd, PATH_MAX);
400         if (!path) {
401                 fprintf(stderr, "Cannot get current working directory\n");
402                 return NULL;
403         }
404         snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);
405
406         if (access(dev_pathname, F_OK) != -1) {
407                 if (unlink(dev_pathname) != 0)
408                         rte_exit(EXIT_FAILURE, "Cannot remove %s.\n",
409                                  dev_pathname);
410         }
411
412         if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
413                 fprintf(stderr, "socket %s already exists\n", dev_pathname);
414                 return NULL;
415         }
416
417         fprintf(stdout, "socket file: %s created\n", dev_pathname);
418
419         ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES);
420         if (ret != 0) {
421                 fprintf(stderr, "Set vhost driver features failed\n");
422                 return NULL;
423         }
424
425         ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
426         if (!ctrlr)
427                 return NULL;
428
429         rte_vhost_driver_callback_register(dev_pathname,
430                                            &vhost_scsi_device_ops);
431
432         return ctrlr;
433 }
434
435 static void
436 signal_handler(__rte_unused int signum)
437 {
438
439         if (access(dev_pathname, F_OK) == 0)
440                 unlink(dev_pathname);
441         exit(0);
442 }
443
444 int main(int argc, char *argv[])
445 {
446         int ret;
447
448         signal(SIGINT, signal_handler);
449
450         /* init EAL */
451         ret = rte_eal_init(argc, argv);
452         if (ret < 0)
453                 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
454
455         g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket");
456         if (g_vhost_ctrlr == NULL) {
457                 fprintf(stderr, "Construct vhost scsi controller failed\n");
458                 return 0;
459         }
460
461         if (sem_init(&exit_sem, 0, 0) < 0) {
462                 fprintf(stderr, "Error init exit_sem\n");
463                 return -1;
464         }
465
466         rte_vhost_driver_start(dev_pathname);
467
468         /* loop for exit the application */
469         while (1)
470                 sleep(1);
471
472         return 0;
473 }
474